#define DEBUG_TYPE "amdgpu-resource-usage"
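// The two command-line flags below supply assumed stack sizes for cases the
// analysis cannot size statically: calls to external functions and
// dynamically sized stack objects. Per their cl::init values the defaults are
// 16384 and 4096 bytes respectively. A hypothetical invocation overriding one
// of them might look like:
//   llc -mtriple=amdgcn -amdgpu-assume-external-call-stack-size=8192 foo.ll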
40 "amdgpu-assume-external-call-stack-size",
45 "amdgpu-assume-dynamic-stack-object-size",
46 cl::desc(
"Assumed extra stack use if there are any "
47 "variable sized objects (in bytes)"),
51 "Function register usage analysis",
true,
true)
return cast<Function>(Op.getGlobal()->stripPointerCastsAndAliases());
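// stripPointerCastsAndAliases() looks through bitcasts and aliases, so a
// callee referenced via an alias or a constant cast still resolves to the
// underlying Function.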
if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
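// A use only escapes this predicate when it is an implicit operand of a FLAT
// memory instruction; any explicit use, or a use on a non-FLAT instruction,
// counts as a "real" use of the register.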
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
uint32_t AssumedStackSizeForDynamicSizeObjects =
    clAssumedStackSizeForDynamicSizeObjects;
uint32_t AssumedStackSizeForExternalCall = clAssumedStackSizeForExternalCall;

AssumedStackSizeForDynamicSizeObjects = 0;
AssumedStackSizeForExternalCall = 0;
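// The zeroing above runs when the code object version makes these assumptions
// unnecessary (cf. getAMDHSACodeObjectVersion), presumably unless the
// corresponding flags were set explicitly on the command line.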
ResourceInfo = analyzeResourceUsage(MF, AssumedStackSizeForDynamicSizeObjects,
                                    AssumedStackSizeForExternalCall);
AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
    const MachineFunction &MF, uint32_t AssumedStackSizeForDynamicSizeObjects,
    uint32_t AssumedStackSizeForExternalCall) const {
  SIFunctionResourceInfo Info;
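// SIFunctionResourceInfo accumulates per-function resource usage: register
// counts, private segment (scratch) size, and flags such as flat-scratch use,
// dynamic stack, recursion, and indirect calls, as filled in below.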
Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
                       MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI) ||
                       MRI.isLiveIn(MFI->getPreloadedReg(
                           AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT));
Info.UsesFlatScratch = false;
Info.PrivateSegmentSize = FrameInfo.getStackSize();
Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
if (Info.HasDynamicallySizedStack)
  Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
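// With variable sized objects the true stack size is unknowable at compile
// time, so the flag-provided assumption is added on top of the static frame
// size computed above.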
if (MFI->isStackRealigned())
  Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
Info.UsesVCC =
    MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
  Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
  Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
  if (ST.hasMAIInsts())
    Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
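// Fast path: without calls, MachineRegisterInfo already knows exactly which
// physical registers were used, so the per-class counts are exact. AGPRs are
// only counted on subtargets that have MAI instructions.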
int32_t MaxVGPR = -1;
int32_t MaxAGPR = -1;
int32_t MaxSGPR = -1;
Info.CalleeSegmentSize = 0;
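// Slow path: scan every operand of every instruction, tracking the highest
// used register index per register file. The -1 sentinels make the final
// "Max + 1" conversion at the end yield a count of 0 for an untouched file.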
case AMDGPU::EXEC_LO:
case AMDGPU::EXEC_HI:
case AMDGPU::M0_LO16:
case AMDGPU::M0_HI16:
case AMDGPU::SRC_SHARED_BASE_LO:
case AMDGPU::SRC_SHARED_BASE:
case AMDGPU::SRC_SHARED_LIMIT_LO:
case AMDGPU::SRC_SHARED_LIMIT:
case AMDGPU::SRC_PRIVATE_BASE_LO:
case AMDGPU::SRC_PRIVATE_BASE:
case AMDGPU::SRC_PRIVATE_LIMIT_LO:
case AMDGPU::SRC_PRIVATE_LIMIT:
case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
case AMDGPU::SGPR_NULL:
case AMDGPU::SGPR_NULL64:
  continue;
case AMDGPU::NoRegister:
  assert(MI.isDebugInstr() && "Instruction uses invalid noreg register");
  continue;
case AMDGPU::VCC_LO_LO16:
case AMDGPU::VCC_LO_HI16:
case AMDGPU::VCC_HI_LO16:
case AMDGPU::VCC_HI_HI16:
  Info.UsesVCC = true;
  continue;

case AMDGPU::FLAT_SCR:
case AMDGPU::FLAT_SCR_LO:
case AMDGPU::FLAT_SCR_HI:
  continue;

case AMDGPU::XNACK_MASK:
case AMDGPU::XNACK_MASK_LO:
case AMDGPU::XNACK_MASK_HI:
  llvm_unreachable("xnack_mask registers should not be used");

case AMDGPU::LDS_DIRECT:
  llvm_unreachable("lds_direct register should not be used");

case AMDGPU::SRC_VCCZ:
  llvm_unreachable("src_vccz register should not be used");

case AMDGPU::SRC_EXECZ:
  llvm_unreachable("src_execz register should not be used");

case AMDGPU::SRC_SCC:
  llvm_unreachable("src_scc register should not be used");
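// Any register that survives the special-register filtering above is
// classified by register-class membership below: the cascade records whether
// it is an SGPR, VGPR, or AGPR and how many 32-bit lanes (Width) its tuple
// spans.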
if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
    AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
    AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 1;
} else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
           AMDGPU::VGPR_16RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 1;
} else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
           AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 1;
} else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 2;
} else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 2;
} else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 2;
} else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 3;
} else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 3;
} else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 3;
} else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 4;
} else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 4;
} else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 4;
} else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 5;
} else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 5;
} else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 5;
} else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 6;
} else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 6;
} else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 6;
} else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 7;
} else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 7;
} else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 7;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 8;
} else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 8;
} else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 8;
} else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 9;
} else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 9;
} else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 9;
} else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 10;
} else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 10;
} else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 10;
} else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 11;
} else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 11;
} else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 11;
} else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 12;
} else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 12;
} else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 12;
} else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 16;
} else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 16;
} else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 16;
} else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
  IsSGPR = true;
  Width = 32;
} else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
  IsSGPR = false;
  Width = 32;
} else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
  IsSGPR = false;
  IsAGPR = true;
  Width = 32;
} else {
  assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
          AMDGPU::TTMP_64RegClass.contains(Reg) ||
          AMDGPU::TTMP_128RegClass.contains(Reg) ||
          AMDGPU::TTMP_256RegClass.contains(Reg) ||
          AMDGPU::TTMP_512RegClass.contains(Reg) ||
          !TRI.getPhysRegBaseClass(Reg)) &&
         "Unknown register class");
}
unsigned HWReg = TRI.getHWRegIndex(Reg);
int MaxUsed = HWReg + Width - 1;
if (IsSGPR) {
  MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
} else if (IsAGPR) {
  MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
} else {
  MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
}
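// Example: a 128-bit tuple based at v4 has HWReg = 4 and Width = 4, so
// MaxUsed = 4 + 4 - 1 = 7, raising the VGPR high-water mark to cover v[4:7].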
const MachineOperand *CalleeOp =
    TII->getNamedOperand(MI, AMDGPU::OpName::callee);
if (Callee && !isSameFunction(MF, Callee))
  Info.Callees.push_back(Callee);
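// Direct callees (other than self-calls) are recorded so that a consumer of
// this analysis can later fold callee resource usage into the caller's.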
if (!Callee || !Callee->doesNotRecurse()) {
  Info.HasRecursion = true;
  if (!MI.isReturn()) {
    Info.CalleeSegmentSize = std::max(
        Info.CalleeSegmentSize,
        static_cast<uint64_t>(AssumedStackSizeForExternalCall));
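// A possibly-recursive, non-tail call means the stack may keep growing, so
// the assumed external-call stack size is charged to the callee segment.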
Info.CalleeSegmentSize =
    std::max(Info.CalleeSegmentSize,
             static_cast<uint64_t>(AssumedStackSizeForExternalCall));
Info.UsesFlatScratch = ST.hasFlatAddressSpace();
Info.HasDynamicallySizedStack = true;
Info.HasIndirectCall = true;
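// For an indirect call nothing is known about the target, so be maximally
// conservative: assume external-call stack use, flat scratch, a dynamically
// sized stack, and mark the indirect call itself.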
Info.NumExplicitSGPR = MaxSGPR + 1;
Info.NumVGPR = MaxVGPR + 1;
Info.NumAGPR = MaxAGPR + 1;
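// Convert the highest used register indices into counts; the -1 sentinels
// collapse to 0 when a register file was never touched.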