77 std::unique_ptr<MCStreamer> &&Streamer) {
89 std::unique_ptr<MCStreamer> Streamer)
95 return "AMDGPU Assembly Printer";
112void AMDGPUAsmPrinter::initTargetStreamer(
Module &M) {
118 initializeTargetID(M);
124 if (CodeObjectVersion >= 3)
133 if (CodeObjectVersion >= 3)
143 Version.Major, Version.Minor, Version.Stepping,
"AMD",
"AMDGPU");
149 initTargetStreamer(M);
157 HSAMetadataStream->end();
186 initializeTargetID(*
F.getParent());
188 const auto &FunctionTargetID = STM.getTargetID();
191 if (FunctionTargetID.isXnackSupported() &&
192 FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
195 "' function does not match module xnack setting");
200 if (FunctionTargetID.isSramEccSupported() &&
201 FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
204 "' function does not match module sramecc setting");
211 if ((STM.isMesaKernel(
F) || CodeObjectVersion == 2) &&
215 getAmdKernelCode(KernelCode, CurrentProgramInfo, *
MF);
219 if (STM.isAmdHsaOS())
220 HSAMetadataStream->emitKernel(*
MF, CurrentProgramInfo);
232 auto &
Context = Streamer.getContext();
233 auto &ObjectFileInfo = *
Context.getObjectFileInfo();
234 auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
236 Streamer.pushSection();
237 Streamer.switchSection(&ReadOnlySection);
241 Streamer.emitValueToAlignment(
Align(64), 0, 1, 0);
242 ReadOnlySection.ensureMinAlignment(
Align(64));
249 STM, KernelName, getAmdhsaKernelDescriptor(*
MF, CurrentProgramInfo),
258 Streamer.popSection();
263 CodeObjectVersion >=3) {
276 if (DumpCodeInstEmitter) {
303 ": unsupported initializer for address space");
317 "' is already defined");
326 TS->emitAMDGPULDS(GVSym,
Size, Alignment);
337 switch (CodeObjectVersion) {
373void AMDGPUAsmPrinter::emitCommonFunctionComments(
390uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
396 KernelCodeProperties |=
397 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
400 KernelCodeProperties |=
401 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
404 KernelCodeProperties |=
405 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
408 KernelCodeProperties |=
409 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
412 KernelCodeProperties |=
413 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
416 KernelCodeProperties |=
417 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
420 KernelCodeProperties |=
421 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
425 KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK;
427 return KernelCodeProperties;
437 memset(&KernelDescriptor, 0x0,
sizeof(KernelDescriptor));
446 Align MaxKernArgAlign;
458 return KernelDescriptor;
467 ResourceUsage = &getAnalysis<AMDGPUResourceUsageAnalysis>();
488 getSIProgramInfo(CurrentProgramInfo,
MF);
493 EmitPALMetadata(
MF, CurrentProgramInfo);
495 emitPALFunctionMetadata(
MF);
497 EmitProgramInfoSI(
MF, CurrentProgramInfo);
500 DumpCodeInstEmitter =
nullptr;
504 bool SaveFlag =
OutStreamer->getUseAssemblerInfoForParsing();
507 OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
527 OutStreamer->emitRawComment(
" Function info:",
false);
530 emitCommonFunctionComments(
533 Info.getTotalNumVGPRs(STM),
535 Info.PrivateSegmentSize, getFunctionCodeSize(
MF), MFI);
539 OutStreamer->emitRawComment(
" Kernel info:",
false);
540 emitCommonFunctionComments(
543 : std::optional<uint32_t>(),
545 CurrentProgramInfo.
ScratchSize, getFunctionCodeSize(
MF), MFI);
553 " bytes/workgroup (compile time only)",
false);
561 " NumSGPRsForWavesPerEU: " +
564 " NumVGPRsForWavesPerEU: " +
580 " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
583 " COMPUTE_PGM_RSRC2:USER_SGPR: " +
586 " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
589 " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
592 " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
595 " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
598 " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
606 " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
608 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))),
611 " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
613 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))),
618 if (DumpCodeInstEmitter) {
624 std::string Comment =
"\n";
627 Comment +=
" ; " +
HexLines[i] +
"\n";
639void AMDGPUAsmPrinter::initializeTargetID(
const Module &M) {
653 if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
654 (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
659 if (TSTargetID->isXnackSupported())
660 if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
662 if (TSTargetID->isSramEccSupported())
663 if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
664 TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
679 if (
MI.isDebugInstr())
682 CodeSize +=
TII->getInstSizeInBytes(
MI);
689void AMDGPUAsmPrinter::getSIProgramInfo(
SIProgramInfo &ProgInfo,
706 const uint64_t MaxScratchPerWorkitem =
708 if (ProgInfo.
ScratchSize > MaxScratchPerWorkitem) {
727 if (ProgInfo.
NumSGPR > MaxAddressableNumSGPRs) {
734 ProgInfo.
NumSGPR = MaxAddressableNumSGPRs - 1;
739 ProgInfo.
NumSGPR += ExtraSGPRs;
745 unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
754 unsigned LastEna = 0;
767 assert((InputEna || InputAddr) &&
768 "PSInputAddr and PSInputEnable should "
769 "never both be 0 for AMDGPU_PS shaders");
779 unsigned PSArgCount = 0;
780 unsigned IntermediateVGPR = 0;
781 for (
auto &
Arg :
F.args()) {
782 unsigned NumRegs = (
DL.getTypeSizeInBits(
Arg.getType()) + 31) / 32;
783 if (
Arg.hasAttribute(Attribute::InReg)) {
784 WaveDispatchNumSGPR += NumRegs;
791 if (IsPixelShader && PSArgCount < 16) {
792 if ((1 << PSArgCount) & InputAddr) {
793 if (PSArgCount < LastEna)
794 WaveDispatchNumVGPR += NumRegs;
796 IntermediateVGPR += NumRegs;
802 if (IntermediateVGPR) {
803 WaveDispatchNumVGPR += IntermediateVGPR;
804 IntermediateVGPR = 0;
806 WaveDispatchNumVGPR += NumRegs;
810 ProgInfo.
NumSGPR = std::max(ProgInfo.
NumSGPR, WaveDispatchNumSGPR);
826 if (ProgInfo.
NumSGPR > MaxAddressableNumSGPRs) {
831 ProgInfo.
NumSGPR, MaxAddressableNumSGPRs,
834 ProgInfo.
NumSGPR = MaxAddressableNumSGPRs;
879 unsigned LDSAlignShift;
893 alignTo(ProgInfo.
LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
896 unsigned ScratchAlignShift =
910 unsigned TIDIGCompCnt = 0;
921 const bool EnablePrivateSegment =
940 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
943 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
954 default: [[fallthrough]];
1002 : CurrentProgramInfo.LDSBlocks;
1033 MD->setNumUsedAgprs(
CC, CurrentProgramInfo.
NumAccVGPR);
1049 : CurrentProgramInfo.LDSBlocks;
1059void AMDGPUAsmPrinter::emitPALFunctionMetadata(
const MachineFunction &MF) {
1070 MD->setFunctionLdsSize(
MF, CurrentProgramInfo.
LDSSize);
1136 if (STM.isXNACKEnabled())
1139 Align MaxKernArgAlign;
1158 if (ExtraCode && ExtraCode[0]) {
1159 if (ExtraCode[1] != 0)
1162 switch (ExtraCode[0]) {
1176 }
else if (MO.
isImm()) {
1177 int64_t Val = MO.
getImm();
1180 }
else if (isUInt<16>(Val)) {
1182 }
else if (isUInt<32>(Val)) {
1198void AMDGPUAsmPrinter::emitResourceUsageRemarks(
1204 const char *
Name =
"kernel-resource-usage";
1205 const char *Indent =
" ";
1212 auto EmitResourceUsageRemark = [&](
StringRef RemarkName,
1217 std::string LabelStr = RemarkLabel.str() +
": ";
1218 if (!RemarkName.
equals(
"FunctionName"))
1219 LabelStr = Indent + LabelStr;
1234 EmitResourceUsageRemark(
"FunctionName",
"Function Name",
1236 EmitResourceUsageRemark(
"NumSGPR",
"SGPRs", CurrentProgramInfo.
NumSGPR);
1237 EmitResourceUsageRemark(
"NumVGPR",
"VGPRs", CurrentProgramInfo.
NumArchVGPR);
1239 EmitResourceUsageRemark(
"NumAGPR",
"AGPRs", CurrentProgramInfo.
NumAccVGPR);
1240 EmitResourceUsageRemark(
"ScratchSize",
"ScratchSize [bytes/lane]",
1242 EmitResourceUsageRemark(
"Occupancy",
"Occupancy [waves/SIMD]",
1244 EmitResourceUsageRemark(
"SGPRSpill",
"SGPRs Spill",
1246 EmitResourceUsageRemark(
"VGPRSpill",
"VGPRs Spill",
1248 if (isModuleEntryFunction)
1249 EmitResourceUsageRemark(
"BytesLDS",
"LDS Size [bytes/block]",
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static unsigned getRsrcReg(CallingConv::ID CallConv)
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode)
void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter()
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
AMDGPU Assembly printer class.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Analyzes how many registers and other resources are used by functions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
#define AMDHSA_BITS_SET(DST, MSK, VAL)
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
#define AMD_HSA_BITS_SET(dst, mask, val)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_IS_PTR64
@ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK
Analysis containing CSE Info
#define LLVM_EXTERNAL_VISIBILITY
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
return ToRemove size() > 0
const char LLVMTargetMachineRef TM
R600 Assembly printer class.
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
#define S_00B84C_EXCP_EN(x)
#define S_0286E8_WAVESIZE_PreGFX11(x)
#define R_0286E8_SPI_TMPRING_SIZE
#define S_00B84C_SCRATCH_EN(x)
#define S_00B84C_TGID_Z_EN(x)
#define S_0286E8_WAVESIZE_GFX11Plus(x)
#define FP_ROUND_MODE_DP(x)
#define S_00B860_WAVESIZE_GFX11Plus(x)
#define FP_ROUND_ROUND_TO_NEAREST
#define G_00B84C_TIDIG_COMP_CNT(x)
#define R_0286D0_SPI_PS_INPUT_ADDR
#define R_00B860_COMPUTE_TMPRING_SIZE
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
#define G_00B84C_TGID_X_EN(x)
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
#define S_00B84C_TGID_X_EN(x)
#define G_00B84C_TRAP_HANDLER(x)
#define S_00B028_SGPRS(x)
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
#define R_0286CC_SPI_PS_INPUT_ENA
#define G_00B84C_TGID_Y_EN(x)
#define S_00B860_WAVESIZE_PreGFX11(x)
#define S_00B84C_TG_SIZE_EN(x)
#define S_00B84C_TIDIG_COMP_CNT(x)
#define S_00B028_VGPRS(x)
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
#define FP_DENORM_MODE_DP(x)
#define R_00B848_COMPUTE_PGM_RSRC1
#define S_00B84C_LDS_SIZE(x)
#define S_00B84C_USER_SGPR(x)
#define S_00B84C_TRAP_HANDLER(x)
#define G_00B84C_TGID_Z_EN(x)
#define S_00B84C_TGID_Y_EN(x)
#define FP_ROUND_MODE_SP(x)
#define FP_DENORM_MODE_SP(x)
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
#define S_00B84C_EXCP_EN_MSB(x)
#define G_00B84C_USER_SGPR(x)
#define G_00B84C_SCRATCH_EN(x)
#define S_00B02C_EXTRA_LDS_SIZE(x)
#define R_00B84C_COMPUTE_PGM_RSRC2
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This header is deprecated in favour of llvm/TargetParser/TargetParser.h.
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
const MCSubtargetInfo * getGlobalSTI() const
std::vector< std::string > DisasmLines
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
std::vector< std::string > HexLines
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool IsTargetStreamerInitialized
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
void emitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
bool doFinalization(Module &M) override
Shut down the asmprinter.
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
AMDGPUTargetStreamer * getTargetStreamer() const
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
uint32_t getLDSSize() const
bool isMemoryBound() const
bool needsWaveLimiter() const
bool isEntryFunction() const
bool isModuleEntryFunction() const
unsigned getAddressableLocalMemorySize() const
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
AMDGPUPALMetadata * getPALMetadata()
virtual bool EmitISAVersion()
void initializeTargetID(const MCSubtargetInfo &STI)
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, unsigned CodeObjectVersion)
virtual void EmitDirectiveAMDGCNTarget()
virtual void EmitDirectiveHSACodeObjectISAV2(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
This class is intended to be used as a driving class for all asm writers.
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
MCSymbol * getSymbol(const GlobalValue *GV) const
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
TargetMachine & TM
Target machine description.
MachineFunction * MF
The current machine function.
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
void emitFunctionBody()
This method emits the body and trailer for a function.
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MachineOptimizationRemarkEmitter * ORE
Optimization remark emitter.
MCContext & OutContext
This is the context for the output file that we are streaming.
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
bool isVerbose() const
Return true if assembly output should contain comments.
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
A parsed version of the target data layout string, and methods for querying it.
Diagnostic information for stack size etc.
DISubprogram * getSubprogram() const
Get the attached subprogram.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasGFX90AInsts() const
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
const SIInstrInfo * getInstrInfo() const override
bool hasSGPRInitBug() const
bool isTgSplitEnabled() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool isCuModeEnabled() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
bool isTrapHandlerEnabled() const
unsigned getMaxNumUserSGPRs() const
Generation getGeneration() const
unsigned getAddressableNumSGPRs() const
unsigned getMaxWaveScratchSize() const
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
VisibilityTypes getVisibility() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
const DiagnosticHandler * getDiagHandlerPtr() const
getDiagHandlerPtr - Returns const raw pointer of DiagnosticHandler set by setDiagnosticHandler.
MCCodeEmitter * getEmitterPtr() const
Context object for machine code objects.
void reportError(SMLoc L, const Twine &Msg)
MCContext & getContext() const
This represents a section on linux, lots of unix variants and some bare metal systems.
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
StringRef getName() const
getName - Get the symbol name.
bool isVariable() const
isVariable - Check if this is a variable symbol.
void redefineIfPossible()
Prepare this symbol to be redefined.
MCStreamer & getStreamer()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
void setAlignment(Align A)
setAlignment - Set the alignment of the function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
A Module instance is used to store all the information related to an LLVM module.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getNumSpilledVGPRs() const
unsigned getNumSpilledSGPRs() const
bool hasFlatScratchInit() const
unsigned getMaxWavesPerEU() const
bool hasDispatchID() const
bool hasWorkGroupIDZ() const
bool hasWorkGroupIDY() const
bool hasWorkGroupInfo() const
bool hasWorkItemIDY() const
bool hasWorkGroupIDX() const
bool hasDispatchPtr() const
AMDGPU::SIModeRegisterDefaults getMode() const
bool hasPrivateSegmentBuffer() const
unsigned getNumUserSGPRs() const
bool hasKernargSegmentPtr() const
unsigned getPSInputAddr() const
bool hasWorkItemIDZ() const
unsigned getPSInputEnable() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
StringRef - Represents a constant reference to a string.
std::string str() const
str - Get the contents as an std::string.
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
const MCSubtargetInfo * getMCSubtargetInfo() const
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
OSType getOS() const
Get the parsed operating system type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
StringRef getName() const
Return a constant reference to the value's name.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
unsigned getCodeObjectVersion(const Module &M)
IsaVersion getIsaVersion(StringRef GPU)
bool isCompute(CallingConv::ID cc)
bool isGFX90A(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isShader(CallingConv::ID cc)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
Target & getTheGCNTarget()
The target for GCN GPUs.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
unsigned Log2(Align A)
Returns the log2 of the alignment.
AMD Kernel Code Object (amd_kernel_code_t).
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
uint32_t code_properties
Code properties.
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment.
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel.
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
const SIFunctionResourceInfo & getResourceInfo(const Function *F) const
Instruction set architecture version.
This struct is a compact representation of a valid (non-zero power of two) alignment.
virtual bool isAnalysisRemarkEnabled(StringRef PassName) const
Return true if analysis remarks are enabled, override to provide different implementation.
Track resource usage for kernels / entry functions.
uint64_t getPGMRSrc1(CallingConv::ID CC) const
uint32_t NumSGPRsForWavesPerEU
uint32_t NumVGPRsForWavesPerEU
uint64_t ComputePGMRSrc3GFX90A
uint64_t getComputePGMRSrc1() const
Compute the value of the ComputePGMRsrc1 register.
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
uint32_t group_segment_fixed_size
uint32_t compute_pgm_rsrc1
uint32_t private_segment_fixed_size
uint32_t compute_pgm_rsrc2
uint16_t kernel_code_properties
uint32_t compute_pgm_rsrc3