83 std::unique_ptr<MCStreamer> &&Streamer) {
95 std::unique_ptr<MCStreamer> Streamer)
99 return "AMDGPU Assembly Printer";
142 SIProgramInfo KernelInfo;
144 getSIProgramInfo(KernelInfo, *
MF);
145 EmitAmdKernelCodeT(*
MF, KernelInfo);
152 if (MFI->
isKernel() && STM.isAmdCodeObjectV2(*
MF)) {
187 SIProgramInfo KernelInfo;
189 getSIProgramInfo(KernelInfo, MF);
191 EmitProgramInfoSI(MF, KernelInfo);
194 EmitProgramInfoR600(MF);
209 OutStreamer->emitRawComment(
" Kernel info:",
false);
210 OutStreamer->emitRawComment(
" codeLenInByte = " +
Twine(KernelInfo.CodeLen),
220 OutStreamer->emitRawComment(
" ScratchSize: " +
Twine(KernelInfo.ScratchSize),
223 " bytes/workgroup (compile time only)",
false);
226 Twine(KernelInfo.SGPRBlocks),
false);
228 Twine(KernelInfo.VGPRBlocks),
false);
230 OutStreamer->emitRawComment(
" NumSGPRsForWavesPerEU: " +
231 Twine(KernelInfo.NumSGPRsForWavesPerEU),
false);
232 OutStreamer->emitRawComment(
" NumVGPRsForWavesPerEU: " +
233 Twine(KernelInfo.NumVGPRsForWavesPerEU),
false);
235 OutStreamer->emitRawComment(
" ReservedVGPRFirst: " +
Twine(KernelInfo.ReservedVGPRFirst),
237 OutStreamer->emitRawComment(
" ReservedVGPRCount: " +
Twine(KernelInfo.ReservedVGPRCount),
241 OutStreamer->emitRawComment(
" DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
242 Twine(KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR),
false);
243 OutStreamer->emitRawComment(
" DebuggerPrivateSegmentBufferSGPR: s" +
244 Twine(KernelInfo.DebuggerPrivateSegmentBufferSGPR),
false);
247 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:USER_SGPR: " +
250 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
253 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
256 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
259 OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
287 void AMDGPUAsmPrinter::EmitProgramInfoR600(
const MachineFunction &MF) {
289 bool killPixel =
false;
296 if (
MI.getOpcode() == AMDGPU::KILLGT)
298 unsigned numOperands =
MI.getNumOperands();
299 for (
unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
303 unsigned HWReg = RI->getEncodingValue(MO.
getReg()) & 0xff;
308 MaxGPR = std::max(MaxGPR, HWReg);
316 switch (MF.getFunction()->getCallingConv()) {
325 switch (MF.getFunction()->getCallingConv()) {
346 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
350 uint64_t CodeSize = 0;
351 unsigned MaxSGPR = 0;
352 unsigned MaxVGPR = 0;
353 bool VCCUsed =
false;
354 bool FlatUsed =
false;
363 if (
MI.isDebugValue())
369 unsigned numOperands =
MI.getNumOperands();
370 for (
unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
378 unsigned reg = MO.
getReg();
381 case AMDGPU::EXEC_LO:
382 case AMDGPU::EXEC_HI:
393 case AMDGPU::FLAT_SCR:
394 case AMDGPU::FLAT_SCR_LO:
395 case AMDGPU::FLAT_SCR_HI:
398 if (MFI->hasFlatScratchInit())
414 if (AMDGPU::SReg_32RegClass.
contains(reg)) {
416 "trap handler registers should not be used");
419 }
else if (AMDGPU::VGPR_32RegClass.
contains(reg)) {
422 }
else if (AMDGPU::SReg_64RegClass.
contains(reg)) {
424 "trap handler registers should not be used");
427 }
else if (AMDGPU::VReg_64RegClass.
contains(reg)) {
430 }
else if (AMDGPU::VReg_96RegClass.
contains(reg)) {
433 }
else if (AMDGPU::SReg_128RegClass.
contains(reg)) {
436 }
else if (AMDGPU::VReg_128RegClass.
contains(reg)) {
439 }
else if (AMDGPU::SReg_256RegClass.
contains(reg)) {
442 }
else if (AMDGPU::VReg_256RegClass.
contains(reg)) {
445 }
else if (AMDGPU::SReg_512RegClass.
contains(reg)) {
448 }
else if (AMDGPU::VReg_512RegClass.
contains(reg)) {
454 unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
455 unsigned maxUsed = hwReg + width - 1;
457 MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
459 MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
465 unsigned ExtraSGPRs = 0;
491 ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
493 ProgInfo.DebuggerPrivateSegmentBufferSGPR =
501 if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {
505 "addressable scalar registers",
509 MaxSGPR = MaxAddressableNumSGPRs - 1;
514 MaxSGPR += ExtraSGPRs;
519 ProgInfo.NumVGPR = MaxVGPR + 1;
520 ProgInfo.NumSGPR = MaxSGPR + 1;
524 ProgInfo.NumSGPRsForWavesPerEU = std::max(
526 ProgInfo.NumVGPRsForWavesPerEU = std::max(
532 if (ProgInfo.NumSGPR > MaxNumSGPRs) {
542 ProgInfo.NumSGPR = MaxNumSGPRs;
543 ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;
567 ProgInfo.SGPRBlocks =
alignTo(ProgInfo.NumSGPRsForWavesPerEU,
572 ProgInfo.VGPRBlocks =
alignTo(ProgInfo.NumVGPRsForWavesPerEU,
583 ProgInfo.DX10Clamp = 1;
588 ProgInfo.FlatUsed = FlatUsed;
589 ProgInfo.VCCUsed = VCCUsed;
590 ProgInfo.CodeLen = CodeSize;
592 unsigned LDSAlignShift;
601 unsigned LDSSpillSize =
602 MFI->LDSWaveSpillSize * MFI->getMaxFlatWorkGroupSize();
604 ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
606 alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
609 unsigned ScratchAlignShift = 10;
613 ProgInfo.ScratchBlocks =
615 1ULL << ScratchAlignShift) >>
618 ProgInfo.ComputePGMRSrc1 =
629 unsigned TIDIGCompCnt = 0;
630 if (MFI->hasWorkItemIDZ())
632 else if (MFI->hasWorkItemIDY())
635 ProgInfo.ComputePGMRSrc2 =
659 const SIProgramInfo &KernelInfo) {
667 OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
670 OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
693 OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
697 OutStreamer->EmitIntValue(MFI->getNumSpilledSGPRs(), 4);
699 OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4);
717 const SIProgramInfo &KernelInfo)
const {
724 header.compute_pgm_resource_registers =
725 KernelInfo.ComputePGMRSrc1 |
726 (KernelInfo.ComputePGMRSrc2 << 32);
735 header.code_properties |=
757 header.code_properties |=
762 header.code_properties |=
767 header.code_properties |=
774 if (STM.debuggerSupported())
777 if (STM.isXNACKEnabled())
781 header.kernarg_segment_byte_size =
783 header.wavefront_sgpr_count = KernelInfo.NumSGPR;
784 header.workitem_vgpr_count = KernelInfo.NumVGPR;
785 header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
786 header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
787 header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
788 header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
792 header.kernarg_segment_alignment = std::max((
size_t)4,
795 if (STM.debuggerEmitPrologue()) {
796 header.debug_wavefront_private_segment_offset_sgpr =
797 KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
798 header.debug_private_segment_buffer_sgpr =
799 KernelInfo.DebuggerPrivateSegmentBufferSGPR;
812 if (ExtraCode && ExtraCode[0]) {
813 if (ExtraCode[1] != 0)
816 switch (ExtraCode[0]) {
virtual void EmitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
void EmitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
Interface definition for SIRegisterInfo.
StringRef getTargetCPU() const
Target & getTheGCNTarget()
The target for GCN GPUs.
#define S_00B848_VGPRS(x)
bool hasFlatScratchInit() const
AMDGPU specific subclass of TargetSubtarget.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
#define FP_DENORM_MODE_SP(x)
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
#define G_00B84C_USER_SGPR(x)
bool isVGPRSpillingEnabled(const Function &F) const
A Module instance is used to store all the information related to an LLVM module. ...
virtual void EmitRuntimeMetadata(Module &M)=0
#define R_028850_SQ_PGM_RESOURCES_PS
#define R_028860_SQ_PGM_RESOURCES_VS
bool hasDispatchID() const
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
#define G_00B84C_TGID_Z_EN(x)
#define R_028878_SQ_PGM_RESOURCES_GS
Calling convention used for Mesa vertex shaders.
const SIInstrInfo * getInstrInfo() const override
const MachineFunction * MF
The current machine function.
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
#define S_00B84C_SCRATCH_EN(x)
bool enableIEEEBit(const MachineFunction &MF) const
#define S_00B84C_TG_SIZE_EN(x)
#define S_00B848_DX10_CLAMP(x)
const std::string & str() const
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
#define G_00B84C_TGID_Y_EN(x)
int getLocalMemorySize() const
Interface definition for R600RegisterInfo.
bool isAmdCodeObjectV2(const MachineFunction &MF) const
#define R_0286CC_SPI_PS_INPUT_ENA
#define S_00B028_SGPRS(x)
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
return AArch64::GPR64RegClass contains(Reg)
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
unsigned getHWRegIndex(unsigned Reg) const
const Triple & getTargetTriple() const
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects...
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
#define FP_DENORM_FLUSH_NONE
MCSubtargetInfo * createMCSubtargetInfo(StringRef TheTriple, StringRef CPU, StringRef Features) const
createMCSubtargetInfo - Create a MCSubtargetInfo implementation.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
const HexagonInstrInfo * TII
unsigned getABIArgOffset() const
AMD Kernel Code Object (amd_kernel_code_t).
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
unsigned getLDSSize() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
#define FP_ROUND_MODE_SP(x)
unsigned getVGPRAllocGranule() const
Diagnostic information for stack size etc.
#define S_00B84C_TGID_Y_EN(x)
Context object for machine code objects.
#define S_00B848_FLOAT_MODE(x)
#define R_00B848_COMPUTE_PGM_RSRC1
unsigned getMaxNumUserSGPRs() const
void EmitFunctionBody()
This method emits the body and trailer for a function.
bool isXNACKEnabled() const
bool isGroupSegment(const GlobalValue *GV)
unsigned getMaxKernArgAlign() const
MCContext & getContext() const
Generation getGeneration() const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool hasSGPRInitBug() const
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
#define S_00B848_IEEE_MODE(x)
#define S_00B028_VGPRS(x)
static uint32_t getFPMode(const MachineFunction &F)
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
unsigned getMaxWavesPerEU() const
IsaVersion getIsaVersion(const FeatureBitset &Features)
This is an important class for using LLVM in a threaded context.
#define G_00B84C_TGID_X_EN(x)
const MachineOperand & getOperand(unsigned i) const
#define S_00B84C_TIDIG_COMP_CNT(x)
#define FP_ROUND_MODE_DP(x)
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
TargetMachine & TM
Target machine description.
This class is intended to be used as a driving class for all asm writers.
bool isCompute(CallingConv::ID cc)
bool hasDispatchPtr() const
bool hasKernargSegmentPtr() const
#define R_0288D4_SQ_PGM_RESOURCES_LS
#define S_00B84C_EXCP_EN_MSB(x)
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool hasFP32Denormals() const
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
bool debuggerReserveRegs() const
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
#define S_00B84C_TGID_Z_EN(x)
#define R_02880C_DB_SHADER_CONTROL
#define S_00B84C_LDS_SIZE(x)
virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
Calling convention used for Mesa pixel shaders.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool hasFP64Denormals() const
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
unsigned getSGPRAllocGranule() const
#define FP_ROUND_ROUND_TO_NEAREST
bool hasGridWorkgroupCountX() const
#define S_00B84C_EXCP_EN(x)
void LLVMInitializeAMDGPUAsmPrinter()
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
const R600RegisterInfo * getRegisterInfo() const override
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
#define G_00B84C_TIDIG_COMP_CNT(x)
std::vector< std::string > HexLines
unsigned getNumDebuggerReservedVGPRs(const SISubtarget &ST) const
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Calling convention used for Mesa geometry shaders.
StringRef getTargetFeatureString() const
const SIRegisterInfo * getRegisterInfo() const override
MachineOperand class - Representation of each machine instruction operand.
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
#define S_00B848_DEBUG_MODE(x)
#define R_028868_SQ_PGM_RESOURCES_VS
#define FP_DENORM_MODE_DP(x)
#define S_0286E8_WAVESIZE(x)
#define S_00B84C_TGID_X_EN(x)
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
#define AMD_HSA_BITS_SET(dst, mask, val)
std::vector< std::string > DisasmLines
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
void EmitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
Representation of each machine instruction.
void EmitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
#define S_02880C_KILL_ENABLE(x)
#define S_00B848_SGPRS(x)
#define S_00B84C_USER_SGPR(x)
AMDGPU Assembly printer class.
#define R_00B860_COMPUTE_TMPRING_SIZE
bool hasGridWorkgroupCountZ() const
This represents a section on linux, lots of unix variants and some bare metal systems.
unsigned getWavefrontSize() const
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
virtual void EmitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
unsigned getMinNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU) const
Calling convention used for Mesa compute shaders.
static unsigned getRsrcReg(CallingConv::ID CallConv)
unsigned getReg() const
getReg - Returns the register number.
#define S_00B860_WAVESIZE(x)
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define R_00B84C_COMPUTE_PGM_RSRC2
#define S_00B848_PRIORITY(x)
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
void EmitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
#define R_0286E8_SPI_TMPRING_SIZE
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
void setAlignment(unsigned A)
setAlignment - Set the alignment (log2, not bytes) of the function.
#define S_00B02C_EXTRA_LDS_SIZE(x)
This class implements an extremely fast bulk output stream that can only output to a stream...
const Target & getTarget() const
Primary interface to the complete machine description for the target machine.
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
StringRef - Represent a constant reference to a string, i.e.
#define R_0286D0_SPI_PS_INPUT_ADDR
bool isVerbose() const
Return true if assembly output should contain comments.
unsigned getMaxNumSGPRs() const
bool debuggerEmitPrologue() const
bool hasGridWorkgroupCountY() const
#define R_0288E8_SQ_LDS_ALLOC
#define R_028844_SQ_PGM_RESOURCES_PS
bool hasPrivateSegmentBuffer() const