67 uint32_t FP32Denormals =
70 uint32_t FP64Denormals =
81 std::unique_ptr<MCStreamer> &&Streamer) {
91 std::unique_ptr<MCStreamer> Streamer)
96 SIProgramInfo KernelInfo;
98 getSIProgramInfo(KernelInfo, *
MF);
99 EmitAmdKernelCodeT(*
MF, KernelInfo);
126 SIProgramInfo KernelInfo;
129 getSIProgramInfo(KernelInfo, MF);
130 EmitProgramInfoSI(MF, KernelInfo);
140 EmitProgramInfoR600(MF);
155 OutStreamer->emitRawComment(
" Kernel info:",
false);
156 OutStreamer->emitRawComment(
" codeLenInByte = " +
Twine(KernelInfo.CodeLen),
166 OutStreamer->emitRawComment(
" ScratchSize: " +
Twine(KernelInfo.ScratchSize),
182 Comment +=
" ; " +
HexLines[i] +
"\n";
192 void AMDGPUAsmPrinter::EmitProgramInfoR600(
const MachineFunction &MF) {
194 bool killPixel =
false;
202 if (
MI.getOpcode() == AMDGPU::KILLGT)
204 unsigned numOperands =
MI.getNumOperands();
205 for (
unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
209 unsigned HWReg = RI->getEncodingValue(MO.
getReg()) & 0xff;
214 MaxGPR = std::max(MaxGPR, HWReg);
252 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
256 uint64_t CodeSize = 0;
257 unsigned MaxSGPR = 0;
258 unsigned MaxVGPR = 0;
259 bool VCCUsed =
false;
260 bool FlatUsed =
false;
267 CodeSize +=
MI.getDesc().Size;
269 unsigned numOperands =
MI.getNumOperands();
270 for (
unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
278 unsigned reg = MO.
getReg();
279 if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
280 reg == AMDGPU::VCC_HI) {
283 }
else if (reg == AMDGPU::FLAT_SCR ||
284 reg == AMDGPU::FLAT_SCR_LO ||
285 reg == AMDGPU::FLAT_SCR_HI) {
298 if (AMDGPU::SReg_32RegClass.
contains(reg)) {
301 }
else if (AMDGPU::VGPR_32RegClass.
contains(reg)) {
304 }
else if (AMDGPU::SReg_64RegClass.
contains(reg)) {
307 }
else if (AMDGPU::VReg_64RegClass.
contains(reg)) {
310 }
else if (AMDGPU::VReg_96RegClass.
contains(reg)) {
313 }
else if (AMDGPU::SReg_128RegClass.
contains(reg)) {
316 }
else if (AMDGPU::VReg_128RegClass.
contains(reg)) {
319 }
else if (AMDGPU::SReg_256RegClass.
contains(reg)) {
322 }
else if (AMDGPU::VReg_256RegClass.
contains(reg)) {
325 }
else if (AMDGPU::SReg_512RegClass.
contains(reg)) {
328 }
else if (AMDGPU::VReg_512RegClass.
contains(reg)) {
334 unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
335 unsigned maxUsed = hwReg + width - 1;
337 MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
339 MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
353 ProgInfo.NumVGPR = MaxVGPR + 1;
354 ProgInfo.NumSGPR = MaxSGPR + 1;
359 Ctx.
emitError(
"too many SGPRs used with the SGPR init bug");
365 ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
366 ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
372 ProgInfo.IEEEMode = 0;
375 ProgInfo.DX10Clamp = 0;
380 ProgInfo.FlatUsed = FlatUsed;
381 ProgInfo.VCCUsed = VCCUsed;
382 ProgInfo.CodeLen = CodeSize;
384 unsigned LDSAlignShift;
393 unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
394 MFI->getMaximumWorkGroupSize(MF);
396 ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
401 unsigned ScratchAlignShift = 10;
405 ProgInfo.ScratchBlocks =
407 1 << ScratchAlignShift) >> ScratchAlignShift;
409 ProgInfo.ComputePGMRSrc1 =
419 ProgInfo.ComputePGMRSrc2 =
431 switch (ShaderType) {
441 const SIProgramInfo &KernelInfo) {
444 unsigned RsrcReg =
getRsrcReg(MFI->getShaderType());
449 OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
452 OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
478 const SIProgramInfo &KernelInfo)
const {
485 header.compute_pgm_resource_registers =
486 KernelInfo.ComputePGMRSrc1 |
487 (KernelInfo.ComputePGMRSrc2 << 32);
488 header.code_properties =
493 header.wavefront_sgpr_count = KernelInfo.NumSGPR;
494 header.workitem_vgpr_count = KernelInfo.NumVGPR;
505 if (ExtraCode && ExtraCode[0]) {
506 if (ExtraCode[1] != 0)
509 switch (ExtraCode[0]) {
Interface definition for SIRegisterInfo.
#define S_00B848_VGPRS(x)
AMDGPU specific subclass of TargetSubtarget.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
#define FP_DENORM_MODE_SP(x)
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
A Module instance is used to store all the information related to an LLVM module. ...
Target TheGCNTarget
The target for GCN GPUs.
MCContext & OutContext
This is the context for the output file that we are streaming.
#define R_028850_SQ_PGM_RESOURCES_PS
MCSectionELF * getELFSection(StringRef Section, unsigned Type, unsigned Flags)
#define END_OF_TEXT_LABEL_NAME
#define R_028860_SQ_PGM_RESOURCES_VS
#define R_028878_SQ_PGM_RESOURCES_GS
const MachineFunction * MF
The current machine function.
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
#define S_00B84C_SCRATCH_EN(x)
#define S_00B84C_TG_SIZE_EN(x)
#define S_00B848_DX10_CLAMP(x)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
Interface definition for R600RegisterInfo.
#define R_0286CC_SPI_PS_INPUT_ENA
#define S_00B028_SGPRS(x)
Target TheAMDGPUTarget
The target which supports all AMD GPUs.
#define FP_DENORM_FLUSH_NONE
bool hasSGPRInitBug() const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
AMD Kernel Code Object (amd_kernel_code_t).
unsigned getShaderType() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
#define FP_ROUND_MODE_SP(x)
#define S_00B84C_TGID_Y_EN(x)
Context object for machine code objects.
#define S_00B848_FLOAT_MODE(x)
void emitError(unsigned LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
#define R_00B848_COMPUTE_PGM_RSRC1
void EmitFunctionBody()
This method emits the body and trailer for a function.
MCContext & getContext() const
Generation getGeneration() const
unsigned estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
#define S_00B848_IEEE_MODE(x)
#define S_00B028_VGPRS(x)
static uint32_t getFPMode(const MachineFunction &F)
This is an important class for using LLVM in a threaded context.
const MachineOperand & getOperand(unsigned i) const
#define S_00B84C_TIDIG_COMP_CNT(x)
#define FP_ROUND_MODE_DP(x)
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
TargetMachine & TM
Target machine description.
This class is intended to be used as a driving class for all asm writers.
void EmitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
#define R_0288D4_SQ_PGM_RESOURCES_LS
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool hasFP32Denormals() const
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
MCSection * getTextSection() const
#define S_00B84C_TGID_Z_EN(x)
#define R_02880C_DB_SHADER_CONTROL
#define S_00B84C_LDS_SIZE(x)
AMDGPU::IsaVersion getIsaVersion() const
virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features)
bool hasFP64Denormals() const
#define FP_ROUND_ROUND_TO_NEAREST
void LLVMInitializeAMDGPUAsmPrinter()
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
std::vector< std::string > HexLines
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
unsigned LDSSize
Number of bytes in the LDS that are being used.
MachineOperand class - Representation of each machine instruction operand.
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
#define R_028868_SQ_PGM_RESOURCES_VS
#define FP_DENORM_MODE_DP(x)
#define S_0286E8_WAVESIZE(x)
#define S_00B84C_TGID_X_EN(x)
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
std::vector< std::string > DisasmLines
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
Representation of each machine instruction.
void EmitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
#define S_02880C_KILL_ENABLE(x)
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
unsigned ABIArgOffset
Start of implicit kernel args.
#define S_00B848_SGPRS(x)
#define S_00B84C_USER_SGPR(x)
AMDGPU Assembly printer class.
#define R_00B860_COMPUTE_TMPRING_SIZE
MCSectionELF - This represents a section on Linux, lots of Unix variants and some bare metal systems...
unsigned getWavefrontSize() const
static unsigned getRsrcReg(unsigned ShaderType)
unsigned getReg() const
getReg - Returns the register number.
#define S_00B860_WAVESIZE(x)
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
#define R_00B84C_COMPUTE_PGM_RSRC2
#define S_00B848_PRIORITY(x)
#define R_0286E8_SPI_TMPRING_SIZE
void setAlignment(unsigned A)
setAlignment - Set the alignment (log2, not bytes) of the function.
#define S_00B02C_EXTRA_LDS_SIZE(x)
This class implements an extremely fast bulk output stream that can only output to a stream...
Primary interface to the complete machine description for the target machine.
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
StringRef - Represent a constant reference to a string, i.e.
bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const
bool isVerbose() const
Return true if assembly output should contain comments.
#define R_0288E8_SQ_LDS_ALLOC
#define R_028844_SQ_PGM_RESOURCES_PS
const AMDGPURegisterInfo * getRegisterInfo() const override