LLVM  3.7.0
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 ///
12 /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
13 /// code. When passed an MCAsmStreamer it prints assembly and when passed
14 /// an MCObjectStreamer it outputs binary code.
15 //
16 //===----------------------------------------------------------------------===//
17 //
18 
19 #include "AMDGPUAsmPrinter.h"
22 #include "Utils/AMDGPUBaseInfo.h"
23 #include "AMDGPU.h"
24 #include "AMDKernelCodeT.h"
25 #include "AMDGPUSubtarget.h"
26 #include "R600Defines.h"
28 #include "R600RegisterInfo.h"
29 #include "SIDefines.h"
30 #include "SIMachineFunctionInfo.h"
31 #include "SIRegisterInfo.h"
33 #include "llvm/MC/MCContext.h"
34 #include "llvm/MC/MCSectionELF.h"
35 #include "llvm/MC/MCStreamer.h"
36 #include "llvm/Support/ELF.h"
40 
41 using namespace llvm;
42 
43 // TODO: This should get the default rounding mode from the kernel. We just set
44 // the default here, but this could change if the OpenCL rounding mode pragmas
45 // are used.
46 //
47 // The denormal mode here should match what is reported by the OpenCL runtime
48 // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
49 // can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
50 //
51 // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
52 // precision, and leaves single precision to flush all and does not report
53 // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
54 // CL_FP_DENORM for both.
55 //
56 // FIXME: It seems some instructions do not support single precision denormals
57 // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
58 // and sin_f32, cos_f32 on most parts).
59 
60 // We want to use these instructions, and using fp32 denormals also causes
61 // instructions to run at the double precision rate for the device so it's
62 // probably best to just report no single precision denormals.
/// Compute the 32-bit floating-point mode value for \p F. The visible tail of
/// the expression ORs FP_DENORM_MODE_SP(FP32Denormals) with
/// FP_DENORM_MODE_DP(FP64Denormals), i.e. the single- and double-precision
/// denormal fields.
///
/// NOTE(review): listing lines 64, 68, 71 and 73-74 are absent from this
/// extract -- presumably the subtarget lookup, the two denormal-mode
/// initialisers and the start of the return expression. Confirm against the
/// full file before relying on this description.
63 static uint32_t getFPMode(const MachineFunction &F) {
65  // TODO: Is there any real use for the flush in only / flush out only modes?
66 
67  uint32_t FP32Denormals =
69 
70  uint32_t FP64Denormals =
72 
    // Denormal handling is encoded separately for single (SP) and double (DP)
    // precision.
75  FP_DENORM_MODE_SP(FP32Denormals) |
76  FP_DENORM_MODE_DP(FP64Denormals);
77 }
78 
79 static AsmPrinter *
81  std::unique_ptr<MCStreamer> &&Streamer) {
82  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
83 }
84 
85 extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
88 }
89 
91  std::unique_ptr<MCStreamer> Streamer)
92  : AsmPrinter(TM, std::move(Streamer)) {}
93 
96  SIProgramInfo KernelInfo;
97  if (STM.isAmdHsaOS()) {
98  getSIProgramInfo(KernelInfo, *MF);
99  EmitAmdKernelCodeT(*MF, KernelInfo);
100  }
101 }
102 
104 
105  // This label is used to mark the end of the .text section.
107  OutStreamer->SwitchSection(TLOF.getTextSection());
108  MCSymbol *EndOfTextLabel =
110  OutStreamer->EmitLabel(EndOfTextLabel);
111 }
112 
114 
115  // The starting address of all shader programs must be 256 bytes aligned.
116  MF.setAlignment(8);
117 
119 
120  MCContext &Context = getObjFileLowering().getContext();
121  MCSectionELF *ConfigSection =
122  Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
123  OutStreamer->SwitchSection(ConfigSection);
124 
125  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
126  SIProgramInfo KernelInfo;
128  if (!STM.isAmdHsaOS()) {
129  getSIProgramInfo(KernelInfo, MF);
130  EmitProgramInfoSI(MF, KernelInfo);
131  }
132  // Emit directives
134  static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
136  AMDGPU::IsaVersion ISA = STM.getIsaVersion();
138  "AMD", "AMDGPU");
139  } else {
140  EmitProgramInfoR600(MF);
141  }
142 
143  DisasmLines.clear();
144  HexLines.clear();
145  DisasmLineMaxLen = 0;
146 
148 
149  if (isVerbose()) {
150  MCSectionELF *CommentSection =
151  Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
152  OutStreamer->SwitchSection(CommentSection);
153 
155  OutStreamer->emitRawComment(" Kernel info:", false);
156  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
157  false);
158  OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
159  false);
160  OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
161  false);
162  OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
163  false);
164  OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
165  false);
166  OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
167  false);
168  } else {
170  OutStreamer->emitRawComment(
171  Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
172  }
173  }
174 
175  if (STM.dumpCode()) {
176 
177  OutStreamer->SwitchSection(
178  Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
179 
180  for (size_t i = 0; i < DisasmLines.size(); ++i) {
181  std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
182  Comment += " ; " + HexLines[i] + "\n";
183 
184  OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
185  OutStreamer->EmitBytes(StringRef(Comment));
186  }
187  }
188 
189  return false;
190 }
191 
/// Emit the R600-family program configuration for \p MF to OutStreamer as
/// 4-byte (register, value) pairs: the shader-type specific SQ_PGM_RESOURCES
/// register (GPR count and stack size), DB_SHADER_CONTROL (kill enable) and,
/// for compute shaders only, SQ_LDS_ALLOC.
///
/// NOTE(review): listing lines 198 and 220 are absent from this extract;
/// presumably they hold the declaration of `MFI` and the `if` that opens the
/// Evergreen / Northern Islands branch -- confirm against the full file.
192 void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
193  unsigned MaxGPR = 0;
194  bool killPixel = false;
195  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
196  const R600RegisterInfo *RI =
197  static_cast<const R600RegisterInfo *>(STM.getRegisterInfo());
199 
    // Scan every operand of every instruction: remember whether a KILLGT is
    // present and track the highest GPR encoding that is referenced.
200  for (const MachineBasicBlock &MBB : MF) {
201  for (const MachineInstr &MI : MBB) {
202  if (MI.getOpcode() == AMDGPU::KILLGT)
203  killPixel = true;
204  unsigned numOperands = MI.getNumOperands();
205  for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
206  const MachineOperand &MO = MI.getOperand(op_idx);
207  if (!MO.isReg())
208  continue;
    // Only the low 8 bits of the encoding are considered.
209  unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
210 
211  // Encodings above 127 are not GPRs.
212  if (HWReg > 127)
213  continue;
214  MaxGPR = std::max(MaxGPR, HWReg);
215  }
216  }
217  }
218 
    // Pick the SQ_PGM_RESOURCES register matching the shader type; the
    // register addresses differ between the two hardware families below.
219  unsigned RsrcReg;
221  // Evergreen / Northern Islands
222  switch (MFI->getShaderType()) {
223  default: // Fall through
224  case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
225  case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
226  case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
227  case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
228  }
229  } else {
230  // R600 / R700
231  switch (MFI->getShaderType()) {
232  default: // Fall through
233  case ShaderType::GEOMETRY: // Fall through
234  case ShaderType::COMPUTE: // Fall through
235  case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
236  case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
237  }
238  }
239 
    // MaxGPR is the highest index seen, so the number of GPRs is MaxGPR + 1.
240  OutStreamer->EmitIntValue(RsrcReg, 4);
241  OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
242  S_STACK_SIZE(MFI->StackSize), 4);
243  OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
244  OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
245 
246  if (MFI->getShaderType() == ShaderType::COMPUTE) {
    // LDSSize is rounded up to a multiple of 4 and divided by 4 before being
    // emitted (presumably bytes -> dwords; confirm units of LDSSize).
247  OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
248  OutStreamer->EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
249  }
250 }
251 
252 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
253  const MachineFunction &MF) const {
254  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
256  uint64_t CodeSize = 0;
257  unsigned MaxSGPR = 0;
258  unsigned MaxVGPR = 0;
259  bool VCCUsed = false;
260  bool FlatUsed = false;
261  const SIRegisterInfo *RI =
262  static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
263 
264  for (const MachineBasicBlock &MBB : MF) {
265  for (const MachineInstr &MI : MBB) {
266  // TODO: CodeSize should account for multiple functions.
267  CodeSize += MI.getDesc().Size;
268 
269  unsigned numOperands = MI.getNumOperands();
270  for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
271  const MachineOperand &MO = MI.getOperand(op_idx);
272  unsigned width = 0;
273  bool isSGPR = false;
274 
275  if (!MO.isReg()) {
276  continue;
277  }
278  unsigned reg = MO.getReg();
279  if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
280  reg == AMDGPU::VCC_HI) {
281  VCCUsed = true;
282  continue;
283  } else if (reg == AMDGPU::FLAT_SCR ||
284  reg == AMDGPU::FLAT_SCR_LO ||
285  reg == AMDGPU::FLAT_SCR_HI) {
286  FlatUsed = true;
287  continue;
288  }
289 
290  switch (reg) {
291  default: break;
292  case AMDGPU::SCC:
293  case AMDGPU::EXEC:
294  case AMDGPU::M0:
295  continue;
296  }
297 
298  if (AMDGPU::SReg_32RegClass.contains(reg)) {
299  isSGPR = true;
300  width = 1;
301  } else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
302  isSGPR = false;
303  width = 1;
304  } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
305  isSGPR = true;
306  width = 2;
307  } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
308  isSGPR = false;
309  width = 2;
310  } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
311  isSGPR = false;
312  width = 3;
313  } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
314  isSGPR = true;
315  width = 4;
316  } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
317  isSGPR = false;
318  width = 4;
319  } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
320  isSGPR = true;
321  width = 8;
322  } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
323  isSGPR = false;
324  width = 8;
325  } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
326  isSGPR = true;
327  width = 16;
328  } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
329  isSGPR = false;
330  width = 16;
331  } else {
332  llvm_unreachable("Unknown register class");
333  }
334  unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
335  unsigned maxUsed = hwReg + width - 1;
336  if (isSGPR) {
337  MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
338  } else {
339  MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
340  }
341  }
342  }
343  }
344 
345  if (VCCUsed)
346  MaxSGPR += 2;
347 
348  if (FlatUsed)
349  MaxSGPR += 2;
350 
351  // We found the maximum register index. They start at 0, so add one to get the
352  // number of registers.
353  ProgInfo.NumVGPR = MaxVGPR + 1;
354  ProgInfo.NumSGPR = MaxSGPR + 1;
355 
356  if (STM.hasSGPRInitBug()) {
357  if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) {
358  LLVMContext &Ctx = MF.getFunction()->getContext();
359  Ctx.emitError("too many SGPRs used with the SGPR init bug");
360  }
361 
363  }
364 
365  ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
366  ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
367  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
368  // register.
369  ProgInfo.FloatMode = getFPMode(MF);
370 
371  // XXX: Not quite sure what this does, but sc seems to unset this.
372  ProgInfo.IEEEMode = 0;
373 
374  // Do not clamp NAN to 0.
375  ProgInfo.DX10Clamp = 0;
376 
377  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
378  ProgInfo.ScratchSize = FrameInfo->estimateStackSize(MF);
379 
380  ProgInfo.FlatUsed = FlatUsed;
381  ProgInfo.VCCUsed = VCCUsed;
382  ProgInfo.CodeLen = CodeSize;
383 
384  unsigned LDSAlignShift;
386  // LDS is allocated in 64 dword blocks.
387  LDSAlignShift = 8;
388  } else {
389  // LDS is allocated in 128 dword blocks.
390  LDSAlignShift = 9;
391  }
392 
393  unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
394  MFI->getMaximumWorkGroupSize(MF);
395 
396  ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
397  ProgInfo.LDSBlocks =
398  RoundUpToAlignment(ProgInfo.LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
399 
400  // Scratch is allocated in 256 dword blocks.
401  unsigned ScratchAlignShift = 10;
402  // We need to program the hardware with the amount of scratch memory that
403  // is used by the entire wave. ProgInfo.ScratchSize is the amount of
404  // scratch memory used per thread.
405  ProgInfo.ScratchBlocks =
406  RoundUpToAlignment(ProgInfo.ScratchSize * STM.getWavefrontSize(),
407  1 << ScratchAlignShift) >> ScratchAlignShift;
408 
409  ProgInfo.ComputePGMRSrc1 =
410  S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
411  S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
412  S_00B848_PRIORITY(ProgInfo.Priority) |
413  S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
414  S_00B848_PRIV(ProgInfo.Priv) |
415  S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
416  S_00B848_IEEE_MODE(ProgInfo.DebugMode) |
417  S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
418 
419  ProgInfo.ComputePGMRSrc2 =
420  S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
421  S_00B84C_USER_SGPR(MFI->NumUserSGPRs) |
422  S_00B84C_TGID_X_EN(1) |
423  S_00B84C_TGID_Y_EN(1) |
424  S_00B84C_TGID_Z_EN(1) |
427  S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks);
428 }
429 
430 static unsigned getRsrcReg(unsigned ShaderType) {
431  switch (ShaderType) {
432  default: // Fall through
437  }
438 }
439 
/// Emit the SI program configuration described by \p KernelInfo to
/// OutStreamer as 4-byte (register, value) pairs.
///
/// Compute shaders get COMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC2 and
/// COMPUTE_TMPRING_SIZE. All other shader types get the register returned by
/// getRsrcReg() with the VGPR/SGPR block counts, plus SPI_TMPRING_SIZE when
/// VGPR spilling is enabled. Pixel shaders additionally get the extra LDS
/// size and SPI_PS_INPUT_ENA.
///
/// NOTE(review): listing lines 443 and 470 are absent from this extract --
/// presumably the `MFI` declaration and the register id emitted ahead of the
/// EXTRA_LDS_SIZE value. Confirm against the full file.
440 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
441  const SIProgramInfo &KernelInfo) {
442  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
444  unsigned RsrcReg = getRsrcReg(MFI->getShaderType());
445 
446  if (MFI->getShaderType() == ShaderType::COMPUTE) {
    // The RSRC1/RSRC2 words were already packed by getSIProgramInfo(); only
    // their low 4 bytes are written here.
447  OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
448 
449  OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
450 
451  OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
452  OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
453 
454  OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
455  OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
456 
457  // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
458  // 0" comment but I don't see a corresponding field in the register spec.
459  } else {
460  OutStreamer->EmitIntValue(RsrcReg, 4);
461  OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
462  S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
    // The scratch ring size is only emitted for graphics shaders when VGPR
    // spilling is enabled for this function.
463  if (STM.isVGPRSpillingEnabled(MFI)) {
464  OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
465  OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
466  }
467  }
468 
469  if (MFI->getShaderType() == ShaderType::PIXEL) {
471  OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
472  OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
473  OutStreamer->EmitIntValue(MFI->PSInputAddr, 4);
474  }
475 }
476 
/// Build an amd_kernel_code_t header for \p MF from \p KernelInfo and pass it
/// to the target streamer's EmitAMDKernelCodeT().
///
/// The header starts from the subtarget defaults
/// (AMDGPU::initDefaultAMDKernelCodeT) and is then overridden with the packed
/// resource registers, the kernarg segment size and the SGPR/VGPR counts.
///
/// NOTE(review): listing lines 479, 489-490 and 497 are absent from this
/// extract -- presumably the `MFI` declaration, the value assigned to
/// `code_properties`, and the declaration of `TS`. Confirm against the full
/// file.
477 void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
478  const SIProgramInfo &KernelInfo) const {
480  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
481  amd_kernel_code_t header;
482 
483  AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
484 
    // RSRC1 goes in the low 32 bits and RSRC2 in the high 32 bits.
    // NOTE(review): the shift by 32 relies on ComputePGMRSrc2 being a 64-bit
    // type -- confirm in the SIProgramInfo declaration.
485  header.compute_pgm_resource_registers =
486  KernelInfo.ComputePGMRSrc1 |
487  (KernelInfo.ComputePGMRSrc2 << 32);
488  header.code_properties =
491 
492  header.kernarg_segment_byte_size = MFI->ABIArgOffset;
493  header.wavefront_sgpr_count = KernelInfo.NumSGPR;
494  header.workitem_vgpr_count = KernelInfo.NumVGPR;
495 
496 
498  static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
499  TS->EmitAMDKernelCodeT(header);
500 }
501 
503  unsigned AsmVariant,
504  const char *ExtraCode, raw_ostream &O) {
505  if (ExtraCode && ExtraCode[0]) {
506  if (ExtraCode[1] != 0)
507  return true; // Unknown modifier.
508 
509  switch (ExtraCode[0]) {
510  default:
511  // See if this is a generic print operand
512  return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
513  case 'r':
514  break;
515  }
516  }
517 
519  *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
520  return false;
521 }
Interface definition for SIRegisterInfo.
#define S_00B848_VGPRS(x)
Definition: SIDefines.h:142
#define S_00B848_PRIV(x)
Definition: SIDefines.h:154
AMDGPU specific subclass of TargetSubtarget.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:83
#define FP_DENORM_MODE_SP(x)
Definition: SIDefines.h:187
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition: SIDefines.h:99
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:39
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:114
Target TheGCNTarget
The target for GCN GPUs.
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:78
#define R_028850_SQ_PGM_RESOURCES_PS
Definition: R600Defines.h:157
MCSectionELF * getELFSection(StringRef Section, unsigned Type, unsigned Flags)
Definition: MCContext.h:311
#define END_OF_TEXT_LABEL_NAME
Definition: AMDGPU.h:88
#define R_028860_SQ_PGM_RESOURCES_VS
Definition: R600Defines.h:165
#define R_028878_SQ_PGM_RESOURCES_GS
Definition: R600Defines.h:166
const MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:86
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition: SIDefines.h:95
#define S_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:105
#define S_00B84C_TG_SIZE_EN(x)
Definition: SIDefines.h:120
#define S_00B848_DX10_CLAMP(x)
Definition: SIDefines.h:157
F(f)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
Interface definition for R600RegisterInfo.
#define R_0286CC_SPI_PS_INPUT_ENA
Definition: SIDefines.h:138
#define S_00B028_SGPRS(x)
Definition: SIDefines.h:102
Target TheAMDGPUTarget
The target which supports all AMD GPUs.
#define S_NUM_GPRS(x)
Definition: R600Defines.h:151
#define FP_DENORM_FLUSH_NONE
Definition: SIDefines.h:182
bool hasSGPRInitBug() const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:79
AMD Kernel Code Object (amd_kernel_code_t).
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:317
#define FP_ROUND_MODE_SP(x)
Definition: SIDefines.h:176
#define S_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:114
Context object for machine code objects.
Definition: MCContext.h:48
#define S_00B848_FLOAT_MODE(x)
Definition: SIDefines.h:151
void emitError(unsigned LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
#define R_00B848_COMPUTE_PGM_RSRC1
Definition: SIDefines.h:141
void EmitFunctionBody()
This method emits the body and trailer for a function.
Definition: AsmPrinter.cpp:784
Generation getGeneration() const
unsigned estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
#define S_00B848_IEEE_MODE(x)
Definition: SIDefines.h:163
#define S_00B028_VGPRS(x)
Definition: SIDefines.h:101
static uint32_t getFPMode(const MachineFunction &F)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
#define S_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:123
#define FP_ROUND_MODE_DP(x)
Definition: SIDefines.h:177
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:70
This class is intended to be used as a driving class for all asm writers.
Definition: AsmPrinter.h:66
void EmitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
#define R_0288D4_SQ_PGM_RESOURCES_LS
Definition: R600Defines.h:167
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool hasFP32Denormals() const
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition: SIDefines.h:96
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
Definition: SIDefines.h:179
MCSection * getTextSection() const
#define S_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:117
#define R_02880C_DB_SHADER_CONTROL
Definition: R600Defines.h:147
#define S_00B84C_LDS_SIZE(x)
Definition: SIDefines.h:131
AMDGPU::IsaVersion getIsaVersion() const
virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features)
bool hasFP64Denormals() const
#define FP_ROUND_ROUND_TO_NEAREST
Definition: SIDefines.h:169
void LLVMInitializeAMDGPUAsmPrinter()
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
std::vector< std::string > HexLines
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition: SIDefines.h:98
unsigned LDSSize
Number of bytes in the LDS that are being used.
MachineOperand class - Representation of each machine instruction operand.
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
#define S_STACK_SIZE(x)
Definition: R600Defines.h:152
#define R_028868_SQ_PGM_RESOURCES_VS
Definition: R600Defines.h:158
#define FP_DENORM_MODE_DP(x)
Definition: SIDefines.h:188
#define S_0286E8_WAVESIZE(x)
Definition: SIDefines.h:194
#define S_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:111
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
std::vector< std::string > DisasmLines
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
bool isAmdHsaOS() const
uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:609
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
Representation of each machine instruction.
Definition: MachineInstr.h:51
void EmitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
#define S_02880C_KILL_ENABLE(x)
Definition: R600Defines.h:148
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:111
unsigned ABIArgOffset
Start of implicit kernel args.
#define S_00B848_SGPRS(x)
Definition: SIDefines.h:145
#define S_00B84C_USER_SGPR(x)
Definition: SIDefines.h:108
AMDGPU Assembly printer class.
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition: SIDefines.h:190
void size_t size
MCSectionELF - This represents a section on linux, lots of unix variants and some bare metal systems...
Definition: MCSectionELF.h:30
unsigned getWavefrontSize() const
static unsigned getRsrcReg(unsigned ShaderType)
unsigned getReg() const
getReg - Returns the register number.
#define S_00B860_WAVESIZE(x)
Definition: SIDefines.h:191
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
Definition: AsmPrinter.cpp:134
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition: SIDefines.h:104
#define S_00B848_PRIORITY(x)
Definition: SIDefines.h:148
#define R_0286E8_SPI_TMPRING_SIZE
Definition: SIDefines.h:193
void setAlignment(unsigned A)
setAlignment - Set the alignment (log2, not bytes) of the function.
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition: SIDefines.h:97
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:38
static TraceState * TS
Primary interface to the complete machine description for the target machine.
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const
bool isVerbose() const
Return true if assembly output should contain comments.
Definition: AsmPrinter.h:152
#define R_0288E8_SQ_LDS_ALLOC
Definition: R600Defines.h:169
#define R_028844_SQ_PGM_RESOURCES_PS
Definition: R600Defines.h:164
const AMDGPURegisterInfo * getRegisterInfo() const override