LLVM  4.0.0
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 ///
12 /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
13 /// code. When passed an MCAsmStreamer it prints assembly and when passed
14 /// an MCObjectStreamer it outputs binary code.
15 //
16 //===----------------------------------------------------------------------===//
17 //
18 
19 #include "AMDGPUAsmPrinter.h"
22 #include "Utils/AMDGPUBaseInfo.h"
23 #include "AMDGPU.h"
24 #include "AMDKernelCodeT.h"
25 #include "AMDGPUSubtarget.h"
26 #include "R600Defines.h"
28 #include "R600RegisterInfo.h"
29 #include "SIDefines.h"
30 #include "SIMachineFunctionInfo.h"
31 #include "SIInstrInfo.h"
32 #include "SIRegisterInfo.h"
34 #include "llvm/IR/DiagnosticInfo.h"
35 #include "llvm/MC/MCContext.h"
36 #include "llvm/MC/MCSectionELF.h"
37 #include "llvm/MC/MCStreamer.h"
38 #include "llvm/Support/ELF.h"
42 
43 using namespace llvm;
44 
45 // TODO: This should get the default rounding mode from the kernel. We just set
46 // the default here, but this could change if the OpenCL rounding mode pragmas
47 // are used.
48 //
49 // The denormal mode here should match what is reported by the OpenCL runtime
50 // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
51 // can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
52 //
53 // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
54 // precision, and leaves single precision to flush all and does not report
55 // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
56 // CL_FP_DENORM for both.
57 //
58 // FIXME: It seems some instructions do not support single precision denormals
59 // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
60 // and sin_f32, cos_f32 on most parts).
61 
62 // We want to use these instructions, and using fp32 denormals also causes
63 // instructions to run at the double precision rate for the device so it's
64 // probably best to just report no single precision denormals.
66  const SISubtarget& ST = F.getSubtarget<SISubtarget>();
67  // TODO: Is there any real use for the flush in only / flush out only modes?
68 
69  uint32_t FP32Denormals =
71 
72  uint32_t FP64Denormals =
74 
77  FP_DENORM_MODE_SP(FP32Denormals) |
78  FP_DENORM_MODE_DP(FP64Denormals);
79 }
80 
81 static AsmPrinter *
83  std::unique_ptr<MCStreamer> &&Streamer) {
84  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
85 }
86 
87 extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
92 }
93 
95  std::unique_ptr<MCStreamer> Streamer)
96  : AsmPrinter(TM, std::move(Streamer)) {}
97 
99  return "AMDGPU Assembly Printer";
100 }
101 
104  return;
105 
106  // Need to construct an MCSubtargetInfo here in case we have no functions
107  // in the module.
108  std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
111 
113  static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
114 
116 
117  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
119  "AMD", "AMDGPU");
120 
121  // Emit runtime metadata.
122  TS->EmitRuntimeMetadata(M);
123 }
124 
126  const MachineBasicBlock *MBB) const {
128  return false;
129 
130  if (MBB->empty())
131  return true;
132 
133  // If this is a block implementing a long branch, an expression relative to
134  // the start of the block is needed.
135  // XXX - Is there a smarter way to check this?
136  return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
137 }
138 
139 
142  SIProgramInfo KernelInfo;
143  if (STM.isAmdCodeObjectV2(*MF)) {
144  getSIProgramInfo(KernelInfo, *MF);
145  EmitAmdKernelCodeT(*MF, KernelInfo);
146  }
147 }
148 
152  if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
154  static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
155  SmallString<128> SymbolName;
156  getNameWithPrefix(SymbolName, MF->getFunction()),
158  }
159 
161 }
162 
164 
165  // Group segment variables aren't emitted in HSA.
166  if (AMDGPU::isGroupSegment(GV))
167  return;
168 
170 }
171 
173 
174  // The starting address of all shader programs must be 256 bytes aligned.
175  MF.setAlignment(8);
176 
178 
179  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
181  if (!STM.isAmdHsaOS()) {
182  MCSectionELF *ConfigSection =
183  Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
184  OutStreamer->SwitchSection(ConfigSection);
185  }
186 
187  SIProgramInfo KernelInfo;
189  getSIProgramInfo(KernelInfo, MF);
190  if (!STM.isAmdHsaOS()) {
191  EmitProgramInfoSI(MF, KernelInfo);
192  }
193  } else {
194  EmitProgramInfoR600(MF);
195  }
196 
197  DisasmLines.clear();
198  HexLines.clear();
199  DisasmLineMaxLen = 0;
200 
202 
203  if (isVerbose()) {
204  MCSectionELF *CommentSection =
205  Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
206  OutStreamer->SwitchSection(CommentSection);
207 
209  OutStreamer->emitRawComment(" Kernel info:", false);
210  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
211  false);
212  OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
213  false);
214  OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
215  false);
216  OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
217  false);
218  OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
219  false);
220  OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
221  false);
222  OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
223  " bytes/workgroup (compile time only)", false);
224 
225  OutStreamer->emitRawComment(" SGPRBlocks: " +
226  Twine(KernelInfo.SGPRBlocks), false);
227  OutStreamer->emitRawComment(" VGPRBlocks: " +
228  Twine(KernelInfo.VGPRBlocks), false);
229 
230  OutStreamer->emitRawComment(" NumSGPRsForWavesPerEU: " +
231  Twine(KernelInfo.NumSGPRsForWavesPerEU), false);
232  OutStreamer->emitRawComment(" NumVGPRsForWavesPerEU: " +
233  Twine(KernelInfo.NumVGPRsForWavesPerEU), false);
234 
235  OutStreamer->emitRawComment(" ReservedVGPRFirst: " + Twine(KernelInfo.ReservedVGPRFirst),
236  false);
237  OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount),
238  false);
239 
241  OutStreamer->emitRawComment(" DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
242  Twine(KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
243  OutStreamer->emitRawComment(" DebuggerPrivateSegmentBufferSGPR: s" +
244  Twine(KernelInfo.DebuggerPrivateSegmentBufferSGPR), false);
245  }
246 
247  OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
248  Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
249  false);
250  OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
251  Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
252  false);
253  OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
254  Twine(G_00B84C_TGID_Y_EN(KernelInfo.ComputePGMRSrc2)),
255  false);
256  OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
257  Twine(G_00B84C_TGID_Z_EN(KernelInfo.ComputePGMRSrc2)),
258  false);
259  OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
260  Twine(G_00B84C_TIDIG_COMP_CNT(KernelInfo.ComputePGMRSrc2)),
261  false);
262 
263  } else {
265  OutStreamer->emitRawComment(
266  Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->CFStackSize)));
267  }
268  }
269 
270  if (STM.dumpCode()) {
271 
272  OutStreamer->SwitchSection(
273  Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
274 
275  for (size_t i = 0; i < DisasmLines.size(); ++i) {
276  std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
277  Comment += " ; " + HexLines[i] + "\n";
278 
279  OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
280  OutStreamer->EmitBytes(StringRef(Comment));
281  }
282  }
283 
284  return false;
285 }
286 
287 void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
288  unsigned MaxGPR = 0;
289  bool killPixel = false;
290  const R600Subtarget &STM = MF.getSubtarget<R600Subtarget>();
291  const R600RegisterInfo *RI = STM.getRegisterInfo();
293 
294  for (const MachineBasicBlock &MBB : MF) {
295  for (const MachineInstr &MI : MBB) {
296  if (MI.getOpcode() == AMDGPU::KILLGT)
297  killPixel = true;
298  unsigned numOperands = MI.getNumOperands();
299  for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
300  const MachineOperand &MO = MI.getOperand(op_idx);
301  if (!MO.isReg())
302  continue;
303  unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
304 
305  // Registers with value > 127 aren't GPRs
306  if (HWReg > 127)
307  continue;
308  MaxGPR = std::max(MaxGPR, HWReg);
309  }
310  }
311  }
312 
313  unsigned RsrcReg;
315  // Evergreen / Northern Islands
316  switch (MF.getFunction()->getCallingConv()) {
317  default: LLVM_FALLTHROUGH;
322  }
323  } else {
324  // R600 / R700
325  switch (MF.getFunction()->getCallingConv()) {
326  default: LLVM_FALLTHROUGH;
331  }
332  }
333 
334  OutStreamer->EmitIntValue(RsrcReg, 4);
335  OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
336  S_STACK_SIZE(MFI->CFStackSize), 4);
337  OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
338  OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
339 
340  if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
341  OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
342  OutStreamer->EmitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4);
343  }
344 }
345 
346 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
347  const MachineFunction &MF) const {
348  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
350  uint64_t CodeSize = 0;
351  unsigned MaxSGPR = 0;
352  unsigned MaxVGPR = 0;
353  bool VCCUsed = false;
354  bool FlatUsed = false;
355  const SIRegisterInfo *RI = STM.getRegisterInfo();
356  const SIInstrInfo *TII = STM.getInstrInfo();
357 
358  for (const MachineBasicBlock &MBB : MF) {
359  for (const MachineInstr &MI : MBB) {
360  // TODO: CodeSize should account for multiple functions.
361 
362  // TODO: Should we count size of debug info?
363  if (MI.isDebugValue())
364  continue;
365 
366  if (isVerbose())
367  CodeSize += TII->getInstSizeInBytes(MI);
368 
369  unsigned numOperands = MI.getNumOperands();
370  for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
371  const MachineOperand &MO = MI.getOperand(op_idx);
372  unsigned width = 0;
373  bool isSGPR = false;
374 
375  if (!MO.isReg())
376  continue;
377 
378  unsigned reg = MO.getReg();
379  switch (reg) {
380  case AMDGPU::EXEC:
381  case AMDGPU::EXEC_LO:
382  case AMDGPU::EXEC_HI:
383  case AMDGPU::SCC:
384  case AMDGPU::M0:
385  continue;
386 
387  case AMDGPU::VCC:
388  case AMDGPU::VCC_LO:
389  case AMDGPU::VCC_HI:
390  VCCUsed = true;
391  continue;
392 
393  case AMDGPU::FLAT_SCR:
394  case AMDGPU::FLAT_SCR_LO:
395  case AMDGPU::FLAT_SCR_HI:
396  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
397  // instructions aren't used to access the scratch buffer.
398  if (MFI->hasFlatScratchInit())
399  FlatUsed = true;
400  continue;
401 
402  case AMDGPU::TBA:
403  case AMDGPU::TBA_LO:
404  case AMDGPU::TBA_HI:
405  case AMDGPU::TMA:
406  case AMDGPU::TMA_LO:
407  case AMDGPU::TMA_HI:
408  llvm_unreachable("trap handler registers should not be used");
409 
410  default:
411  break;
412  }
413 
414  if (AMDGPU::SReg_32RegClass.contains(reg)) {
415  assert(!AMDGPU::TTMP_32RegClass.contains(reg) &&
416  "trap handler registers should not be used");
417  isSGPR = true;
418  width = 1;
419  } else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
420  isSGPR = false;
421  width = 1;
422  } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
423  assert(!AMDGPU::TTMP_64RegClass.contains(reg) &&
424  "trap handler registers should not be used");
425  isSGPR = true;
426  width = 2;
427  } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
428  isSGPR = false;
429  width = 2;
430  } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
431  isSGPR = false;
432  width = 3;
433  } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
434  isSGPR = true;
435  width = 4;
436  } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
437  isSGPR = false;
438  width = 4;
439  } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
440  isSGPR = true;
441  width = 8;
442  } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
443  isSGPR = false;
444  width = 8;
445  } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
446  isSGPR = true;
447  width = 16;
448  } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
449  isSGPR = false;
450  width = 16;
451  } else {
452  llvm_unreachable("Unknown register class");
453  }
454  unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
455  unsigned maxUsed = hwReg + width - 1;
456  if (isSGPR) {
457  MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
458  } else {
459  MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
460  }
461  }
462  }
463  }
464 
465  unsigned ExtraSGPRs = 0;
466 
467  if (VCCUsed)
468  ExtraSGPRs = 2;
469 
471  if (FlatUsed)
472  ExtraSGPRs = 4;
473  } else {
474  if (STM.isXNACKEnabled())
475  ExtraSGPRs = 4;
476 
477  if (FlatUsed)
478  ExtraSGPRs = 6;
479  }
480 
481  // Record first reserved register and reserved register count fields, and
482  // update max register counts if "amdgpu-debugger-reserve-regs" attribute was
483  // requested.
484  ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
485  ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM);
486 
487  // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
488  // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
489  // attribute was requested.
490  if (STM.debuggerEmitPrologue()) {
491  ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
492  RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
493  ProgInfo.DebuggerPrivateSegmentBufferSGPR =
494  RI->getHWRegIndex(MFI->getScratchRSrcReg());
495  }
496 
497  // Check the addressable register limit before we add ExtraSGPRs.
499  !STM.hasSGPRInitBug()) {
500  unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs();
501  if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {
502  // This can happen due to a compiler bug or when using inline asm.
503  LLVMContext &Ctx = MF.getFunction()->getContext();
504  DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
505  "addressable scalar registers",
506  MaxSGPR + 1, DS_Error,
507  DK_ResourceLimit, MaxAddressableNumSGPRs);
508  Ctx.diagnose(Diag);
509  MaxSGPR = MaxAddressableNumSGPRs - 1;
510  }
511  }
512 
513  // Account for extra SGPRs and VGPRs reserved for debugger use.
514  MaxSGPR += ExtraSGPRs;
515  MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM);
516 
517  // We found the maximum register index. They start at 0, so add one to get the
518  // number of registers.
519  ProgInfo.NumVGPR = MaxVGPR + 1;
520  ProgInfo.NumSGPR = MaxSGPR + 1;
521 
522  // Adjust number of registers used to meet default/requested minimum/maximum
523  // number of waves per execution unit request.
524  ProgInfo.NumSGPRsForWavesPerEU = std::max(
525  ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU()));
526  ProgInfo.NumVGPRsForWavesPerEU = std::max(
527  ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU()));
528 
530  STM.hasSGPRInitBug()) {
531  unsigned MaxNumSGPRs = STM.getMaxNumSGPRs();
532  if (ProgInfo.NumSGPR > MaxNumSGPRs) {
533  // This can happen due to a compiler bug or when using inline asm to use the
534  // registers which are usually reserved for vcc etc.
535 
536  LLVMContext &Ctx = MF.getFunction()->getContext();
537  DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
538  "scalar registers",
539  ProgInfo.NumSGPR, DS_Error,
540  DK_ResourceLimit, MaxNumSGPRs);
541  Ctx.diagnose(Diag);
542  ProgInfo.NumSGPR = MaxNumSGPRs;
543  ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;
544  }
545  }
546 
547  if (STM.hasSGPRInitBug()) {
549  ProgInfo.NumSGPRsForWavesPerEU = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
550  }
551 
552  if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
553  LLVMContext &Ctx = MF.getFunction()->getContext();
554  DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "user SGPRs",
555  MFI->NumUserSGPRs, DS_Error);
556  Ctx.diagnose(Diag);
557  }
558 
559  if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
560  LLVMContext &Ctx = MF.getFunction()->getContext();
561  DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "local memory",
562  MFI->getLDSSize(), DS_Error);
563  Ctx.diagnose(Diag);
564  }
565 
566  // SGPRBlocks is actual number of SGPR blocks minus 1.
567  ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU,
568  RI->getSGPRAllocGranule());
569  ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1;
570 
571  // VGPRBlocks is actual number of VGPR blocks minus 1.
572  ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU,
573  RI->getVGPRAllocGranule());
574  ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1;
575 
576  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
577  // register.
578  ProgInfo.FloatMode = getFPMode(MF);
579 
580  ProgInfo.IEEEMode = STM.enableIEEEBit(MF);
581 
582  // Make the clamp modifier return 0 on NaN input.
583  ProgInfo.DX10Clamp = 1;
584 
585  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
586  ProgInfo.ScratchSize = FrameInfo.getStackSize();
587 
588  ProgInfo.FlatUsed = FlatUsed;
589  ProgInfo.VCCUsed = VCCUsed;
590  ProgInfo.CodeLen = CodeSize;
591 
592  unsigned LDSAlignShift;
594  // LDS is allocated in 64 dword blocks.
595  LDSAlignShift = 8;
596  } else {
597  // LDS is allocated in 128 dword blocks.
598  LDSAlignShift = 9;
599  }
600 
601  unsigned LDSSpillSize =
602  MFI->LDSWaveSpillSize * MFI->getMaxFlatWorkGroupSize();
603 
604  ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
605  ProgInfo.LDSBlocks =
606  alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
607 
608  // Scratch is allocated in 256 dword blocks.
609  unsigned ScratchAlignShift = 10;
610  // We need to program the hardware with the amount of scratch memory that
611  // is used by the entire wave. ProgInfo.ScratchSize is the amount of
612  // scratch memory used per thread.
613  ProgInfo.ScratchBlocks =
614  alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
615  1ULL << ScratchAlignShift) >>
616  ScratchAlignShift;
617 
618  ProgInfo.ComputePGMRSrc1 =
619  S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
620  S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
621  S_00B848_PRIORITY(ProgInfo.Priority) |
622  S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
623  S_00B848_PRIV(ProgInfo.Priv) |
624  S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
625  S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
626  S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
627 
628  // 0 = X, 1 = XY, 2 = XYZ
629  unsigned TIDIGCompCnt = 0;
630  if (MFI->hasWorkItemIDZ())
631  TIDIGCompCnt = 2;
632  else if (MFI->hasWorkItemIDY())
633  TIDIGCompCnt = 1;
634 
635  ProgInfo.ComputePGMRSrc2 =
636  S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
637  S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
638  S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
639  S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
640  S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
641  S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
642  S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
644  S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) |
645  S_00B84C_EXCP_EN(0);
646 }
647 
648 static unsigned getRsrcReg(CallingConv::ID CallConv) {
649  switch (CallConv) {
650  default: LLVM_FALLTHROUGH;
655  }
656 }
657 
658 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
659  const SIProgramInfo &KernelInfo) {
660  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
662  unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv());
663 
665  OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
666 
667  OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
668 
669  OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
670  OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
671 
672  OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
673  OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
674 
675  // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
676  // 0" comment but I don't see a corresponding field in the register spec.
677  } else {
678  OutStreamer->EmitIntValue(RsrcReg, 4);
679  OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
680  S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
681  if (STM.isVGPRSpillingEnabled(*MF.getFunction())) {
682  OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
683  OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
684  }
685  }
686 
689  OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
690  OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
691  OutStreamer->EmitIntValue(MFI->PSInputEna, 4);
692  OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
693  OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
694  }
695 
696  OutStreamer->EmitIntValue(R_SPILLED_SGPRS, 4);
697  OutStreamer->EmitIntValue(MFI->getNumSpilledSGPRs(), 4);
698  OutStreamer->EmitIntValue(R_SPILLED_VGPRS, 4);
699  OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4);
700 }
701 
702 // This is supposed to be log2(Size)
704  switch (Size) {
705  case 4:
706  return AMD_ELEMENT_4_BYTES;
707  case 8:
708  return AMD_ELEMENT_8_BYTES;
709  case 16:
710  return AMD_ELEMENT_16_BYTES;
711  default:
712  llvm_unreachable("invalid private_element_size");
713  }
714 }
715 
716 void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
717  const SIProgramInfo &KernelInfo) const {
719  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
720  amd_kernel_code_t header;
721 
722  AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
723 
724  header.compute_pgm_resource_registers =
725  KernelInfo.ComputePGMRSrc1 |
726  (KernelInfo.ComputePGMRSrc2 << 32);
727  header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
728 
729 
730  AMD_HSA_BITS_SET(header.code_properties,
732  getElementByteSizeValue(STM.getMaxPrivateElementSize()));
733 
734  if (MFI->hasPrivateSegmentBuffer()) {
735  header.code_properties |=
737  }
738 
739  if (MFI->hasDispatchPtr())
740  header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
741 
742  if (MFI->hasQueuePtr())
743  header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
744 
745  if (MFI->hasKernargSegmentPtr())
746  header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
747 
748  if (MFI->hasDispatchID())
749  header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
750 
751  if (MFI->hasFlatScratchInit())
752  header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
753 
754  // TODO: Private segment size
755 
756  if (MFI->hasGridWorkgroupCountX()) {
757  header.code_properties |=
759  }
760 
761  if (MFI->hasGridWorkgroupCountY()) {
762  header.code_properties |=
764  }
765 
766  if (MFI->hasGridWorkgroupCountZ()) {
767  header.code_properties |=
769  }
770 
771  if (MFI->hasDispatchPtr())
772  header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
773 
774  if (STM.debuggerSupported())
775  header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
776 
777  if (STM.isXNACKEnabled())
778  header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
779 
780  // FIXME: Should use getKernArgSize
781  header.kernarg_segment_byte_size =
782  STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
783  header.wavefront_sgpr_count = KernelInfo.NumSGPR;
784  header.workitem_vgpr_count = KernelInfo.NumVGPR;
785  header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
786  header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
787  header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
788  header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
789 
790  // These alignment values are specified in powers of two, so alignment =
791  // 2^n. The minimum alignment is 2^4 = 16.
792  header.kernarg_segment_alignment = std::max((size_t)4,
794 
795  if (STM.debuggerEmitPrologue()) {
796  header.debug_wavefront_private_segment_offset_sgpr =
797  KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
798  header.debug_private_segment_buffer_sgpr =
799  KernelInfo.DebuggerPrivateSegmentBufferSGPR;
800  }
801 
803  static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
804 
805  OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
806  TS->EmitAMDKernelCodeT(header);
807 }
808 
810  unsigned AsmVariant,
811  const char *ExtraCode, raw_ostream &O) {
812  if (ExtraCode && ExtraCode[0]) {
813  if (ExtraCode[1] != 0)
814  return true; // Unknown modifier.
815 
816  switch (ExtraCode[0]) {
817  default:
818  // See if this is a generic print operand
819  return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
820  case 'r':
821  break;
822  }
823  }
824 
826  *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
827  return false;
828 }
virtual void EmitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
Definition: AsmPrinter.cpp:376
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:279
void EmitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
Interface definition for SIRegisterInfo.
StringRef getTargetCPU() const
Target & getTheGCNTarget()
The target for GCN GPUs.
#define S_00B848_VGPRS(x)
Definition: SIDefines.h:334
#define S_00B848_PRIV(x)
Definition: SIDefines.h:346
LLVMContext & Context
AMDGPU specific subclass of TargetSubtarget.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:84
#define FP_DENORM_MODE_SP(x)
Definition: SIDefines.h:379
size_t i
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition: SIDefines.h:291
#define G_00B84C_USER_SGPR(x)
Definition: SIDefines.h:301
bool isVGPRSpillingEnabled(const Function &F) const
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
virtual void EmitRuntimeMetadata(Module &M)=0
#define R_028850_SQ_PGM_RESOURCES_PS
Definition: R600Defines.h:157
#define R_028860_SQ_PGM_RESOURCES_VS
Definition: R600Defines.h:165
Target & getTheAMDGPUTarget()
The target which suports all AMD GPUs.
#define G_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:310
#define R_028878_SQ_PGM_RESOURCES_GS
Definition: R600Defines.h:166
Calling convention used for Mesa vertex shaders.
Definition: CallingConv.h:182
const SIInstrInfo * getInstrInfo() const override
const MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:87
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition: SIDefines.h:287
#define S_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:297
bool enableIEEEBit(const MachineFunction &MF) const
#define S_00B84C_TG_SIZE_EN(x)
Definition: SIDefines.h:312
#define S_00B848_DX10_CLAMP(x)
Definition: SIDefines.h:349
const std::string & str() const
Definition: Triple.h:339
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:664
#define G_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:307
int getLocalMemorySize() const
Interface definition for R600RegisterInfo.
bool isAmdCodeObjectV2(const MachineFunction &MF) const
#define R_0286CC_SPI_PS_INPUT_ENA
Definition: SIDefines.h:330
#define S_00B028_SGPRS(x)
Definition: SIDefines.h:294
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
return AArch64::GPR64RegClass contains(Reg)
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
Definition: AsmPrinter.cpp:366
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:165
#define S_NUM_GPRS(x)
Definition: R600Defines.h:151
unsigned getHWRegIndex(unsigned Reg) const
const Triple & getTargetTriple() const
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects...
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
#define FP_DENORM_FLUSH_NONE
Definition: SIDefines.h:374
MCSubtargetInfo * createMCSubtargetInfo(StringRef TheTriple, StringRef CPU, StringRef Features) const
createMCSubtargetInfo - Create a MCSubtargetInfo implementation.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
const HexagonInstrInfo * TII
AMD Kernel Code Object (amd_kernel_code_t).
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
#define FP_ROUND_MODE_SP(x)
Definition: SIDefines.h:368
unsigned getVGPRAllocGranule() const
Diagnostic information for stack size etc.
#define S_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:306
Context object for machine code objects.
Definition: MCContext.h:51
#define S_00B848_FLOAT_MODE(x)
Definition: SIDefines.h:343
#define R_00B848_COMPUTE_PGM_RSRC1
Definition: SIDefines.h:333
unsigned getMaxNumUserSGPRs() const
void EmitFunctionBody()
This method emits the body and trailer for a function.
Definition: AsmPrinter.cpp:876
#define F(x, y, z)
Definition: MD5.cpp:51
bool isXNACKEnabled() const
MachineBasicBlock * MBB
bool isGroupSegment(const GlobalValue *GV)
Generation getGeneration() const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
bool hasSGPRInitBug() const
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
#define S_00B848_IEEE_MODE(x)
Definition: SIDefines.h:355
#define S_00B028_VGPRS(x)
Definition: SIDefines.h:293
static uint32_t getFPMode(const MachineFunction &F)
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:111
unsigned getMaxWavesPerEU() const
IsaVersion getIsaVersion(const FeatureBitset &Features)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
#define G_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:304
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
#define S_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:315
#define FP_ROUND_MODE_DP(x)
Definition: SIDefines.h:369
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:71
This class is intended to be used as a driving class for all asm writers.
Definition: AsmPrinter.h:67
bool isCompute(CallingConv::ID cc)
#define R_0288D4_SQ_PGM_RESOURCES_LS
Definition: R600Defines.h:167
#define S_00B84C_EXCP_EN_MSB(x)
Definition: SIDefines.h:319
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool hasFP32Denormals() const
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition: SIDefines.h:288
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
Definition: SIDefines.h:371
bool debuggerReserveRegs() const
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
#define S_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:309
#define R_02880C_DB_SHADER_CONTROL
Definition: R600Defines.h:147
#define S_00B84C_LDS_SIZE(x)
Definition: SIDefines.h:323
#define R_SPILLED_SGPRS
Definition: SIDefines.h:388
virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
Calling convention used for Mesa pixel shaders.
Definition: CallingConv.h:188
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool hasFP64Denormals() const
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
unsigned getSGPRAllocGranule() const
#define FP_ROUND_ROUND_TO_NEAREST
Definition: SIDefines.h:361
#define S_00B84C_EXCP_EN(x)
Definition: SIDefines.h:326
void LLVMInitializeAMDGPUAsmPrinter()
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
const R600RegisterInfo * getRegisterInfo() const override
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
#define G_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:316
std::vector< std::string > HexLines
unsigned getNumDebuggerReservedVGPRs(const SISubtarget &ST) const
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition: SIDefines.h:290
Calling convention used for Mesa geometry shaders.
Definition: CallingConv.h:185
StringRef getTargetFeatureString() const
const SIRegisterInfo * getRegisterInfo() const override
MachineOperand class - Representation of each machine instruction operand.
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
#define S_STACK_SIZE(x)
Definition: R600Defines.h:152
#define S_00B848_DEBUG_MODE(x)
Definition: SIDefines.h:352
#define R_028868_SQ_PGM_RESOURCES_VS
Definition: R600Defines.h:158
#define FP_DENORM_MODE_DP(x)
Definition: SIDefines.h:380
#define S_0286E8_WAVESIZE(x)
Definition: SIDefines.h:386
#define S_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:303
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
#define AMD_HSA_BITS_SET(dst, mask, val)
std::vector< std::string > DisasmLines
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
bool isAmdHsaOS() const
void EmitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
Representation of each machine instruction.
Definition: MachineInstr.h:52
void EmitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
#define S_02880C_KILL_ENABLE(x)
Definition: R600Defines.h:148
#define S_00B848_SGPRS(x)
Definition: SIDefines.h:337
#define S_00B84C_USER_SGPR(x)
Definition: SIDefines.h:300
AMDGPU Assembly printer class.
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition: SIDefines.h:382
This represents a section on Linux, lots of Unix variants, and some bare-metal systems.
Definition: MCSectionELF.h:30
#define R_SPILLED_VGPRS
Definition: SIDefines.h:389
unsigned getWavefrontSize() const
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
virtual void EmitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AsmPrinter.cpp:647
unsigned getMinNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU) const
Calling convention used for Mesa compute shaders.
Definition: CallingConv.h:191
static unsigned getRsrcReg(CallingConv::ID CallConv)
unsigned getReg() const
getReg - Returns the register number.
#define S_00B860_WAVESIZE(x)
Definition: SIDefines.h:383
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
Definition: AsmPrinter.cpp:144
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition: SIDefines.h:296
#define S_00B848_PRIORITY(x)
Definition: SIDefines.h:340
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
void EmitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
#define R_0286E8_SPI_TMPRING_SIZE
Definition: SIDefines.h:385
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
void setAlignment(unsigned A)
setAlignment - Set the alignment (log2, not bytes) of the function.
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition: SIDefines.h:289
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
static TraceState * TS
const Target & getTarget() const
Primary interface to the complete machine description for the target machine.
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:47
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition: SIDefines.h:331
bool isVerbose() const
Return true if assembly output should contain comments.
Definition: AsmPrinter.h:162
unsigned getMaxNumSGPRs() const
bool debuggerEmitPrologue() const
#define R_0288E8_SQ_LDS_ALLOC
Definition: R600Defines.h:169
#define R_028844_SQ_PGM_RESOURCES_PS
Definition: R600Defines.h:164