LLVM  6.0.0svn
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 ///
12 /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
13 /// code. When passed an MCAsmStreamer it prints assembly and when passed
14 /// an MCObjectStreamer it outputs binary code.
15 //
16 //===----------------------------------------------------------------------===//
17 //
18 
19 #include "AMDGPUAsmPrinter.h"
20 #include "AMDGPU.h"
21 #include "AMDGPUSubtarget.h"
22 #include "AMDGPUTargetMachine.h"
25 #include "R600Defines.h"
27 #include "R600RegisterInfo.h"
28 #include "SIDefines.h"
29 #include "SIInstrInfo.h"
30 #include "SIMachineFunctionInfo.h"
31 #include "SIRegisterInfo.h"
32 #include "Utils/AMDGPUBaseInfo.h"
33 #include "llvm/BinaryFormat/ELF.h"
36 #include "llvm/IR/DiagnosticInfo.h"
37 #include "llvm/MC/MCContext.h"
38 #include "llvm/MC/MCSectionELF.h"
39 #include "llvm/MC/MCStreamer.h"
43 
44 using namespace llvm;
45 using namespace llvm::AMDGPU;
46 
47 // TODO: This should get the default rounding mode from the kernel. We just set
48 // the default here, but this could change if the OpenCL rounding mode pragmas
49 // are used.
50 //
51 // The denormal mode here should match what is reported by the OpenCL runtime
52 // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
53 // can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
54 //
55 // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
56 // precision, and leaves single precision to flush all and does not report
57 // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
58 // CL_FP_DENORM for both.
59 //
60 // FIXME: It seems some instructions do not support single precision denormals
61 // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
62 // and sin_f32, cos_f32 on most parts).
63 
64 // We want to use these instructions, and using fp32 denormals also causes
65 // instructions to run at the double precision rate for the device so it's
66 // probably best to just report no single precision denormals.
68  const SISubtarget& ST = F.getSubtarget<SISubtarget>();
69  // TODO: Is there any real use for the flush in only / flush out only modes?
70 
71  uint32_t FP32Denormals =
73 
74  uint32_t FP64Denormals =
76 
79  FP_DENORM_MODE_SP(FP32Denormals) |
80  FP_DENORM_MODE_DP(FP64Denormals);
81 }
82 
83 static AsmPrinter *
85  std::unique_ptr<MCStreamer> &&Streamer) {
86  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
87 }
88 
89 extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
94 }
95 
97  std::unique_ptr<MCStreamer> Streamer)
98  : AsmPrinter(TM, std::move(Streamer)) {
99  AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
100  }
101 
103  return "AMDGPU Assembly Printer";
104 }
105 
107  return TM.getMCSubtargetInfo();
108 }
109 
111  if (!OutStreamer)
112  return nullptr;
113  return static_cast<AMDGPUTargetStreamer*>(OutStreamer->getTargetStreamer());
114 }
115 
118  return;
119 
120  if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
122  return;
123 
125  HSAMetadataStream.begin(M);
126 
128  readPALMetadata(M);
129 
130  // Deprecated notes are not emitted for code object v3.
131  if (IsaInfo::hasCodeObjectV3(getSTI()->getFeatureBits()))
132  return;
133 
134  // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
137 
138  // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
139  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
141  ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
142 }
143 
146  return;
147 
148  // Following code requires TargetStreamer to be present.
149  if (!getTargetStreamer())
150  return;
151 
152  // Emit ISA Version (NT_AMD_AMDGPU_ISA).
153  std::string ISAVersionString;
154  raw_string_ostream ISAVersionStream(ISAVersionString);
155  IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
156  getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
157 
158  // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
159  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
160  HSAMetadataStream.end();
161  getTargetStreamer()->EmitHSAMetadata(HSAMetadataStream.getHSAMetadata());
162  }
163 
164  // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
165  if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
166  // Copy the PAL metadata from the map where we collected it into a vector,
167  // then write it as a .note.
168  PALMD::Metadata PALMetadataVector;
169  for (auto i : PALMetadataMap) {
170  PALMetadataVector.push_back(i.first);
171  PALMetadataVector.push_back(i.second);
172  }
173  getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
174  }
175 }
176 
178  const MachineBasicBlock *MBB) const {
180  return false;
181 
182  if (MBB->empty())
183  return true;
184 
185  // If this is a block implementing a long branch, an expression relative to
186  // the start of the block is needed.
187  // XXX - Is there a smarter way to check this?
188  return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
189 }
190 
193  if (!MFI->isEntryFunction())
194  return;
195 
197  amd_kernel_code_t KernelCode;
198  if (STM.isAmdCodeObjectV2(*MF)) {
199  getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
200 
201  OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
202  getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
203  }
204 
206  return;
207 
208  HSAMetadataStream.emitKernel(*MF->getFunction(),
209  getHSACodeProps(*MF, CurrentProgramInfo),
210  getHSADebugProps(*MF, CurrentProgramInfo));
211 }
212 
216  if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(*MF)) {
218  getNameWithPrefix(SymbolName, MF->getFunction()),
220  SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
221  }
222 
224 }
225 
227 
228  // Group segment variables aren't emitted in HSA.
229  if (AMDGPU::isGroupSegment(GV))
230  return;
231 
233 }
234 
236  CallGraphResourceInfo.clear();
237  return AsmPrinter::doFinalization(M);
238 }
239 
240 // For the amdpal OS type, read the amdgpu.pal.metadata supplied by the
241 // frontend into our PALMetadataMap, ready for per-function modification. It
242 // is a NamedMD containing an MDTuple containing a number of MDNodes each of
243 // which is an integer value, and each two integer values forms a key=value
244 // pair that we store as PALMetadataMap[key]=value in the map.
245 void AMDGPUAsmPrinter::readPALMetadata(Module &M) {
246  auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
247  if (!NamedMD || !NamedMD->getNumOperands())
248  return;
249  auto Tuple = dyn_cast<MDTuple>(NamedMD->getOperand(0));
250  if (!Tuple)
251  return;
252  for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) {
253  auto Key = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I));
254  auto Val = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I + 1));
255  if (!Key || !Val)
256  continue;
257  PALMetadataMap[Key->getZExtValue()] = Val->getZExtValue();
258  }
259 }
260 
261 // Print comments that apply to both callable functions and entry points.
262 void AMDGPUAsmPrinter::emitCommonFunctionComments(
263  uint32_t NumVGPR,
264  uint32_t NumSGPR,
265  uint64_t ScratchSize,
266  uint64_t CodeSize) {
267  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
268  OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
269  OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
270  OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
271 }
272 
274  CurrentProgramInfo = SIProgramInfo();
275 
277 
278  // The starting address of all shader programs must be 256 bytes aligned.
279  // Regular functions just need the basic required instruction alignment.
280  MF.setAlignment(MFI->isEntryFunction() ? 8 : 2);
281 
283 
284  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
286  if (!STM.isAmdHsaOS()) {
287  MCSectionELF *ConfigSection =
288  Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
289  OutStreamer->SwitchSection(ConfigSection);
290  }
291 
293  if (MFI->isEntryFunction()) {
294  getSIProgramInfo(CurrentProgramInfo, MF);
295  } else {
296  auto I = CallGraphResourceInfo.insert(
297  std::make_pair(MF.getFunction(), SIFunctionResourceInfo()));
298  SIFunctionResourceInfo &Info = I.first->second;
299  assert(I.second && "should only be called once per function");
300  Info = analyzeResourceUsage(MF);
301  }
302 
303  if (STM.isAmdPalOS())
304  EmitPALMetadata(MF, CurrentProgramInfo);
305  if (!STM.isAmdHsaOS()) {
306  EmitProgramInfoSI(MF, CurrentProgramInfo);
307  }
308  } else {
309  EmitProgramInfoR600(MF);
310  }
311 
312  DisasmLines.clear();
313  HexLines.clear();
314  DisasmLineMaxLen = 0;
315 
317 
318  if (isVerbose()) {
319  MCSectionELF *CommentSection =
320  Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
321  OutStreamer->SwitchSection(CommentSection);
322 
324  if (!MFI->isEntryFunction()) {
325  OutStreamer->emitRawComment(" Function info:", false);
326  SIFunctionResourceInfo &Info = CallGraphResourceInfo[MF.getFunction()];
327  emitCommonFunctionComments(
328  Info.NumVGPR,
329  Info.getTotalNumSGPRs(MF.getSubtarget<SISubtarget>()),
330  Info.PrivateSegmentSize,
331  getFunctionCodeSize(MF));
332  return false;
333  }
334 
335  OutStreamer->emitRawComment(" Kernel info:", false);
336  emitCommonFunctionComments(CurrentProgramInfo.NumVGPR,
337  CurrentProgramInfo.NumSGPR,
338  CurrentProgramInfo.ScratchSize,
339  getFunctionCodeSize(MF));
340 
341  OutStreamer->emitRawComment(
342  " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
343  OutStreamer->emitRawComment(
344  " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
345  OutStreamer->emitRawComment(
346  " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
347  " bytes/workgroup (compile time only)", false);
348 
349  OutStreamer->emitRawComment(
350  " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
351  OutStreamer->emitRawComment(
352  " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
353 
354  OutStreamer->emitRawComment(
355  " NumSGPRsForWavesPerEU: " +
356  Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
357  OutStreamer->emitRawComment(
358  " NumVGPRsForWavesPerEU: " +
359  Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
360 
361  OutStreamer->emitRawComment(
362  " ReservedVGPRFirst: " + Twine(CurrentProgramInfo.ReservedVGPRFirst),
363  false);
364  OutStreamer->emitRawComment(
365  " ReservedVGPRCount: " + Twine(CurrentProgramInfo.ReservedVGPRCount),
366  false);
367 
369  OutStreamer->emitRawComment(
370  " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
371  Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
372  OutStreamer->emitRawComment(
373  " DebuggerPrivateSegmentBufferSGPR: s" +
374  Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false);
375  }
376 
377  OutStreamer->emitRawComment(
378  " COMPUTE_PGM_RSRC2:USER_SGPR: " +
379  Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
380  OutStreamer->emitRawComment(
381  " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
382  Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
383  OutStreamer->emitRawComment(
384  " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
385  Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
386  OutStreamer->emitRawComment(
387  " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
388  Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
389  OutStreamer->emitRawComment(
390  " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
391  Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
392  OutStreamer->emitRawComment(
393  " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
394  Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
395  false);
396  } else {
398  OutStreamer->emitRawComment(
399  Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->CFStackSize)));
400  }
401  }
402 
403  if (STM.dumpCode()) {
404 
405  OutStreamer->SwitchSection(
406  Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
407 
408  for (size_t i = 0; i < DisasmLines.size(); ++i) {
409  std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
410  Comment += " ; " + HexLines[i] + "\n";
411 
412  OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
413  OutStreamer->EmitBytes(StringRef(Comment));
414  }
415  }
416 
417  return false;
418 }
419 
// Emits the program-info words for an R600-family function to OutStreamer.
//
// The function first scans every operand of every instruction in MF to find
// the highest hardware register index that is not above 127, and to detect
// whether a KILLGT instruction is present. It then writes a sequence of
// 4-byte values: a resource register id followed by the GPR count / stack size
// word, then DB_SHADER_CONTROL with the kill-enable bit, and, for compute
// calling conventions only, SQ_LDS_ALLOC with the LDS size in dwords.
//
// NOTE(review): several lines of this listing are missing (the declaration of
// `MFI`, the condition selecting between the two `switch` statements, and the
// `case` labels that assign `RsrcReg`). Confirm against the real source.
420 void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
421  unsigned MaxGPR = 0;
422  bool killPixel = false;
423  const R600Subtarget &STM = MF.getSubtarget<R600Subtarget>();
424  const R600RegisterInfo *RI = STM.getRegisterInfo();
426 
427  for (const MachineBasicBlock &MBB : MF) {
428  for (const MachineInstr &MI : MBB) {
429  if (MI.getOpcode() == AMDGPU::KILLGT)
430  killPixel = true;
431  unsigned numOperands = MI.getNumOperands();
432  for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
433  const MachineOperand &MO = MI.getOperand(op_idx);
434  if (!MO.isReg())
435  continue;
436  unsigned HWReg = RI->getHWRegIndex(MO.getReg());
437 
438  // Registers with a hardware index above 127 aren't GPRs.
439  if (HWReg > 127)
440  continue;
441  MaxGPR = std::max(MaxGPR, HWReg);
442  }
443  }
444  }
445 
  // RsrcReg is chosen by calling convention; the assigning cases are not
  // visible in this listing.
446  unsigned RsrcReg;
448  // Evergreen / Northern Islands
449  switch (MF.getFunction()->getCallingConv()) {
450  default: LLVM_FALLTHROUGH;
455  }
456  } else {
457  // R600 / R700
458  switch (MF.getFunction()->getCallingConv()) {
459  default: LLVM_FALLTHROUGH;
464  }
465  }
466 
  // MaxGPR is the highest index seen, so the register count is MaxGPR + 1.
467  OutStreamer->EmitIntValue(RsrcReg, 4);
468  OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
469  S_STACK_SIZE(MFI->CFStackSize), 4);
470  OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
471  OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
472 
473  if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
474  OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
  // LDS size is rounded up to a multiple of 4 bytes and emitted in dwords.
475  OutStreamer->EmitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4);
476  }
477 }
478 
479 uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
480  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
481  const SIInstrInfo *TII = STM.getInstrInfo();
482 
483  uint64_t CodeSize = 0;
484 
485  for (const MachineBasicBlock &MBB : MF) {
486  for (const MachineInstr &MI : MBB) {
487  // TODO: CodeSize should account for multiple functions.
488 
489  // TODO: Should we count size of debug info?
490  if (MI.isDebugValue())
491  continue;
492 
493  CodeSize += TII->getInstSizeInBytes(MI);
494  }
495  }
496 
497  return CodeSize;
498 }
499 
501  const SIInstrInfo &TII,
502  unsigned Reg) {
503  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
504  if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
505  return true;
506  }
507 
508  return false;
509 }
510 
// Returns how many SGPRs must be counted on top of the explicitly used ones.
//
// The result starts at 0 and becomes 2 when VCC is used. It is then raised
// depending on a subtarget check: in the first branch flat scratch use gives
// 4; in the other branch XNACK being enabled gives 4 and flat scratch use
// gives 6. Later assignments overwrite earlier ones, so the largest applicable
// value wins.
//
// NOTE(review): the `if` line that opens the first branch is missing from
// this listing, so the exact subtarget condition cannot be confirmed here.
511 static unsigned getNumExtraSGPRs(const SISubtarget &ST,
512  bool VCCUsed,
513  bool FlatScrUsed) {
514  unsigned ExtraSGPRs = 0;
515  if (VCCUsed)
516  ExtraSGPRs = 2;
517 
519  if (FlatScrUsed)
520  ExtraSGPRs = 4;
521  } else {
522  if (ST.isXNACKEnabled())
523  ExtraSGPRs = 4;
524 
525  if (FlatScrUsed)
526  ExtraSGPRs = 6;
527  }
528 
529  return ExtraSGPRs;
530 }
531 
533  const SISubtarget &ST) const {
534  return NumExplicitSGPR + getNumExtraSGPRs(ST, UsesVCC, UsesFlatScratch);
535 }
536 
// Computes the register and stack resource usage of MF and returns it as a
// SIFunctionResourceInfo.
//
// Two strategies are used:
//  * If the frame info reports neither calls nor a tail call, the highest
//    used VGPR and SGPR are looked up directly in MachineRegisterInfo.
//  * Otherwise every register operand of every instruction is classified by
//    register class and width, and for each call the callee's previously
//    recorded usage from CallGraphResourceInfo is merged in. For callees that
//    are only declarations, fixed conservative guesses are used instead.
//
// NOTE(review): the declaration of `MFI` (used in the flat-scratch check) is
// missing from this listing.
537 AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
538  const MachineFunction &MF) const {
539  SIFunctionResourceInfo Info;
540 
542  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
543  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
544  const MachineRegisterInfo &MRI = MF.getRegInfo();
545  const SIInstrInfo *TII = ST.getInstrInfo();
546  const SIRegisterInfo &TRI = TII->getRegisterInfo();
547 
548  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
549  MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI);
550 
551  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
552  // instructions aren't used to access the scratch buffer. Inline assembly may
553  // need it though.
554  //
555  // If we only have implicit uses of flat_scr on flat instructions, it is not
556  // really needed.
557  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
558  (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
559  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
560  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
561  Info.UsesFlatScratch = false;
562  }
563 
564  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
565  Info.PrivateSegmentSize = FrameInfo.getStackSize();
566 
567 
568  Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
569  MRI.isPhysRegUsed(AMDGPU::VCC_HI);
570 
571  // If there are no calls, MachineRegisterInfo can tell us the used register
572  // count easily.
573  // A tail call isn't considered a call for MachineFrameInfo's purposes.
574  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
  // Walk each 32-bit register class from the end and stop at the first
  // register that is used.
575  MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
576  for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
577  if (MRI.isPhysRegUsed(Reg)) {
578  HighestVGPRReg = Reg;
579  break;
580  }
581  }
582 
583  MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
584  for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
585  if (MRI.isPhysRegUsed(Reg)) {
586  HighestSGPRReg = Reg;
587  break;
588  }
589  }
590 
591  // We found the maximum register index. They start at 0, so add one to get the
592  // number of registers.
593  Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 :
594  TRI.getHWRegIndex(HighestVGPRReg) + 1;
595  Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 :
596  TRI.getHWRegIndex(HighestSGPRReg) + 1;
597 
598  return Info;
599  }
600 
  // Slow path: -1 means "no register of this kind seen yet", so that adding
  // one at the end yields a count of zero.
601  int32_t MaxVGPR = -1;
602  int32_t MaxSGPR = -1;
603  uint64_t CalleeFrameSize = 0;
604 
605  for (const MachineBasicBlock &MBB : MF) {
606  for (const MachineInstr &MI : MBB) {
607  // TODO: Check regmasks? Do they occur anywhere except calls?
608  for (const MachineOperand &MO : MI.operands()) {
609  unsigned Width = 0;
610  bool IsSGPR = false;
611 
612  if (!MO.isReg())
613  continue;
614 
  // Special registers are handled here and never contribute to the
  // MaxSGPR / MaxVGPR indices.
615  unsigned Reg = MO.getReg();
616  switch (Reg) {
617  case AMDGPU::EXEC:
618  case AMDGPU::EXEC_LO:
619  case AMDGPU::EXEC_HI:
620  case AMDGPU::SCC:
621  case AMDGPU::M0:
622  case AMDGPU::SRC_SHARED_BASE:
623  case AMDGPU::SRC_SHARED_LIMIT:
624  case AMDGPU::SRC_PRIVATE_BASE:
625  case AMDGPU::SRC_PRIVATE_LIMIT:
626  continue;
627 
628  case AMDGPU::NoRegister:
629  assert(MI.isDebugValue());
630  continue;
631 
632  case AMDGPU::VCC:
633  case AMDGPU::VCC_LO:
634  case AMDGPU::VCC_HI:
635  Info.UsesVCC = true;
636  continue;
637 
638  case AMDGPU::FLAT_SCR:
639  case AMDGPU::FLAT_SCR_LO:
640  case AMDGPU::FLAT_SCR_HI:
641  continue;
642 
643  case AMDGPU::TBA:
644  case AMDGPU::TBA_LO:
645  case AMDGPU::TBA_HI:
646  case AMDGPU::TMA:
647  case AMDGPU::TMA_LO:
648  case AMDGPU::TMA_HI:
649  llvm_unreachable("trap handler registers should not be used");
650 
651  default:
652  break;
653  }
654 
  // Classify the register: scalar or vector, and how many consecutive
  // 32-bit registers it spans (Width).
655  if (AMDGPU::SReg_32RegClass.contains(Reg)) {
656  assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
657  "trap handler registers should not be used");
658  IsSGPR = true;
659  Width = 1;
660  } else if (AMDGPU::VGPR_32RegClass.contains(Reg)) {
661  IsSGPR = false;
662  Width = 1;
663  } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
664  assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
665  "trap handler registers should not be used");
666  IsSGPR = true;
667  Width = 2;
668  } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
669  IsSGPR = false;
670  Width = 2;
671  } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
672  IsSGPR = false;
673  Width = 3;
674  } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
675  IsSGPR = true;
676  Width = 4;
677  } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
678  IsSGPR = false;
679  Width = 4;
680  } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
681  IsSGPR = true;
682  Width = 8;
683  } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
684  IsSGPR = false;
685  Width = 8;
686  } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
687  IsSGPR = true;
688  Width = 16;
689  } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
690  IsSGPR = false;
691  Width = 16;
692  } else {
693  llvm_unreachable("Unknown register class");
694  }
  // The highest 32-bit index touched by this operand is the base index
  // plus Width - 1.
695  unsigned HWReg = TRI.getHWRegIndex(Reg);
696  int MaxUsed = HWReg + Width - 1;
697  if (IsSGPR) {
698  MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
699  } else {
700  MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
701  }
702  }
703 
704  if (MI.isCall()) {
705  // Pseudo used just to encode the underlying global. Is there a better
706  // way to track this?
707 
708  const MachineOperand *CalleeOp
709  = TII->getNamedOperand(MI, AMDGPU::OpName::callee);
710  const Function *Callee = cast<Function>(CalleeOp->getGlobal());
711  if (Callee->isDeclaration()) {
712  // If this is a call to an external function, we can't do much. Make
713  // conservative guesses.
714 
715  // 48 SGPRs - vcc, - flat_scr, -xnack
716  int MaxSGPRGuess = 47 - getNumExtraSGPRs(ST, true,
717  ST.hasFlatAddressSpace());
718  MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
719  MaxVGPR = std::max(MaxVGPR, 23);
720 
721  CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384));
722  Info.UsesVCC = true;
723  Info.UsesFlatScratch = ST.hasFlatAddressSpace();
724  Info.HasDynamicallySizedStack = true;
725  } else {
726  // We force CodeGen to run in SCC order, so the callee's register
727  // usage etc. should be the cumulative usage of all callees.
728  auto I = CallGraphResourceInfo.find(Callee);
729  assert(I != CallGraphResourceInfo.end() &&
730  "callee should have been handled before caller");
731 
  // Callee counts are converted back to highest-index form (count - 1)
  // before being merged with the running maxima.
732  MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
733  MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
734  CalleeFrameSize
735  = std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
736  Info.UsesVCC |= I->second.UsesVCC;
737  Info.UsesFlatScratch |= I->second.UsesFlatScratch;
738  Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
739  Info.HasRecursion |= I->second.HasRecursion;
740  }
741 
742  if (!Callee->doesNotRecurse())
743  Info.HasRecursion = true;
744  }
745  }
746  }
747 
  // Convert highest indices to counts, and add the largest callee frame to
  // this function's own stack size.
748  Info.NumExplicitSGPR = MaxSGPR + 1;
749  Info.NumVGPR = MaxVGPR + 1;
750  Info.PrivateSegmentSize += CalleeFrameSize;
751 
752  return Info;
753 }
754 
// Fills ProgInfo for the entry function MF.
//
// The register counts, scratch size and VCC / flat-scratch / dynamic-stack
// flags come from analyzeResourceUsage(MF). The function then:
//  * diagnoses a scratch size that does not fit in 32 bits, too many SGPRs,
//    too many user SGPRs and too much local memory;
//  * adds the extra SGPRs and reserved VGPRs to the counts;
//  * derives the SGPR/VGPR block encodings, LDS blocks and scratch blocks;
//  * records float mode, IEEE mode and DX10 clamp; and
//  * packs everything into ComputePGMRSrc1 and ComputePGMRSrc2.
//
// NOTE(review): many lines are missing from this listing (for example the
// declaration of `MFI`, the conditions guarding both SGPR-limit checks, the
// `DiagnosticInfoResourceLimit` constructions, the right-hand sides in the
// SGPR-init-bug block, the LDS granularity condition and two fields of
// ComputePGMRSrc2). Confirm those parts against the real source.
755 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
756  const MachineFunction &MF) {
757  SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
758 
759  ProgInfo.NumVGPR = Info.NumVGPR;
760  ProgInfo.NumSGPR = Info.NumExplicitSGPR;
761  ProgInfo.ScratchSize = Info.PrivateSegmentSize;
762  ProgInfo.VCCUsed = Info.UsesVCC;
763  ProgInfo.FlatUsed = Info.UsesFlatScratch;
764  ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
765 
766  if (!isUInt<32>(ProgInfo.ScratchSize)) {
767  DiagnosticInfoStackSize DiagStackSize(*MF.getFunction(),
768  ProgInfo.ScratchSize, DS_Error);
769  MF.getFunction()->getContext().diagnose(DiagStackSize);
770  }
771 
772  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
774  const SIInstrInfo *TII = STM.getInstrInfo();
775  const SIRegisterInfo *RI = &TII->getRegisterInfo();
776 
777  unsigned ExtraSGPRs = getNumExtraSGPRs(STM,
778  ProgInfo.VCCUsed,
779  ProgInfo.FlatUsed);
780  unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF);
781 
782  // Check the addressable register limit before we add ExtraSGPRs.
784  !STM.hasSGPRInitBug()) {
785  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
786  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
787  // This can happen due to a compiler bug or when using inline asm.
788  LLVMContext &Ctx = MF.getFunction()->getContext();
790  "addressable scalar registers",
791  ProgInfo.NumSGPR, DS_Error,
793  MaxAddressableNumSGPRs);
794  Ctx.diagnose(Diag);
795  ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
796  }
797  }
798 
799  // Account for extra SGPRs and VGPRs reserved for debugger use.
800  ProgInfo.NumSGPR += ExtraSGPRs;
801  ProgInfo.NumVGPR += ExtraVGPRs;
802 
803  // Adjust number of registers used to meet default/requested minimum/maximum
804  // number of waves per execution unit request.
805  ProgInfo.NumSGPRsForWavesPerEU = std::max(
806  std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
807  ProgInfo.NumVGPRsForWavesPerEU = std::max(
808  std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
809 
811  STM.hasSGPRInitBug()) {
812  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
813  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
814  // This can happen due to a compiler bug or when using inline asm to use
815  // the registers which are usually reserved for vcc etc.
816  LLVMContext &Ctx = MF.getFunction()->getContext();
818  "scalar registers",
819  ProgInfo.NumSGPR, DS_Error,
821  MaxAddressableNumSGPRs);
822  Ctx.diagnose(Diag);
823  ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
824  ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
825  }
826  }
827 
828  if (STM.hasSGPRInitBug()) {
829  ProgInfo.NumSGPR =
831  ProgInfo.NumSGPRsForWavesPerEU =
833  }
834 
835  if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
836  LLVMContext &Ctx = MF.getFunction()->getContext();
837  DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "user SGPRs",
838  MFI->getNumUserSGPRs(), DS_Error);
839  Ctx.diagnose(Diag);
840  }
841 
842  if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
843  LLVMContext &Ctx = MF.getFunction()->getContext();
844  DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "local memory",
845  MFI->getLDSSize(), DS_Error);
846  Ctx.diagnose(Diag);
847  }
848 
849  // SGPRBlocks is actual number of SGPR blocks minus 1.
850  ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU,
851  STM.getSGPREncodingGranule());
852  ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / STM.getSGPREncodingGranule() - 1;
853 
854  // VGPRBlocks is actual number of VGPR blocks minus 1.
855  ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU,
856  STM.getVGPREncodingGranule());
857  ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1;
858 
859  // Record first reserved VGPR and number of reserved VGPRs.
860  ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? ProgInfo.NumVGPR : 0;
861  ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);
862 
863  // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
864  // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
865  // attribute was requested.
866  if (STM.debuggerEmitPrologue()) {
867  ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
868  RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
869  ProgInfo.DebuggerPrivateSegmentBufferSGPR =
870  RI->getHWRegIndex(MFI->getScratchRSrcReg());
871  }
872 
873  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
874  // register.
875  ProgInfo.FloatMode = getFPMode(MF);
876 
877  ProgInfo.IEEEMode = STM.enableIEEEBit(MF);
878 
879  // Make the clamp modifier return 0 on NaN input.
880  ProgInfo.DX10Clamp = STM.enableDX10Clamp();
881 
882  unsigned LDSAlignShift;
884  // LDS is allocated in 64 dword blocks.
885  LDSAlignShift = 8;
886  } else {
887  // LDS is allocated in 128 dword blocks.
888  LDSAlignShift = 9;
889  }
890 
891  unsigned LDSSpillSize =
892  MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize();
893 
  // Round the byte size up to the allocation granule, then express it as a
  // number of granules.
894  ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
895  ProgInfo.LDSBlocks =
896  alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
897 
898  // Scratch is allocated in 256 dword blocks.
899  unsigned ScratchAlignShift = 10;
900  // We need to program the hardware with the amount of scratch memory that
901  // is used by the entire wave. ProgInfo.ScratchSize is the amount of
902  // scratch memory used per thread.
903  ProgInfo.ScratchBlocks =
904  alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
905  1ULL << ScratchAlignShift) >>
906  ScratchAlignShift;
907 
908  ProgInfo.ComputePGMRSrc1 =
909  S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
910  S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
911  S_00B848_PRIORITY(ProgInfo.Priority) |
912  S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
913  S_00B848_PRIV(ProgInfo.Priv) |
914  S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
915  S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
916  S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
917 
918  // 0 = X, 1 = XY, 2 = XYZ
919  unsigned TIDIGCompCnt = 0;
920  if (MFI->hasWorkItemIDZ())
921  TIDIGCompCnt = 2;
922  else if (MFI->hasWorkItemIDY())
923  TIDIGCompCnt = 1;
924 
925  ProgInfo.ComputePGMRSrc2 =
926  S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
927  S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
929  S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
930  S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
931  S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
932  S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
933  S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
935  // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
936  S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
937  S_00B84C_EXCP_EN(0);
938 }
939 
// Maps a calling convention to a resource register id. Callers in this file
// emit the result as a register id and also derive the matching rsrc2
// register from it.
//
// NOTE(review): every `case` label and `return` of this switch is missing
// from the listing; only the `default` fallthrough is visible, so the actual
// mapping cannot be documented from here.
940 static unsigned getRsrcReg(CallingConv::ID CallConv) {
941  switch (CallConv) {
942  default: LLVM_FALLTHROUGH;
950  }
951 }
952 
// Emits the program info for MF to OutStreamer as a sequence of 4-byte
// (register id, value) pairs taken from CurrentProgramInfo.
//
// One branch writes COMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC2 and
// COMPUTE_TMPRING_SIZE. The other branch writes the rsrc register chosen by
// getRsrcReg() for the calling convention, with optional SPI_TMPRING_SIZE,
// pixel-shader input and rsrc2 entries. Both branches finish with the
// spilled SGPR and spilled VGPR counts.
//
// NOTE(review): the declaration of `MFI`, the condition choosing between the
// two branches and the condition guarding the PS-input entries are missing
// from this listing.
953 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
954  const SIProgramInfo &CurrentProgramInfo) {
955  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
957  unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv());
958 
960  OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
961 
962  OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc1, 4);
963 
964  OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
965  OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc2, 4);
966 
967  OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
968  OutStreamer->EmitIntValue(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
969 
970  // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
971  // 0" comment but I don't see a corresponding field in the register spec.
972  } else {
973  OutStreamer->EmitIntValue(RsrcReg, 4);
974  OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
975  S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
  // Rsrc2Val accumulates the rsrc2 bits; the pair is emitted only when the
  // value is non-zero.
976  unsigned Rsrc2Val = 0;
977  if (STM.isVGPRSpillingEnabled(*MF.getFunction())) {
978  OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
979  OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
981  Rsrc2Val = S_00B84C_SCRATCH_EN(CurrentProgramInfo.ScratchBlocks > 0);
982  }
984  OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
985  OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
986  OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
987  OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
988  Rsrc2Val |= S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks);
989  }
990  if (Rsrc2Val) {
991  OutStreamer->EmitIntValue(RsrcReg + 4 /*rsrc2*/, 4);
992  OutStreamer->EmitIntValue(Rsrc2Val, 4);
993  }
994  }
995 
996  OutStreamer->EmitIntValue(R_SPILLED_SGPRS, 4);
997  OutStreamer->EmitIntValue(MFI->getNumSpilledSGPRs(), 4);
998  OutStreamer->EmitIntValue(R_SPILLED_VGPRS, 4);
999  OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4);
1000 }
1001 
1002 // This is the equivalent of EmitProgramInfoSI above, but for when the OS type
1003 // is AMDPAL. It stores each compute/SPI register setting and other PAL
1004 // metadata items into the PALMetadataMap, combining with any provided by the
1005 // frontend as LLVM metadata. Once all functions are written, PALMetadataMap is
1006 // then written as a single block in the .note section.
// Records the settings for MF in PALMetadataMap. Register entries are keyed by
// register number (byte offset / 4); non-register entries are keyed relative
// to the per-calling-convention *_SCRATCH_SIZE key.
//
// NOTE(review): the numbering embedded in this listing skips 1009, 1023, 1026,
// 1029, 1032, 1035, 1038, 1043, 1045, 1048 and 1063. The declaration of MFI,
// every `case` label of the switch, the right-hand sides of the two
// NumUsed*Key computations and the conditions opening the if/else and the
// final group are not visible in this text; confirm against the upstream file
// before changing any logic here.
1007 void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
1008  const SIProgramInfo &CurrentProgramInfo) {
1010  // Given the calling convention, calculate the register number for rsrc1. In
1011  // principle the register number could change in future hardware, but we know
1012  // it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so
1013  // we can use the same fixed value that .AMDGPU.config has for Mesa. Note
1014  // that we use a register number rather than a byte offset, so we need to
1015  // divide by 4.
1016  unsigned Rsrc1Reg = getRsrcReg(MF.getFunction()->getCallingConv()) / 4;
// rsrc2 is the register immediately after rsrc1.
1017  unsigned Rsrc2Reg = Rsrc1Reg + 1;
1018  // Also calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used
1019  // with a constant offset to access any non-register shader-specific PAL
1020  // metadata key.
// CS_SCRATCH_SIZE is the starting value; the switch replaces it with the
// PS/VS/GS/ES/HS/LS key (the case labels selecting each one are missing from
// this listing).
1021  unsigned ScratchSizeKey = PALMD::Key::CS_SCRATCH_SIZE;
1022  switch (MF.getFunction()->getCallingConv()) {
1024  ScratchSizeKey = PALMD::Key::PS_SCRATCH_SIZE;
1025  break;
1027  ScratchSizeKey = PALMD::Key::VS_SCRATCH_SIZE;
1028  break;
1030  ScratchSizeKey = PALMD::Key::GS_SCRATCH_SIZE;
1031  break;
1033  ScratchSizeKey = PALMD::Key::ES_SCRATCH_SIZE;
1034  break;
1036  ScratchSizeKey = PALMD::Key::HS_SCRATCH_SIZE;
1037  break;
1039  ScratchSizeKey = PALMD::Key::LS_SCRATCH_SIZE;
1040  break;
1041  }
// NOTE(review): the offsets added to ScratchSizeKey below are on the missing
// lines 1043 and 1045.
1042  unsigned NumUsedVgprsKey = ScratchSizeKey +
1044  unsigned NumUsedSgprsKey = ScratchSizeKey +
// The register counts are assigned outright; the entries after them use |= so
// that bits already present in the map are kept.
1046  PALMetadataMap[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU;
1047  PALMetadataMap[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU;
1049  PALMetadataMap[Rsrc1Reg] |= CurrentProgramInfo.ComputePGMRSrc1;
1050  PALMetadataMap[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2;
1051  // ScratchSize is in bytes, 16 aligned.
1052  PALMetadataMap[ScratchSizeKey] |=
1053  alignTo(CurrentProgramInfo.ScratchSize, 16);
1054  } else {
// Other branch: rsrc1 is built from the VGPR/SGPR block counts, and the
// scratch-enable bit is only set when scratch blocks are in use.
1055  PALMetadataMap[Rsrc1Reg] |= S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
1056  S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks);
1057  if (CurrentProgramInfo.ScratchBlocks > 0)
1058  PALMetadataMap[Rsrc2Reg] |= S_00B84C_SCRATCH_EN(1);
1059  // ScratchSize is in bytes, 16 aligned.
1060  PALMetadataMap[ScratchSizeKey] |=
1061  alignTo(CurrentProgramInfo.ScratchSize, 16);
1062  }
// Extra LDS size and the PS input enable/address registers; the condition
// opening this group (line 1063) is missing from this listing.
1064  PALMetadataMap[Rsrc2Reg] |=
1065  S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks);
1066  PALMetadataMap[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable();
1067  PALMetadataMap[R_0286D0_SPI_PS_INPUT_ADDR / 4] |= MFI->getPSInputAddr();
1068  }
1069 }
1070 
1071 // This is supposed to be log2(Size)
// Maps an element size in bytes (4, 8 or 16) to the corresponding
// AMD_ELEMENT_*_BYTES enumerator. Any other size reaches llvm_unreachable with
// the message "invalid private_element_size".
// NOTE(review): listing line 1072, which holds the function signature, is
// missing from this text. The index further down this file lists it as
// `static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)`.
1073  switch (Size) {
1074  case 4:
1075  return AMD_ELEMENT_4_BYTES;
1076  case 8:
1077  return AMD_ELEMENT_8_BYTES;
1078  case 16:
1079  return AMD_ELEMENT_16_BYTES;
1080  default:
1081  llvm_unreachable("invalid private_element_size");
1082  }
1083 }
1084 
// Fills Out with the kernel code header for MF. Out is first reset to the
// subtarget defaults by initDefaultAMDKernelCodeT, then individual fields are
// overwritten from CurrentProgramInfo, MFI and the subtarget.
//
// NOTE(review): the numbering embedded in this listing skips 1088, 1093, 1096,
// 1099, 1101, 1102, 1107, 1111, 1114, 1117, 1120, 1123, 1127, 1132, 1137,
// 1141, 1144, 1147, 1150, 1162, 1165 and 1167. The declaration of MFI, the
// left-hand sides of several assignments and the statements guarded by most
// of the `if`s are not visible. As printed, each bare `if (...)` would bind to
// the next visible statement, which is almost certainly an extraction
// artifact; restore the missing lines from the upstream file before relying
// on this text.
1085 void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
1086  const SIProgramInfo &CurrentProgramInfo,
1087  const MachineFunction &MF) const {
1089  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
1090 
1091  AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
1092 
// RSrc1 goes in the low bits and RSrc2 is shifted up by 32; the assignment
// target is on the missing line 1093.
// NOTE(review): assumes ComputePGMRSrc2 is a 64-bit field so that the shift by
// 32 is well-defined; TODO confirm.
1094  CurrentProgramInfo.ComputePGMRSrc1 |
1095  (CurrentProgramInfo.ComputePGMRSrc2 << 32);
1097 
1098  if (CurrentProgramInfo.DynamicCallStack)
1100 
1103  getElementByteSizeValue(STM.getMaxPrivateElementSize()));
1104 
// Each MFI->has*() predicate below enables something in Out; for the braced
// ones the visible text shows an `Out.code_properties |=` whose operand is on
// a missing line.
1105  if (MFI->hasPrivateSegmentBuffer()) {
1106  Out.code_properties |=
1108  }
1109 
1110  if (MFI->hasDispatchPtr())
1112 
1113  if (MFI->hasQueuePtr())
1115 
1116  if (MFI->hasKernargSegmentPtr())
1118 
1119  if (MFI->hasDispatchID())
1121 
1122  if (MFI->hasFlatScratchInit())
1124 
1125  if (MFI->hasGridWorkgroupCountX()) {
1126  Out.code_properties |=
1128  }
1129 
1130  if (MFI->hasGridWorkgroupCountY()) {
1131  Out.code_properties |=
1133  }
1134 
1135  if (MFI->hasGridWorkgroupCountZ()) {
1136  Out.code_properties |=
1138  }
1139 
// NOTE(review): same predicate as the earlier hasDispatchPtr() check; the
// guarded statement (line 1141) is missing, so its purpose cannot be read
// from this text.
1140  if (MFI->hasDispatchPtr())
1142 
1143  if (STM.debuggerSupported())
1145 
1146  if (STM.isXNACKEnabled())
1148 
1149  // FIXME: Should use getKernArgSize
1151  STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
// Register counts, segment sizes and the reserved VGPR range are copied
// straight from CurrentProgramInfo.
1152  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1153  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1154  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
1155  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
1156  Out.reserved_vgpr_first = CurrentProgramInfo.ReservedVGPRFirst;
1157  Out.reserved_vgpr_count = CurrentProgramInfo.ReservedVGPRCount;
1158 
1159  // These alignment values are specified in powers of two, so alignment =
1160  // 2^n. The minimum alignment is 2^4 = 16.
1161  Out.kernarg_segment_alignment = std::max((size_t)4,
1163 
// The two debugger SGPR values are only taken from CurrentProgramInfo when the
// debugger prologue is emitted; their assignment targets are on the missing
// lines 1165 and 1167.
1164  if (STM.debuggerEmitPrologue()) {
1166  CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
1168  CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR;
1169  }
1170 }
1171 
// Builds the HSA code-properties metadata record for the kernel in MF from the
// subtarget (STM), the machine function info (MFI) and ProgramInfo, and
// returns it by value.
//
// NOTE(review): listing lines 1176 and 1177 are missing from this text, so the
// declarations of MFI and HSACodeProps are not visible here.
1172 AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
1173  const MachineFunction &MF,
1174  const SIProgramInfo &ProgramInfo) const {
1175  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
1178 
1179  HSACodeProps.mKernargSegmentSize =
1180  STM.getKernArgSegmentSize(MF, MFI.getABIArgOffset());
1181  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
1182  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
// The kernarg alignment recorded here is never below 4.
1183  HSACodeProps.mKernargSegmentAlign =
1184  std::max(uint32_t(4), MFI.getMaxKernArgAlign());
1185  HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
// NOTE(review): the next two fields read CurrentProgramInfo, not the
// ProgramInfo parameter used by every other field in this function.
// CurrentProgramInfo is not declared in the visible block (presumably a class
// member). Confirm both always describe the same function, or switch these
// two reads to the parameter.
1186  HSACodeProps.mNumSGPRs = CurrentProgramInfo.NumSGPR;
1187  HSACodeProps.mNumVGPRs = CurrentProgramInfo.NumVGPR;
1188  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
1189  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
1190  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
1191 
1192  return HSACodeProps;
1193 }
1194 
// Builds the HSA debug-properties metadata record for MF and returns it by
// value. When the subtarget does not support the debugger, the record is
// returned as declared, with no field set by this function.
//
// NOTE(review): listing line 1199 is missing from this text, so the
// declaration of HSADebugProps is not visible here.
1195 AMDGPU::HSAMD::Kernel::DebugProps::Metadata AMDGPUAsmPrinter::getHSADebugProps(
1196  const MachineFunction &MF,
1197  const SIProgramInfo &ProgramInfo) const {
1198  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
1200 
1201  if (!STM.debuggerSupported())
1202  return HSADebugProps;
1203 
// The debugger ABI version is recorded as the two elements 1, 0.
1204  HSADebugProps.mDebuggerABIVersion.push_back(1);
1205  HSADebugProps.mDebuggerABIVersion.push_back(0);
1206  HSADebugProps.mReservedNumVGPRs = ProgramInfo.ReservedVGPRCount;
1207  HSADebugProps.mReservedFirstVGPR = ProgramInfo.ReservedVGPRFirst;
1208 
// The SGPR fields are only filled in when the debugger prologue is emitted.
1209  if (STM.debuggerEmitPrologue()) {
1210  HSADebugProps.mPrivateSegmentBufferSGPR =
1211  ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
1212  HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
1213  ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
1214  }
1215 
1216  return HSADebugProps;
1217 }
1218 
// Prints operand OpNo of the inline-asm instruction MI to O. Returns false
// when the operand was handled and true otherwise, as the "Unknown modifier"
// return below shows.
//
// NOTE(review): listing lines 1219 and 1241 are missing from this text. The
// first line of the signature and the start of the register-printing call are
// therefore not visible; the index further down this file lists the override
// as `bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned
// AsmVariant, const char *ExtraCode, raw_ostream &O)`.
1220  unsigned AsmVariant,
1221  const char *ExtraCode, raw_ostream &O) {
1222  // First try the generic code, which knows about modifiers like 'c' and 'n'.
1223  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O))
1224  return false;
1225 
// Only a single-character modifier is accepted here, and the only one
// recognised is 'r'; anything else is rejected.
1226  if (ExtraCode && ExtraCode[0]) {
1227  if (ExtraCode[1] != 0)
1228  return true; // Unknown modifier.
1229 
1230  switch (ExtraCode[0]) {
1231  case 'r':
1232  break;
1233  default:
1234  return true;
1235  }
1236  }
1237 
1238  // TODO: Should be able to support other operand types like globals.
// Register operands are handled (the call that prints them begins on the
// missing line 1241); every other operand kind is reported as unhandled.
1239  const MachineOperand &MO = MI->getOperand(OpNo);
1240  if (MO.isReg()) {
1242  *MF->getSubtarget().getRegisterInfo());
1243  return false;
1244  }
1245 
1246  return true;
1247 }
virtual void EmitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
Definition: AsmPrinter.cpp:438
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:341
bool hasFP64Denormals() const
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
Definition: AsmPrinter.cpp:205
void EmitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
bool isVGPRSpillingEnabled(const Function &F) const
Interface definition for SIRegisterInfo.
Target & getTheGCNTarget()
The target for GCN GPUs.
#define S_00B848_VGPRS(x)
Definition: SIDefines.h:429
Generation getGeneration() const
#define S_00B848_PRIV(x)
Definition: SIDefines.h:441
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVMContext & Context
AMDGPU specific subclass of TargetSubtarget.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:92
#define FP_DENORM_MODE_SP(x)
Definition: SIDefines.h:474
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition: SIDefines.h:380
#define G_00B84C_USER_SGPR(x)
Definition: SIDefines.h:393
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
bool doFinalization(Module &M) override
Shut down the asmprinter.
unsigned getVGPREncodingGranule() const
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
#define R_028850_SQ_PGM_RESOURCES_PS
Definition: R600Defines.h:157
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:294
#define R_028860_SQ_PGM_RESOURCES_VS
Definition: R600Defines.h:165
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
#define G_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:405
unsigned getReg() const
getReg - Returns the register number.
#define R_028878_SQ_PGM_RESOURCES_GS
Definition: R600Defines.h:166
const SIInstrInfo * getInstrInfo() const override
const MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:95
AMDGPUAS getAMDGPUAS(const Module &M)
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition: SIDefines.h:376
iterator_range< reg_iterator > reg_operands(unsigned Reg) const
#define S_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:389
#define S_00B84C_TG_SIZE_EN(x)
Definition: SIDefines.h:407
#define S_00B848_DX10_CLAMP(x)
Definition: SIDefines.h:444
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:677
#define G_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:402
const MCSubtargetInfo * getSTI() const
Interface definition for R600RegisterInfo.
bool debuggerReserveRegs() const
#define R_0286CC_SPI_PS_INPUT_ENA
Definition: SIDefines.h:425
#define S_00B028_SGPRS(x)
Definition: SIDefines.h:386
Tuple of metadata.
Definition: Metadata.h:1104
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:146
return AArch64::GPR64RegClass contains(Reg)
#define S_NUM_GPRS(x)
Definition: R600Defines.h:151
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:198
unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
#define FP_DENORM_FLUSH_NONE
Definition: SIDefines.h:469
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:216
uint32_t code_properties
Code properties.
virtual bool EmitHSAMetadata(StringRef HSAMetadataString)
Definition: BitVector.h:920
In-memory representation of kernel debug properties metadata.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
static bool isFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:456
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
const HexagonInstrInfo * TII
int getLocalMemorySize() const
AMD Kernel Code Object (amd_kernel_code_t).
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
#define G_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:396
Reg
All possible values of the reg field in the ModR/M byte.
bool hasCodeObjectV3(const FeatureBitset &Features)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
virtual bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata)=0
#define FP_ROUND_MODE_SP(x)
Definition: SIDefines.h:463
Diagnostic information for stack size etc.
#define S_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:401
Context object for machine code objects.
Definition: MCContext.h:59
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:186
#define S_00B848_FLOAT_MODE(x)
Definition: SIDefines.h:438
#define R_00B848_COMPUTE_PGM_RSRC1
Definition: SIDefines.h:428
Key
PAL metadata keys.
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment...
void EmitFunctionBody()
This method emits the body and trailer for a function.
Definition: AsmPrinter.cpp:982
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:285
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
bool isGroupSegment(const GlobalValue *GV)
unsigned getMaxNumUserSGPRs() const
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:242
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
#define S_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:395
bool isVerbose() const
Return true if assembly output should contain comments.
Definition: AsmPrinter.h:189
amdgpu Simplify well known AMD library false Value * Callee
bool debuggerEmitPrologue() const
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool hasFP32Denormals() const
#define S_00B848_IEEE_MODE(x)
Definition: SIDefines.h:450
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Definition: CallingConv.h:221
bool isCompute(CallingConv::ID cc)
#define S_00B028_VGPRS(x)
Definition: SIDefines.h:385
static uint32_t getFPMode(const MachineFunction &F)
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
unsigned const MachineRegisterInfo * MRI
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:112
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
unsigned getSGPREncodingGranule() const
unsigned getTotalNumSGPRs(const FeatureBitset &Features)
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define G_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:399
#define S_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:410
#define FP_ROUND_MODE_DP(x)
Definition: SIDefines.h:464
const GlobalValue * getGlobal() const
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
bool enableDX10Clamp() const
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:80
This class is intended to be used as a driving class for all asm writers.
Definition: AsmPrinter.h:77
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
Definition: AsmPrinter.cpp:428
void EmitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
#define R_0288D4_SQ_PGM_RESOURCES_LS
Definition: R600Defines.h:167
#define S_00B84C_EXCP_EN_MSB(x)
Definition: SIDefines.h:414
Instruction set architecture version.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const Triple & getTargetTriple() const
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
Definition: SIDefines.h:466
bool debuggerSupported() const
#define S_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:404
The AMDGPU TargetMachine interface definition for hw codegen targets.
#define R_02880C_DB_SHADER_CONTROL
Definition: R600Defines.h:147
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:194
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
Definition: SIDefines.h:381
#define S_00B84C_LDS_SIZE(x)
Definition: SIDefines.h:418
#define R_SPILLED_SGPRS
Definition: SIDefines.h:483
virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
void emitKernel(const Function &Func, const Kernel::CodeProps::Metadata &CodeProps, const Kernel::DebugProps::Metadata &DebugProps)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features)
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:478
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:189
#define FP_ROUND_ROUND_TO_NEAREST
Definition: SIDefines.h:456
bool doesNotRecurse() const
Determine if the function is known not to recurse, directly or indirectly.
Definition: Function.h:488
#define S_00B84C_EXCP_EN(x)
Definition: SIDefines.h:421
void LLVMInitializeAMDGPUAsmPrinter()
unsigned getAddressableNumSGPRs() const
unsigned getWavefrontSize() const
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
Definition: SIDefines.h:382
const R600RegisterInfo * getRegisterInfo() const override
AMDGPUTargetStreamer * getTargetStreamer() const
#define G_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:411
std::vector< std::string > HexLines
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition: SIDefines.h:379
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel...
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:194
MachineOperand class - Representation of each machine instruction operand.
unsigned getReservedNumVGPRs(const MachineFunction &MF) const
uint16_t debug_wavefront_private_segment_offset_sgpr
If is_debug_supported is 0 then must be 0.
const MCSubtargetInfo * getMCSubtargetInfo() const
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
#define S_STACK_SIZE(x)
Definition: R600Defines.h:152
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
#define S_00B848_DEBUG_MODE(x)
Definition: SIDefines.h:447
#define R_028868_SQ_PGM_RESOURCES_VS
Definition: R600Defines.h:158
#define FP_DENORM_MODE_DP(x)
Definition: SIDefines.h:475
#define S_0286E8_WAVESIZE(x)
Definition: SIDefines.h:481
#define S_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:398
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
#define AMD_HSA_BITS_SET(dst, mask, val)
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
std::vector< std::string > DisasmLines
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
void EmitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static unsigned getNumExtraSGPRs(const SISubtarget &ST, bool VCCUsed, bool FlatScrUsed)
virtual bool EmitISAVersion(StringRef IsaVersionString)=0
Representation of each machine instruction.
Definition: MachineInstr.h:59
void EmitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
bool doFinalization(Module &M) override
Shut down the asmprinter.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
#define S_02880C_KILL_ENABLE(x)
Definition: R600Defines.h:148
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
#define S_00B848_SGPRS(x)
Definition: SIDefines.h:432
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
#define S_00B84C_USER_SGPR(x)
Definition: SIDefines.h:392
#define I(x, y, z)
Definition: MD5.cpp:58
AMDGPU Assembly printer class.
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition: SIDefines.h:477
MCSubtargetInfo - Generic base class for all target subtargets.
This represents a section on linux, lots of unix variants and some bare metal systems.
Definition: MCSectionELF.h:28
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
In-memory representation of kernel code properties metadata.
#define R_SPILLED_VGPRS
Definition: SIDefines.h:484
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
virtual void EmitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AsmPrinter.cpp:729
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:201
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
static unsigned getRsrcReg(CallingConv::ID CallConv)
bool isAmdCodeObjectV2(const MachineFunction &MF) const
uint64_t mKernargSegmentSize
Size in bytes of the kernarg segment memory.
#define S_00B860_WAVESIZE(x)
Definition: SIDefines.h:478
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition: SIDefines.h:388
bool isTrapHandlerEnabled() const
#define S_00B848_PRIORITY(x)
Definition: SIDefines.h:435
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:462
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
void EmitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
bool hasSGPRInitBug() const
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
#define R_0286E8_SPI_TMPRING_SIZE
Definition: SIDefines.h:480
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:235
void setAlignment(unsigned A)
setAlignment - Set the alignment (log2, not bytes) of the function.
unsigned getHWRegIndex(unsigned Reg) const
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition: SIDefines.h:378
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:57
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
IRTranslator LLVM IR MI
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
Definition: SIDefines.h:383
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
AMDGPU metadata definitions and in-memory representations.
bool enableIEEEBit(const MachineFunction &MF) const
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition: SIDefines.h:426
uint16_t debug_private_segment_buffer_sgpr
If is_debug_supported is 0 then must be 0.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295
uint16_t reserved_vgpr_count
The number of consecutive VGPRs reserved by the client.
bool isXNACKEnabled() const
IsaVersion getIsaVersion(const FeatureBitset &Features)
#define R_0288E8_SQ_LDS_ALLOC
Definition: R600Defines.h:169
std::vector< uint32_t > Metadata
PAL metadata represented as a vector.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream)
Streams isa version string for given subtarget STI into Stream.
#define R_028844_SQ_PGM_RESOURCES_PS
Definition: R600Defines.h:164
std::vector< uint32_t > mDebuggerABIVersion
Debugger ABI version. Optional.
uint16_t reserved_vgpr_first
If reserved_vgpr_count is 0 then must be 0.