LLVM  10.0.0svn
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 ///
11 /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
12 /// code. When passed an MCAsmStreamer it prints assembly and when passed
13 /// an MCObjectStreamer it outputs binary code.
14 //
15 //===----------------------------------------------------------------------===//
16 //
17 
18 #include "AMDGPUAsmPrinter.h"
19 #include "AMDGPU.h"
20 #include "AMDGPUSubtarget.h"
21 #include "AMDGPUTargetMachine.h"
25 #include "R600AsmPrinter.h"
26 #include "R600Defines.h"
28 #include "R600RegisterInfo.h"
29 #include "SIDefines.h"
30 #include "SIInstrInfo.h"
31 #include "SIMachineFunctionInfo.h"
32 #include "SIRegisterInfo.h"
34 #include "Utils/AMDGPUBaseInfo.h"
35 #include "llvm/BinaryFormat/ELF.h"
37 #include "llvm/IR/DiagnosticInfo.h"
38 #include "llvm/MC/MCAssembler.h"
39 #include "llvm/MC/MCContext.h"
40 #include "llvm/MC/MCSectionELF.h"
41 #include "llvm/MC/MCStreamer.h"
47 
48 using namespace llvm;
49 using namespace llvm::AMDGPU;
50 using namespace llvm::AMDGPU::HSAMD;
51 
52 // TODO: This should get the default rounding mode from the kernel. We just set
53 // the default here, but this could change if the OpenCL rounding mode pragmas
54 // are used.
55 //
56 // The denormal mode here should match what is reported by the OpenCL runtime
57 // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
58 // can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
59 //
60 // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
61 // precision, and leaves single precision to flush all and does not report
62 // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
63 // CL_FP_DENORM for both.
64 //
65 // FIXME: It seems some instructions do not support single precision denormals
66 // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
67 // and sin_f32, cos_f32 on most parts).
68 
69 // We want to use these instructions, and using fp32 denormals also causes
70 // instructions to run at the double precision rate for the device so it's
71 // probably best to just report no single precision denormals.
74  // TODO: Is there any real use for the flush in only / flush out only modes?
75 
76  uint32_t FP32Denormals =
78 
79  uint32_t FP64Denormals =
81 
84  FP_DENORM_MODE_SP(FP32Denormals) |
85  FP_DENORM_MODE_DP(FP64Denormals);
86 }
87 
88 static AsmPrinter *
90  std::unique_ptr<MCStreamer> &&Streamer) {
91  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
92 }
93 
94 extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
99 }
100 
102  std::unique_ptr<MCStreamer> Streamer)
103  : AsmPrinter(TM, std::move(Streamer)) {
105  HSAMetadataStream.reset(new MetadataStreamerV3());
106  else
107  HSAMetadataStream.reset(new MetadataStreamerV2());
108 }
109 
111  return "AMDGPU Assembly Printer";
112 }
113 
115  return TM.getMCSubtargetInfo();
116 }
117 
119  if (!OutStreamer)
120  return nullptr;
121  return static_cast<AMDGPUTargetStreamer*>(OutStreamer->getTargetStreamer());
122 }
123 
126  std::string ExpectedTarget;
127  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
128  IsaInfo::streamIsaVersion(getGlobalSTI(), ExpectedTargetOS);
129 
130  getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
131  }
132 
133  if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
135  return;
136 
138  HSAMetadataStream->begin(M);
139 
142 
144  return;
145 
146  // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
149 
150  // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
153  Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
154 }
155 
157  // Following code requires TargetStreamer to be present.
158  if (!getTargetStreamer())
159  return;
160 
162  // Emit ISA Version (NT_AMD_AMDGPU_ISA).
163  std::string ISAVersionString;
164  raw_string_ostream ISAVersionStream(ISAVersionString);
165  IsaInfo::streamIsaVersion(getGlobalSTI(), ISAVersionStream);
166  getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
167  }
168 
169  // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
170  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
171  HSAMetadataStream->end();
172  bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
173  (void)Success;
174  assert(Success && "Malformed HSA Metadata");
175  }
176 }
177 
179  const MachineBasicBlock *MBB) const {
181  return false;
182 
183  if (MBB->empty())
184  return true;
185 
186  // If this is a block implementing a long branch, an expression relative to
187  // the start of the block is needed.
188  // XXX - Is there a smarter way to check this?
189  return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
190 }
191 
194  if (!MFI.isEntryFunction())
195  return;
196 
197  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
198  const Function &F = MF->getFunction();
199  if (!STM.hasCodeObjectV3() && STM.isAmdHsaOrMesa(F) &&
200  (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
201  F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
202  amd_kernel_code_t KernelCode;
203  getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
204  getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
205  }
206 
207  if (STM.isAmdHsaOS())
208  HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
209 }
210 
213  if (!MFI.isEntryFunction())
214  return;
215 
218  return;
219 
220  auto &Streamer = getTargetStreamer()->getStreamer();
221  auto &Context = Streamer.getContext();
222  auto &ObjectFileInfo = *Context.getObjectFileInfo();
223  auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
224 
225  Streamer.PushSection();
226  Streamer.SwitchSection(&ReadOnlySection);
227 
228  // CP microcode requires the kernel descriptor to be allocated on 64 byte
229  // alignment.
230  Streamer.EmitValueToAlignment(64, 0, 1, 0);
231  if (ReadOnlySection.getAlignment() < 64)
232  ReadOnlySection.setAlignment(64);
233 
234  const MCSubtargetInfo &STI = MF->getSubtarget();
235 
236  SmallString<128> KernelName;
237  getNameWithPrefix(KernelName, &MF->getFunction());
239  STI, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
240  CurrentProgramInfo.NumVGPRsForWavesPerEU,
241  CurrentProgramInfo.NumSGPRsForWavesPerEU -
243  CurrentProgramInfo.VCCUsed,
244  CurrentProgramInfo.FlatUsed),
245  CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
246  hasXNACK(STI));
247 
248  Streamer.PopSection();
249 }
250 
255  return;
256  }
257 
259  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
260  if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
262  getNameWithPrefix(SymbolName, &MF->getFunction()),
264  SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
265  }
266  if (DumpCodeInstEmitter) {
267  // Disassemble function name label to text.
268  DisasmLines.push_back(MF->getName().str() + ":");
270  HexLines.push_back("");
271  }
272 
274 }
275 
277  if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
278  // Write a line for the basic block label if it is not only fallthrough.
279  DisasmLines.push_back(
280  (Twine("BB") + Twine(getFunctionNumber())
281  + "_" + Twine(MBB.getNumber()) + ":").str());
283  HexLines.push_back("");
284  }
286 }
287 
290  if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
292  Twine(GV->getName()) +
293  ": unsupported initializer for address space");
294  return;
295  }
296 
297  // LDS variables aren't emitted in HSA or PAL yet.
298  const Triple::OSType OS = TM.getTargetTriple().getOS();
299  if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
300  return;
301 
302  MCSymbol *GVSym = getSymbol(GV);
303 
304  GVSym->redefineIfPossible();
305  if (GVSym->isDefined() || GVSym->isVariable())
306  report_fatal_error("symbol '" + Twine(GVSym->getName()) +
307  "' is already defined");
308 
309  const DataLayout &DL = GV->getParent()->getDataLayout();
310  uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
311  unsigned Align = GV->getAlignment();
312  if (!Align)
313  Align = 4;
314 
315  EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
316  EmitLinkage(GV, GVSym);
317  if (auto TS = getTargetStreamer())
318  TS->emitAMDGPULDS(GVSym, Size, Align);
319  return;
320  }
321 
323 }
324 
326  CallGraphResourceInfo.clear();
327 
328  // Pad with s_code_end to help tools and guard against instruction prefetch
329  // causing stale data in caches. Arguably this should be done by the linker,
330  // which is why this isn't done for Mesa.
331  const MCSubtargetInfo &STI = *getGlobalSTI();
332  if (AMDGPU::isGFX10(STI) &&
333  (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
334  STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
335  OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
337  }
338 
339  return AsmPrinter::doFinalization(M);
340 }
341 
342 // Print comments that apply to both callable functions and entry points.
343 void AMDGPUAsmPrinter::emitCommonFunctionComments(
344  uint32_t NumVGPR,
345  uint32_t NumSGPR,
346  uint64_t ScratchSize,
347  uint64_t CodeSize,
348  const AMDGPUMachineFunction *MFI) {
349  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
350  OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
351  OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
352  OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
353  OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
354  false);
355 }
356 
357 uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
358  const MachineFunction &MF) const {
360  uint16_t KernelCodeProperties = 0;
361 
362  if (MFI.hasPrivateSegmentBuffer()) {
363  KernelCodeProperties |=
364  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
365  }
366  if (MFI.hasDispatchPtr()) {
367  KernelCodeProperties |=
368  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
369  }
370  if (MFI.hasQueuePtr()) {
371  KernelCodeProperties |=
372  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
373  }
374  if (MFI.hasKernargSegmentPtr()) {
375  KernelCodeProperties |=
376  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
377  }
378  if (MFI.hasDispatchID()) {
379  KernelCodeProperties |=
380  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
381  }
382  if (MFI.hasFlatScratchInit()) {
383  KernelCodeProperties |=
384  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
385  }
386  if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
387  KernelCodeProperties |=
388  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
389  }
390 
391  return KernelCodeProperties;
392 }
393 
394 amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
395  const MachineFunction &MF,
396  const SIProgramInfo &PI) const {
397  amdhsa::kernel_descriptor_t KernelDescriptor;
398  memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
399 
403 
404  KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
405  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
406  KernelDescriptor.compute_pgm_rsrc1 = PI.ComputePGMRSrc1;
407  KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
408  KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
409 
410  return KernelDescriptor;
411 }
412 
414  CurrentProgramInfo = SIProgramInfo();
415 
417 
418  // The starting address of all shader programs must be 256 bytes aligned.
419  // Regular functions just need the basic required instruction alignment.
420  MF.setAlignment(MFI->isEntryFunction() ? 8 : 2);
421 
423 
424  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
426  // FIXME: This should be an explicit check for Mesa.
427  if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
428  MCSectionELF *ConfigSection =
429  Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
430  OutStreamer->SwitchSection(ConfigSection);
431  }
432 
433  if (MFI->isEntryFunction()) {
434  getSIProgramInfo(CurrentProgramInfo, MF);
435  } else {
436  auto I = CallGraphResourceInfo.insert(
437  std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
438  SIFunctionResourceInfo &Info = I.first->second;
439  assert(I.second && "should only be called once per function");
440  Info = analyzeResourceUsage(MF);
441  }
442 
443  if (STM.isAmdPalOS())
444  EmitPALMetadata(MF, CurrentProgramInfo);
445  else if (!STM.isAmdHsaOS()) {
446  EmitProgramInfoSI(MF, CurrentProgramInfo);
447  }
448 
449  DumpCodeInstEmitter = nullptr;
450  if (STM.dumpCode()) {
451  // For -dumpcode, get the assembler out of the streamer, even if it does
452  // not really want to let us have it. This only works with -filetype=obj.
453  bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();
454  OutStreamer->setUseAssemblerInfoForParsing(true);
455  MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
456  OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
457  if (Assembler)
458  DumpCodeInstEmitter = Assembler->getEmitterPtr();
459  }
460 
461  DisasmLines.clear();
462  HexLines.clear();
463  DisasmLineMaxLen = 0;
464 
466 
467  if (isVerbose()) {
468  MCSectionELF *CommentSection =
469  Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
470  OutStreamer->SwitchSection(CommentSection);
471 
472  if (!MFI->isEntryFunction()) {
473  OutStreamer->emitRawComment(" Function info:", false);
474  SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
475  emitCommonFunctionComments(
476  Info.NumVGPR,
477  Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
478  Info.PrivateSegmentSize,
479  getFunctionCodeSize(MF), MFI);
480  return false;
481  }
482 
483  OutStreamer->emitRawComment(" Kernel info:", false);
484  emitCommonFunctionComments(CurrentProgramInfo.NumVGPR,
485  CurrentProgramInfo.NumSGPR,
486  CurrentProgramInfo.ScratchSize,
487  getFunctionCodeSize(MF), MFI);
488 
489  OutStreamer->emitRawComment(
490  " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
491  OutStreamer->emitRawComment(
492  " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
493  OutStreamer->emitRawComment(
494  " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
495  " bytes/workgroup (compile time only)", false);
496 
497  OutStreamer->emitRawComment(
498  " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
499  OutStreamer->emitRawComment(
500  " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
501 
502  OutStreamer->emitRawComment(
503  " NumSGPRsForWavesPerEU: " +
504  Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
505  OutStreamer->emitRawComment(
506  " NumVGPRsForWavesPerEU: " +
507  Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
508 
509  OutStreamer->emitRawComment(
510  " Occupancy: " +
511  Twine(CurrentProgramInfo.Occupancy), false);
512 
513  OutStreamer->emitRawComment(
514  " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
515 
516  OutStreamer->emitRawComment(
517  " COMPUTE_PGM_RSRC2:USER_SGPR: " +
518  Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
519  OutStreamer->emitRawComment(
520  " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
521  Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
522  OutStreamer->emitRawComment(
523  " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
524  Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
525  OutStreamer->emitRawComment(
526  " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
527  Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
528  OutStreamer->emitRawComment(
529  " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
530  Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
531  OutStreamer->emitRawComment(
532  " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
533  Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
534  false);
535  }
536 
537  if (DumpCodeInstEmitter) {
538 
539  OutStreamer->SwitchSection(
540  Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
541 
542  for (size_t i = 0; i < DisasmLines.size(); ++i) {
543  std::string Comment = "\n";
544  if (!HexLines[i].empty()) {
545  Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
546  Comment += " ; " + HexLines[i] + "\n";
547  }
548 
549  OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
550  OutStreamer->EmitBytes(StringRef(Comment));
551  }
552  }
553 
554  return false;
555 }
556 
557 uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
558  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
559  const SIInstrInfo *TII = STM.getInstrInfo();
560 
561  uint64_t CodeSize = 0;
562 
563  for (const MachineBasicBlock &MBB : MF) {
564  for (const MachineInstr &MI : MBB) {
565  // TODO: CodeSize should account for multiple functions.
566 
567  // TODO: Should we count size of debug info?
568  if (MI.isDebugInstr())
569  continue;
570 
571  CodeSize += TII->getInstSizeInBytes(MI);
572  }
573  }
574 
575  return CodeSize;
576 }
577 
579  const SIInstrInfo &TII,
580  unsigned Reg) {
581  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
582  if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
583  return true;
584  }
585 
586  return false;
587 }
588 
590  const GCNSubtarget &ST) const {
591  return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
592  UsesVCC, UsesFlatScratch);
593 }
594 
595 AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
596  const MachineFunction &MF) const {
597  SIFunctionResourceInfo Info;
598 
600  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
601  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
602  const MachineRegisterInfo &MRI = MF.getRegInfo();
603  const SIInstrInfo *TII = ST.getInstrInfo();
604  const SIRegisterInfo &TRI = TII->getRegisterInfo();
605 
606  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
607  MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI);
608 
609  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
610  // instructions aren't used to access the scratch buffer. Inline assembly may
611  // need it though.
612  //
613  // If we only have implicit uses of flat_scr on flat instructions, it is not
614  // really needed.
615  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
616  (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
617  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
618  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
619  Info.UsesFlatScratch = false;
620  }
621 
622  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
623  Info.PrivateSegmentSize = FrameInfo.getStackSize();
624  if (MFI->isStackRealigned())
625  Info.PrivateSegmentSize += FrameInfo.getMaxAlignment();
626 
627 
628  Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
629  MRI.isPhysRegUsed(AMDGPU::VCC_HI);
630 
631  // If there are no calls, MachineRegisterInfo can tell us the used register
632  // count easily.
633  // A tail call isn't considered a call for MachineFrameInfo's purposes.
634  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
635  MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
636  for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
637  if (MRI.isPhysRegUsed(Reg)) {
638  HighestVGPRReg = Reg;
639  break;
640  }
641  MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg);
642  if (MRI.isPhysRegUsed(AReg)) {
643  HighestVGPRReg = AReg;
644  break;
645  }
646  }
647 
648  MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
649  for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
650  if (MRI.isPhysRegUsed(Reg)) {
651  HighestSGPRReg = Reg;
652  break;
653  }
654  }
655 
656  // We found the maximum register index. They start at 0, so add one to get the
657  // number of registers.
658  Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 :
659  TRI.getHWRegIndex(HighestVGPRReg) + 1;
660  Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 :
661  TRI.getHWRegIndex(HighestSGPRReg) + 1;
662 
663  return Info;
664  }
665 
666  int32_t MaxVGPR = -1;
667  int32_t MaxSGPR = -1;
668  uint64_t CalleeFrameSize = 0;
669 
670  for (const MachineBasicBlock &MBB : MF) {
671  for (const MachineInstr &MI : MBB) {
672  // TODO: Check regmasks? Do they occur anywhere except calls?
673  for (const MachineOperand &MO : MI.operands()) {
674  unsigned Width = 0;
675  bool IsSGPR = false;
676 
677  if (!MO.isReg())
678  continue;
679 
680  Register Reg = MO.getReg();
681  switch (Reg) {
682  case AMDGPU::EXEC:
683  case AMDGPU::EXEC_LO:
684  case AMDGPU::EXEC_HI:
685  case AMDGPU::SCC:
686  case AMDGPU::M0:
687  case AMDGPU::SRC_SHARED_BASE:
688  case AMDGPU::SRC_SHARED_LIMIT:
689  case AMDGPU::SRC_PRIVATE_BASE:
690  case AMDGPU::SRC_PRIVATE_LIMIT:
691  case AMDGPU::SGPR_NULL:
692  continue;
693 
694  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
695  llvm_unreachable("src_pops_exiting_wave_id should not be used");
696 
697  case AMDGPU::NoRegister:
698  assert(MI.isDebugInstr());
699  continue;
700 
701  case AMDGPU::VCC:
702  case AMDGPU::VCC_LO:
703  case AMDGPU::VCC_HI:
704  Info.UsesVCC = true;
705  continue;
706 
707  case AMDGPU::FLAT_SCR:
708  case AMDGPU::FLAT_SCR_LO:
709  case AMDGPU::FLAT_SCR_HI:
710  continue;
711 
712  case AMDGPU::XNACK_MASK:
713  case AMDGPU::XNACK_MASK_LO:
714  case AMDGPU::XNACK_MASK_HI:
715  llvm_unreachable("xnack_mask registers should not be used");
716 
717  case AMDGPU::LDS_DIRECT:
718  llvm_unreachable("lds_direct register should not be used");
719 
720  case AMDGPU::TBA:
721  case AMDGPU::TBA_LO:
722  case AMDGPU::TBA_HI:
723  case AMDGPU::TMA:
724  case AMDGPU::TMA_LO:
725  case AMDGPU::TMA_HI:
726  llvm_unreachable("trap handler registers should not be used");
727 
728  case AMDGPU::SRC_VCCZ:
729  llvm_unreachable("src_vccz register should not be used");
730 
731  case AMDGPU::SRC_EXECZ:
732  llvm_unreachable("src_execz register should not be used");
733 
734  case AMDGPU::SRC_SCC:
735  llvm_unreachable("src_scc register should not be used");
736 
737  default:
738  break;
739  }
740 
741  if (AMDGPU::SReg_32RegClass.contains(Reg)) {
742  assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
743  "trap handler registers should not be used");
744  IsSGPR = true;
745  Width = 1;
746  } else if (AMDGPU::VGPR_32RegClass.contains(Reg)) {
747  IsSGPR = false;
748  Width = 1;
749  } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
750  IsSGPR = false;
751  Width = 1;
752  } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
753  assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
754  "trap handler registers should not be used");
755  IsSGPR = true;
756  Width = 2;
757  } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
758  IsSGPR = false;
759  Width = 2;
760  } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
761  IsSGPR = false;
762  Width = 2;
763  } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
764  IsSGPR = false;
765  Width = 3;
766  } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
767  Width = 3;
768  } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
769  assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
770  "trap handler registers should not be used");
771  IsSGPR = true;
772  Width = 4;
773  } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
774  IsSGPR = false;
775  Width = 4;
776  } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
777  IsSGPR = false;
778  Width = 4;
779  } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
780  assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
781  "trap handler registers should not be used");
782  IsSGPR = true;
783  Width = 8;
784  } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
785  IsSGPR = false;
786  Width = 8;
787  } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
788  assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
789  "trap handler registers should not be used");
790  IsSGPR = true;
791  Width = 16;
792  } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
793  IsSGPR = false;
794  Width = 16;
795  } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
796  IsSGPR = false;
797  Width = 16;
798  } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
799  IsSGPR = true;
800  Width = 32;
801  } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
802  IsSGPR = false;
803  Width = 32;
804  } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
805  IsSGPR = false;
806  Width = 32;
807  } else {
808  llvm_unreachable("Unknown register class");
809  }
810  unsigned HWReg = TRI.getHWRegIndex(Reg);
811  int MaxUsed = HWReg + Width - 1;
812  if (IsSGPR) {
813  MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
814  } else {
815  MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
816  }
817  }
818 
819  if (MI.isCall()) {
820  // Pseudo used just to encode the underlying global. Is there a better
821  // way to track this?
822 
823  const MachineOperand *CalleeOp
824  = TII->getNamedOperand(MI, AMDGPU::OpName::callee);
825  const Function *Callee = cast<Function>(CalleeOp->getGlobal());
826  if (Callee->isDeclaration()) {
827  // If this is a call to an external function, we can't do much. Make
828  // conservative guesses.
829 
830  // 48 SGPRs - vcc, - flat_scr, -xnack
831  int MaxSGPRGuess =
832  47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
833  MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
834  MaxVGPR = std::max(MaxVGPR, 23);
835 
836  CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384));
837  Info.UsesVCC = true;
838  Info.UsesFlatScratch = ST.hasFlatAddressSpace();
839  Info.HasDynamicallySizedStack = true;
840  } else {
841  // We force CodeGen to run in SCC order, so the callee's register
842  // usage etc. should be the cumulative usage of all callees.
843 
844  auto I = CallGraphResourceInfo.find(Callee);
845  if (I == CallGraphResourceInfo.end()) {
846  // Avoid crashing on undefined behavior with an illegal call to a
847  // kernel. If a callsite's calling convention doesn't match the
848  // function's, it's undefined behavior. If the callsite calling
849  // convention does match, that would have errored earlier.
850  // FIXME: The verifier shouldn't allow this.
852  report_fatal_error("invalid call to entry function");
853 
854  llvm_unreachable("callee should have been handled before caller");
855  }
856 
857  MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
858  MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
859  CalleeFrameSize
860  = std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
861  Info.UsesVCC |= I->second.UsesVCC;
862  Info.UsesFlatScratch |= I->second.UsesFlatScratch;
863  Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
864  Info.HasRecursion |= I->second.HasRecursion;
865  }
866 
867  if (!Callee->doesNotRecurse())
868  Info.HasRecursion = true;
869  }
870  }
871  }
872 
873  Info.NumExplicitSGPR = MaxSGPR + 1;
874  Info.NumVGPR = MaxVGPR + 1;
875  Info.PrivateSegmentSize += CalleeFrameSize;
876 
877  return Info;
878 }
879 
880 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
881  const MachineFunction &MF) {
882  SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
883 
884  ProgInfo.NumVGPR = Info.NumVGPR;
885  ProgInfo.NumSGPR = Info.NumExplicitSGPR;
886  ProgInfo.ScratchSize = Info.PrivateSegmentSize;
887  ProgInfo.VCCUsed = Info.UsesVCC;
888  ProgInfo.FlatUsed = Info.UsesFlatScratch;
889  ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
890 
891  if (!isUInt<32>(ProgInfo.ScratchSize)) {
892  DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
893  ProgInfo.ScratchSize, DS_Error);
894  MF.getFunction().getContext().diagnose(DiagStackSize);
895  }
896 
897  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
899 
900  // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
901  // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
902  // unified.
903  unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
904  &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
905 
906  // Check the addressable register limit before we add ExtraSGPRs.
908  !STM.hasSGPRInitBug()) {
909  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
910  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
911  // This can happen due to a compiler bug or when using inline asm.
912  LLVMContext &Ctx = MF.getFunction().getContext();
914  "addressable scalar registers",
915  ProgInfo.NumSGPR, DS_Error,
917  MaxAddressableNumSGPRs);
918  Ctx.diagnose(Diag);
919  ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
920  }
921  }
922 
923  // Account for extra SGPRs and VGPRs reserved for debugger use.
924  ProgInfo.NumSGPR += ExtraSGPRs;
925 
926  // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
927  // dispatch registers are function args.
928  unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
929  for (auto &Arg : MF.getFunction().args()) {
930  unsigned NumRegs = (Arg.getType()->getPrimitiveSizeInBits() + 31) / 32;
931  if (Arg.hasAttribute(Attribute::InReg))
932  WaveDispatchNumSGPR += NumRegs;
933  else
934  WaveDispatchNumVGPR += NumRegs;
935  }
936  ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
937  ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
938 
939  // Adjust number of registers used to meet default/requested minimum/maximum
940  // number of waves per execution unit request.
941  ProgInfo.NumSGPRsForWavesPerEU = std::max(
942  std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
943  ProgInfo.NumVGPRsForWavesPerEU = std::max(
944  std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
945 
947  STM.hasSGPRInitBug()) {
948  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
949  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
950  // This can happen due to a compiler bug or when using inline asm to use
951  // the registers which are usually reserved for vcc etc.
952  LLVMContext &Ctx = MF.getFunction().getContext();
954  "scalar registers",
955  ProgInfo.NumSGPR, DS_Error,
957  MaxAddressableNumSGPRs);
958  Ctx.diagnose(Diag);
959  ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
960  ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
961  }
962  }
963 
964  if (STM.hasSGPRInitBug()) {
965  ProgInfo.NumSGPR =
967  ProgInfo.NumSGPRsForWavesPerEU =
969  }
970 
971  if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
972  LLVMContext &Ctx = MF.getFunction().getContext();
973  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "user SGPRs",
974  MFI->getNumUserSGPRs(), DS_Error);
975  Ctx.diagnose(Diag);
976  }
977 
978  if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
979  LLVMContext &Ctx = MF.getFunction().getContext();
980  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "local memory",
981  MFI->getLDSSize(), DS_Error);
982  Ctx.diagnose(Diag);
983  }
984 
986  &STM, ProgInfo.NumSGPRsForWavesPerEU);
988  &STM, ProgInfo.NumVGPRsForWavesPerEU);
989 
990  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
991  // register.
992  ProgInfo.FloatMode = getFPMode(MF);
993 
994  const SIModeRegisterDefaults Mode = MFI->getMode();
995  ProgInfo.IEEEMode = Mode.IEEE;
996 
997  // Make clamp modifier on NaN input returns 0.
998  ProgInfo.DX10Clamp = Mode.DX10Clamp;
999 
1000  unsigned LDSAlignShift;
1002  // LDS is allocated in 64 dword blocks.
1003  LDSAlignShift = 8;
1004  } else {
1005  // LDS is allocated in 128 dword blocks.
1006  LDSAlignShift = 9;
1007  }
1008 
1009  unsigned LDSSpillSize =
1010  MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize();
1011 
1012  ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
1013  ProgInfo.LDSBlocks =
1014  alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
1015 
1016  // Scratch is allocated in 256 dword blocks.
1017  unsigned ScratchAlignShift = 10;
1018  // We need to program the hardware with the amount of scratch memory that
1019  // is used by the entire wave. ProgInfo.ScratchSize is the amount of
1020  // scratch memory used per thread.
1021  ProgInfo.ScratchBlocks =
1022  alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
1023  1ULL << ScratchAlignShift) >>
1024  ScratchAlignShift;
1025 
1026  if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
1027  ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
1028  ProgInfo.MemOrdered = 1;
1029  }
1030 
1031  ProgInfo.ComputePGMRSrc1 =
1032  S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
1033  S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
1034  S_00B848_PRIORITY(ProgInfo.Priority) |
1035  S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
1036  S_00B848_PRIV(ProgInfo.Priv) |
1037  S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
1038  S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
1039  S_00B848_IEEE_MODE(ProgInfo.IEEEMode) |
1040  S_00B848_WGP_MODE(ProgInfo.WgpMode) |
1041  S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
1042 
1043  // 0 = X, 1 = XY, 2 = XYZ
1044  unsigned TIDIGCompCnt = 0;
1045  if (MFI->hasWorkItemIDZ())
1046  TIDIGCompCnt = 2;
1047  else if (MFI->hasWorkItemIDY())
1048  TIDIGCompCnt = 1;
1049 
1050  ProgInfo.ComputePGMRSrc2 =
1051  S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
1052  S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
1053  // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
1055  S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
1056  S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
1057  S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
1058  S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
1059  S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
1061  // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
1062  S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
1063  S_00B84C_EXCP_EN(0);
1064 
1065  ProgInfo.Occupancy = STM.computeOccupancy(MF, ProgInfo.LDSSize,
1066  ProgInfo.NumSGPRsForWavesPerEU,
1067  ProgInfo.NumVGPRsForWavesPerEU);
1068 }
1069 
// Selects an unsigned value by switching on the calling convention CallConv.
// EmitProgramInfoSI stores the result in RsrcReg and emits it as a 4-byte value.
// NOTE(review): the case labels and returns (listing lines 1073-1079) are absent
// from this listing, so the actual mapping cannot be confirmed from here.
1070 static unsigned getRsrcReg(CallingConv::ID CallConv) {
1071  switch (CallConv) {
1072  default: LLVM_FALLTHROUGH;
1080  }
1081 }
1082 
// Writes the program info for MF to OutStreamer as a sequence of 4-byte
// integers. In the visible code a register identifier (R_* macro or RsrcReg)
// is emitted first and the value for it is emitted right after.
// NOTE(review): listing lines 1085, 1088 and 1110-1111 are absent here (the
// declaration of MFI and the conditions opening two of the branches below).
1083 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
1084  const SIProgramInfo &CurrentProgramInfo) {
  // Register identifier chosen from the function's calling convention; only
  // used in the else branch below.
1086  unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
1087 
  // First branch (its condition is not visible): emit the precomputed
  // COMPUTE_PGM_RSRC1 / COMPUTE_PGM_RSRC2 words and the scratch wave size.
1089  OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
1090 
1091  OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc1, 4);
1092 
1093  OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
1094  OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc2, 4);
1095 
1096  OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
1097  OutStreamer->EmitIntValue(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
1098 
1099  // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
1100  // 0" comment but I don't see a corresponding field in the register spec.
1101  } else {
  // Otherwise: emit RsrcReg with a word built only from the VGPR and SGPR
  // block counts, then SPI_TMPRING_SIZE with the scratch wave size.
1102  OutStreamer->EmitIntValue(RsrcReg, 4);
1103  OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
1104  S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
1105  OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
1106  OutStreamer->EmitIntValue(
1107  S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
1108  }
1109 
  // Branch whose condition is not visible: emit the extra LDS size and the
  // PS input enable / address values read from MFI.
1112  OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks), 4);
1113  OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
1114  OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
1115  OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
1116  OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
1117  }
1118 
  // The spilled SGPR and VGPR counts are emitted after all of the branches.
1119  OutStreamer->EmitIntValue(R_SPILLED_SGPRS, 4);
1120  OutStreamer->EmitIntValue(MFI->getNumSpilledSGPRs(), 4);
1121  OutStreamer->EmitIntValue(R_SPILLED_VGPRS, 4);
1122  OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4);
1123 }
1124 
1125 // This is the equivalent of EmitProgramInfoSI above, but for when the OS type
1126 // is AMDPAL. It stores each compute/SPI register setting and other PAL
1127 // metadata items into the PALMD::Metadata, combining with any provided by the
1128 // frontend as LLVM metadata. Once all functions are written, the PAL metadata
1129 // is then written as a single block in the .note section.
1130 void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
1131  const SIProgramInfo &CurrentProgramInfo) {
  // NOTE(review): listing lines 1132, 1139 and 1150 are absent here (the
  // declaration of MFI and the conditions opening two of the branches below).
1133  auto CC = MF.getFunction().getCallingConv();
1134  auto MD = getTargetStreamer()->getPALMetadata();
1135 
  // Record the entry point name and the register counts, keyed by the
  // function's calling convention.
1136  MD->setEntryPoint(CC, MF.getFunction().getName());
1137  MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1138  MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
  // First branch (its condition is not visible): store the precomputed
  // ComputePGMRSrc1 / ComputePGMRSrc2 words unchanged.
1140  MD->setRsrc1(CC, CurrentProgramInfo.ComputePGMRSrc1);
1141  MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
1142  } else {
  // Otherwise rsrc1 is built only from the VGPR and SGPR block counts, and
  // rsrc2 is set (to SCRATCH_EN) only when any scratch blocks are used.
1143  MD->setRsrc1(CC, S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
1144  S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks));
1145  if (CurrentProgramInfo.ScratchBlocks > 0)
1146  MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
1147  }
1148  // ScratchSize is in bytes, 16 aligned.
1149  MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
  // Branch whose condition is not visible: record the extra LDS size and the
  // PS input enable / address values read from MFI.
1151  MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
1152  MD->setSpiPsInputEna(MFI->getPSInputEnable());
1153  MD->setSpiPsInputAddr(MFI->getPSInputAddr());
1154  }
1155 
  // Mark the calling convention as wave32 when the subtarget reports wave32.
1156  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1157  if (STM.isWave32())
1158  MD->setWave32(MF.getFunction().getCallingConv());
1159 }
1160 
1161 // This is supposed to be log2(Size)
// Maps Size 4, 8 or 16 to AMD_ELEMENT_4_BYTES, AMD_ELEMENT_8_BYTES or
// AMD_ELEMENT_16_BYTES respectively; any other Size reaches llvm_unreachable.
// NOTE(review): the signature line (listing line 1162) is absent from this
// listing.
1163  switch (Size) {
1164  case 4:
1165  return AMD_ELEMENT_4_BYTES;
1166  case 8:
1167  return AMD_ELEMENT_8_BYTES;
1168  case 16:
1169  return AMD_ELEMENT_16_BYTES;
1170  default:
1171  llvm_unreachable("invalid private_element_size");
1172  }
1173 }
1174 
// Fills the amd_kernel_code_t Out for MF from CurrentProgramInfo, the
// function info MFI and the subtarget STM.
// NOTE(review): many statement lines are absent from this listing (the
// declaration of MFI, the left-hand sides of several assignments and the
// bodies of most of the if statements), so only the visible parts are
// described below.
1175 void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
1176  const SIProgramInfo &CurrentProgramInfo,
1177  const MachineFunction &MF) const {
1178  const Function &F = MF.getFunction();
1181 
1183  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1184 
1186 
  // Combines ComputePGMRSrc1 (low bits) with ComputePGMRSrc2 shifted left by
  // 32; the assignment target is on a line that is not visible here.
1188  CurrentProgramInfo.ComputePGMRSrc1 |
1189  (CurrentProgramInfo.ComputePGMRSrc2 << 32);
1191 
1192  if (CurrentProgramInfo.DynamicCallStack)
1194 
1197  getElementByteSizeValue(STM.getMaxPrivateElementSize()));
1198 
  // The checks below update Out.code_properties (visible for the first one);
  // the flag applied by each check is on a line that is not visible here.
1199  if (MFI->hasPrivateSegmentBuffer()) {
1200  Out.code_properties |=
1202  }
1203 
1204  if (MFI->hasDispatchPtr())
1206 
1207  if (MFI->hasQueuePtr())
1209 
1210  if (MFI->hasKernargSegmentPtr())
1212 
1213  if (MFI->hasDispatchID())
1215 
1216  if (MFI->hasFlatScratchInit())
1218 
  // NOTE(review): hasDispatchPtr() is tested a second time here; the guarded
  // statement is not visible, so confirm whether this check is redundant.
1219  if (MFI->hasDispatchPtr())
1221 
1222  if (STM.isXNACKEnabled())
1224 
  // MaxKernArgAlign is passed to getKernArgSegmentSize and read afterwards,
  // so it is presumably filled in by that call -- TODO confirm.
1225  unsigned MaxKernArgAlign;
1226  Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
1227  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1228  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1229  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
1230  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
1231 
1232  // These alignment values are specified in powers of two, so alignment =
1233  // 2^n. The minimum alignment is 2^4 = 16.
1234  Out.kernarg_segment_alignment = std::max<size_t>(4,
1235  countTrailingZeros(MaxKernArgAlign));
1236 }
1237 
1239  const char *ExtraCode, raw_ostream &O) {
  // Returns false if the generic AsmPrinter::PrintAsmOperand returns false or
  // if operand OpNo of MI is a register; returns true for an unrecognized
  // modifier or a non-register operand.
  // NOTE(review): the first line of the signature (listing line 1238) and the
  // start of the call in the register branch (listing line 1259) are absent
  // from this listing.
1240  // First try the generic code, which knows about modifiers like 'c' and 'n'.
1241  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
1242  return false;
1243 
  // Beyond what the generic code accepted, only the single-character
  // modifier 'r' is allowed.
1244  if (ExtraCode && ExtraCode[0]) {
1245  if (ExtraCode[1] != 0)
1246  return true; // Unknown modifier.
1247 
1248  switch (ExtraCode[0]) {
1249  case 'r':
1250  break;
1251  default:
1252  return true;
1253  }
1254  }
1255 
1256  // TODO: Should be able to support other operand types like globals.
1257  const MachineOperand &MO = MI->getOperand(OpNo);
1258  if (MO.isReg()) {
1260  *MF->getSubtarget().getRegisterInfo());
1261  return false;
1262  }
1263 
  // Any operand that is not a register is rejected.
1264  return true;
1265 }
virtual void EmitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
Definition: AsmPrinter.cpp:448
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:348
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
Definition: AsmPrinter.cpp:214
unsigned getAlignment() const
Definition: GlobalObject.h:59
void EmitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
Interface definition for SIRegisterInfo.
Target & getTheGCNTarget()
The target for GCN GPUs.
#define S_00B848_VGPRS(x)
Definition: SIDefines.h:548
#define S_00B848_PRIV(x)
Definition: SIDefines.h:560
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:218
LLVMContext & Context
AMDGPU specific subclass of TargetSubtarget.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:93
SI Whole Quad Mode
#define FP_DENORM_MODE_SP(x)
Definition: SIDefines.h:602
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition: MCSymbol.h:297
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition: SIDefines.h:499
#define G_00B84C_USER_SGPR(x)
Definition: SIDefines.h:512
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
const MCSubtargetInfo * getGlobalSTI() const
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
void redefineIfPossible()
Prepare this symbol to be redefined.
Definition: MCSymbol.h:231
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:88
Address space for local memory.
Definition: AMDGPU.h:274
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:305
void EmitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
#define G_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:524
unsigned Reg
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition: SIDefines.h:495
iterator_range< reg_iterator > reg_operands(unsigned Reg) const
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK)=0
Instruction set architecture version.
Definition: TargetParser.h:136
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
#define S_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:508
const SIInstrInfo * getInstrInfo() const override
#define S_00B84C_TG_SIZE_EN(x)
Definition: SIDefines.h:526
#define S_00B848_DX10_CLAMP(x)
Definition: SIDefines.h:563
#define S_00B848_WGP_MODE(x)
Definition: SIDefines.h:572
uint32_t NumSGPRsForWavesPerEU
Definition: SIProgramInfo.h:49
unsigned const TargetRegisterInfo * TRI
F(f)
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:96
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:207
#define G_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:521
Interface definition for R600RegisterInfo.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
#define R_0286CC_SPI_PS_INPUT_ENA
Definition: SIDefines.h:544
#define S_00B028_SGPRS(x)
Definition: SIDefines.h:505
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:21
void setEntryPoint(unsigned CC, StringRef Name)
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:171
return AArch64::GPR64RegClass contains(Reg)
const Triple & getTargetTriple() const
bool hasFP64Denormals() const
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:119
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
#define S_00B848_MEM_ORDERED(x)
Definition: SIDefines.h:575
#define FP_DENORM_FLUSH_NONE
Definition: SIDefines.h:597
uint32_t code_properties
Code properties.
Definition: BitVector.h:937
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
static bool isFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:488
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:369
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
const HexagonInstrInfo * TII
int getLocalMemorySize() const
AMD Kernel Code Object (amd_kernel_code_t).
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
#define G_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:515
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
bool isTrapHandlerEnabled() const
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:197
#define FP_ROUND_MODE_SP(x)
Definition: SIDefines.h:591
Diagnostic information for stack size etc.
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
#define S_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:520
bool hasCodeObjectV3(const MCSubtargetInfo *STI)
Context object for machine code objects.
Definition: MCContext.h:65
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:273
#define S_00B848_FLOAT_MODE(x)
Definition: SIDefines.h:557
#define R_00B848_COMPUTE_PGM_RSRC1
Definition: SIDefines.h:547
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment...
void EmitFunctionBody()
This method emits the body and trailer for a function.
virtual bool EmitCodeEnd()=0
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getAddressableNumSGPRs() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
#define S_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:514
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
uint64_t ComputePGMRSrc2
Definition: SIProgramInfo.h:41
bool isVerbose() const
Return true if assembly output should contain comments.
Definition: AsmPrinter.h:199
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
bool dumpCode() const
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:19
uint32_t NumVGPRsForWavesPerEU
Definition: SIProgramInfo.h:52
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool hasFP32Denormals() const
#define S_00B848_IEEE_MODE(x)
Definition: SIDefines.h:569
bool isCompute(CallingConv::ID cc)
#define S_00B028_VGPRS(x)
Definition: SIDefines.h:504
static uint32_t getFPMode(const MachineFunction &F)
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
unsigned const MachineRegisterInfo * MRI
VisibilityTypes getVisibility() const
Definition: GlobalValue.h:236
bool isGFX10(const MCSubtargetInfo &STI)
AMDGPUPALMetadata * getPALMetadata()
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
#define G_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:518
#define S_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:529
#define FP_ROUND_MODE_DP(x)
Definition: SIDefines.h:592
const GlobalValue * getGlobal() const
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool isEntryFunctionCC(CallingConv::ID CC)
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:81
This class is intended to be used as a driving class for all asm writers.
Definition: AsmPrinter.h:78
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
Definition: AsmPrinter.cpp:438
void EmitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
bool isCuModeEnabled() const
unsigned getAddressSpace() const
Definition: Globals.cpp:111
#define S_00B84C_EXCP_EN_MSB(x)
Definition: SIDefines.h:533
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void reportError(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:687
Generation getGeneration() const
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
const Triple & getTargetTriple() const
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition: SIDefines.h:496
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
Definition: SIDefines.h:594
#define S_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:523
The AMDGPU TargetMachine interface definition for hw codegen targets.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
Definition: SIDefines.h:500
#define S_00B84C_LDS_SIZE(x)
Definition: SIDefines.h:537
#define R_SPILLED_SGPRS
Definition: SIDefines.h:620
virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:519
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:209
#define FP_ROUND_ROUND_TO_NEAREST
Definition: SIDefines.h:584
bool doesNotRecurse() const
Determine if the function is known not to recurse, directly or indirectly.
Definition: Function.h:577
#define S_00B84C_EXCP_EN(x)
Definition: SIDefines.h:540
void LLVMInitializeAMDGPUAsmPrinter()
IsaVersion getIsaVersion(StringRef GPU)
unsigned getWavefrontSize() const
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
Definition: SIDefines.h:501
AMDGPUTargetStreamer * getTargetStreamer() const
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:210
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:200
#define G_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:530
MCStreamer & getStreamer()
Definition: MCStreamer.h:92
void EmitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
std::vector< std::string > HexLines
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1158
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition: SIDefines.h:498
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel...
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:443
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
R600 Assembly printer class.
MachineOperand class - Representation of each machine instruction operand.
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:136
const MCSubtargetInfo * getMCSubtargetInfo() const
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
bool hasSGPRInitBug() const
#define S_00B848_DEBUG_MODE(x)
Definition: SIDefines.h:566
#define FP_DENORM_MODE_DP(x)
Definition: SIDefines.h:603
#define S_0286E8_WAVESIZE(x)
Definition: SIDefines.h:609
const Function & getFunction() const
Return the LLVM function that this machine code represents.
#define S_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:517
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
Definition: MCSymbol.h:249
#define AMD_HSA_BITS_SET(dst, mask, val)
amdgpu Simplify well known AMD library false FunctionCallee Callee
std::vector< std::string > DisasmLines
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, Optional< bool > EnableWavefrontSize32)
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:188
void EmitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
#define Success
virtual bool EmitISAVersion(StringRef IsaVersionString)=0
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:470
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
void EmitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:215
virtual void EmitDirectiveAMDGCNTarget(StringRef Target)=0
bool doFinalization(Module &M) override
Shut down the asmprinter.
void EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target...
Definition: AsmPrinter.cpp:397
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:126
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
#define S_00B848_SGPRS(x)
Definition: SIDefines.h:551
bool hasXNACK(const MCSubtargetInfo &STI)
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
#define S_00B84C_USER_SGPR(x)
Definition: SIDefines.h:511
#define I(x, y, z)
Definition: MD5.cpp:58
AMDGPU Assembly printer class.
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition: SIDefines.h:605
Generic base class for all target subtargets.
bool isAmdHsaOrMesa(const Function &F) const
This represents a section on linux, lots of unix variants and some bare metal systems.
Definition: MCSectionELF.h:27
Type * getValueType() const
Definition: GlobalValue.h:279
uint32_t Size
Definition: Profile.cpp:46
#define R_SPILLED_VGPRS
Definition: SIDefines.h:621
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
virtual void EmitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AsmPrinter.cpp:741
bool isReg() const
isReg - Tests if this is a MO_Register operand.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:231
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:204
static unsigned getRsrcReg(CallingConv::ID CallConv)
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
unsigned getMaxNumUserSGPRs() const
#define S_00B860_WAVESIZE(x)
Definition: SIDefines.h:606
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition: SIDefines.h:507
#define S_00B848_PRIORITY(x)
Definition: SIDefines.h:554
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:503
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
void EmitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
uint64_t ComputePGMRSrc1
Definition: SIProgramInfo.h:35
bool hasInitializer() const
Definitions have initializers, declarations don't.
#define R_0286E8_SPI_TMPRING_SIZE
Definition: SIDefines.h:608
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:258
void setAlignment(unsigned A)
setAlignment - Set the alignment (log2, not bytes) of the function.
unsigned getHWRegIndex(unsigned Reg) const
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition: SIDefines.h:497
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
IRTranslator LLVM IR MI
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
Definition: SIDefines.h:502
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
AMDGPU metadata definitions and in-memory representations.
virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition: SIDefines.h:545
Register getReg() const
getReg - Returns the register number.
void EmitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
const uint64_t Version
Definition: InstrProf.h:984
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Definition: CallingConv.h:220
MCCodeEmitter * getEmitterPtr() const
Definition: MCAssembler.h:288
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream)
Streams isa version string for given subtarget STI into Stream.
iterator_range< arg_iterator > args()
Definition: Function.h:719
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool hasCodeObjectV3() const