LLVM  10.0.0svn
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 ///
11 /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
12 /// code. When passed an MCAsmStreamer it prints assembly and when passed
13 /// an MCObjectStreamer it outputs binary code.
14 //
15 //===----------------------------------------------------------------------===//
16 //
17 
18 #include "AMDGPUAsmPrinter.h"
19 #include "AMDGPU.h"
20 #include "AMDGPUSubtarget.h"
21 #include "AMDGPUTargetMachine.h"
25 #include "R600AsmPrinter.h"
26 #include "R600Defines.h"
28 #include "R600RegisterInfo.h"
29 #include "SIDefines.h"
30 #include "SIInstrInfo.h"
31 #include "SIMachineFunctionInfo.h"
32 #include "SIRegisterInfo.h"
34 #include "Utils/AMDGPUBaseInfo.h"
35 #include "llvm/BinaryFormat/ELF.h"
37 #include "llvm/IR/DiagnosticInfo.h"
38 #include "llvm/MC/MCAssembler.h"
39 #include "llvm/MC/MCContext.h"
40 #include "llvm/MC/MCSectionELF.h"
41 #include "llvm/MC/MCStreamer.h"
47 
48 using namespace llvm;
49 using namespace llvm::AMDGPU;
50 using namespace llvm::AMDGPU::HSAMD;
51 
52 // TODO: This should get the default rounding mode from the kernel. We just set
53 // the default here, but this could change if the OpenCL rounding mode pragmas
54 // are used.
55 //
56 // The denormal mode here should match what is reported by the OpenCL runtime
57 // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
58 // can be overridden to flush with the -cl-denorms-are-zero compiler flag.
59 //
60 // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
61 // precision, and leaves single precision to flush all and does not report
62 // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
63 // CL_FP_DENORM for both.
64 //
65 // FIXME: It seems some instructions do not support single precision denormals
66 // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
67 // and sin_f32, cos_f32 on most parts).
68 
69 // We want to use these instructions, and using fp32 denormals also causes
70 // instructions to run at the double precision rate for the device so it's
71 // probably best to just report no single precision denormals.
74  // TODO: Is there any real use for the flush in only / flush out only modes?
75 
76  uint32_t FP32Denormals =
78 
79  uint32_t FP64Denormals =
81 
84  FP_DENORM_MODE_SP(FP32Denormals) |
85  FP_DENORM_MODE_DP(FP64Denormals);
86 }
87 
88 static AsmPrinter *
90  std::unique_ptr<MCStreamer> &&Streamer) {
91  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
92 }
93 
94 extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
99 }
100 
102  std::unique_ptr<MCStreamer> Streamer)
103  : AsmPrinter(TM, std::move(Streamer)) {
105  HSAMetadataStream.reset(new MetadataStreamerV3());
106  else
107  HSAMetadataStream.reset(new MetadataStreamerV2());
108 }
109 
111  return "AMDGPU Assembly Printer";
112 }
113 
115  return TM.getMCSubtargetInfo();
116 }
117 
119  if (!OutStreamer)
120  return nullptr;
121  return static_cast<AMDGPUTargetStreamer*>(OutStreamer->getTargetStreamer());
122 }
123 
126  std::string ExpectedTarget;
127  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
128  IsaInfo::streamIsaVersion(getGlobalSTI(), ExpectedTargetOS);
129 
130  getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
131  }
132 
133  if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
135  return;
136 
138  HSAMetadataStream->begin(M);
139 
142 
144  return;
145 
146  // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
149 
150  // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
153  Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
154 }
155 
157  // Following code requires TargetStreamer to be present.
158  if (!getTargetStreamer())
159  return;
160 
162  // Emit ISA Version (NT_AMD_AMDGPU_ISA).
163  std::string ISAVersionString;
164  raw_string_ostream ISAVersionStream(ISAVersionString);
165  IsaInfo::streamIsaVersion(getGlobalSTI(), ISAVersionStream);
166  getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
167  }
168 
169  // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
170  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
171  HSAMetadataStream->end();
172  bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
173  (void)Success;
174  assert(Success && "Malformed HSA Metadata");
175  }
176 }
177 
179  const MachineBasicBlock *MBB) const {
181  return false;
182 
183  if (MBB->empty())
184  return true;
185 
186  // If this is a block implementing a long branch, an expression relative to
187  // the start of the block is needed.
188  // XXX - Is there a smarter way to check this?
189  return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
190 }
191 
194  if (!MFI.isEntryFunction())
195  return;
196 
197  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
198  const Function &F = MF->getFunction();
199  if (!STM.hasCodeObjectV3() && STM.isAmdHsaOrMesa(F) &&
200  (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
201  F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
202  amd_kernel_code_t KernelCode;
203  getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
204  getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
205  }
206 
207  if (STM.isAmdHsaOS())
208  HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
209 }
210 
213  if (!MFI.isEntryFunction())
214  return;
215 
218  return;
219 
220  auto &Streamer = getTargetStreamer()->getStreamer();
221  auto &Context = Streamer.getContext();
222  auto &ObjectFileInfo = *Context.getObjectFileInfo();
223  auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
224 
225  Streamer.PushSection();
226  Streamer.SwitchSection(&ReadOnlySection);
227 
228  // CP microcode requires the kernel descriptor to be allocated on 64 byte
229  // alignment.
230  Streamer.EmitValueToAlignment(64, 0, 1, 0);
231  if (ReadOnlySection.getAlignment() < 64)
232  ReadOnlySection.setAlignment(Align(64));
233 
234  const MCSubtargetInfo &STI = MF->getSubtarget();
235 
236  SmallString<128> KernelName;
237  getNameWithPrefix(KernelName, &MF->getFunction());
239  STI, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
240  CurrentProgramInfo.NumVGPRsForWavesPerEU,
241  CurrentProgramInfo.NumSGPRsForWavesPerEU -
243  CurrentProgramInfo.VCCUsed,
244  CurrentProgramInfo.FlatUsed),
245  CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
246  hasXNACK(STI));
247 
248  Streamer.PopSection();
249 }
250 
255  return;
256  }
257 
259  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
260  if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
262  getNameWithPrefix(SymbolName, &MF->getFunction()),
264  SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
265  }
266  if (DumpCodeInstEmitter) {
267  // Disassemble function name label to text.
268  DisasmLines.push_back(MF->getName().str() + ":");
270  HexLines.push_back("");
271  }
272 
274 }
275 
277  if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
278  // Write a line for the basic block label if it is not only fallthrough.
279  DisasmLines.push_back(
280  (Twine("BB") + Twine(getFunctionNumber())
281  + "_" + Twine(MBB.getNumber()) + ":").str());
283  HexLines.push_back("");
284  }
286 }
287 
290  if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
292  Twine(GV->getName()) +
293  ": unsupported initializer for address space");
294  return;
295  }
296 
297  // LDS variables aren't emitted in HSA or PAL yet.
298  const Triple::OSType OS = TM.getTargetTriple().getOS();
299  if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
300  return;
301 
302  MCSymbol *GVSym = getSymbol(GV);
303 
304  GVSym->redefineIfPossible();
305  if (GVSym->isDefined() || GVSym->isVariable())
306  report_fatal_error("symbol '" + Twine(GVSym->getName()) +
307  "' is already defined");
308 
309  const DataLayout &DL = GV->getParent()->getDataLayout();
310  uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
311  unsigned Align = GV->getAlignment();
312  if (!Align)
313  Align = 4;
314 
315  EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
316  EmitLinkage(GV, GVSym);
317  if (auto TS = getTargetStreamer())
318  TS->emitAMDGPULDS(GVSym, Size, Align);
319  return;
320  }
321 
323 }
324 
326  CallGraphResourceInfo.clear();
327 
328  // Pad with s_code_end to help tools and guard against instruction prefetch
329  // causing stale data in caches. Arguably this should be done by the linker,
330  // which is why this isn't done for Mesa.
331  const MCSubtargetInfo &STI = *getGlobalSTI();
332  if (AMDGPU::isGFX10(STI) &&
333  (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
334  STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
335  OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
337  }
338 
339  return AsmPrinter::doFinalization(M);
340 }
341 
342 // Print comments that apply to both callable functions and entry points.
343 void AMDGPUAsmPrinter::emitCommonFunctionComments(
344  uint32_t NumVGPR,
345  Optional<uint32_t> NumAGPR,
346  uint32_t TotalNumVGPR,
347  uint32_t NumSGPR,
348  uint64_t ScratchSize,
349  uint64_t CodeSize,
350  const AMDGPUMachineFunction *MFI) {
351  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
352  OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
353  OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
354  if (NumAGPR) {
355  OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false);
356  OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR),
357  false);
358  }
359  OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
360  OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
361  false);
362 }
363 
364 uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
365  const MachineFunction &MF) const {
367  uint16_t KernelCodeProperties = 0;
368 
369  if (MFI.hasPrivateSegmentBuffer()) {
370  KernelCodeProperties |=
371  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
372  }
373  if (MFI.hasDispatchPtr()) {
374  KernelCodeProperties |=
375  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
376  }
377  if (MFI.hasQueuePtr()) {
378  KernelCodeProperties |=
379  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
380  }
381  if (MFI.hasKernargSegmentPtr()) {
382  KernelCodeProperties |=
383  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
384  }
385  if (MFI.hasDispatchID()) {
386  KernelCodeProperties |=
387  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
388  }
389  if (MFI.hasFlatScratchInit()) {
390  KernelCodeProperties |=
391  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
392  }
393  if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
394  KernelCodeProperties |=
395  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
396  }
397 
398  return KernelCodeProperties;
399 }
400 
401 amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
402  const MachineFunction &MF,
403  const SIProgramInfo &PI) const {
404  amdhsa::kernel_descriptor_t KernelDescriptor;
405  memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
406 
410 
411  KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
412  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
413  KernelDescriptor.compute_pgm_rsrc1 = PI.ComputePGMRSrc1;
414  KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
415  KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
416 
417  return KernelDescriptor;
418 }
419 
421  CurrentProgramInfo = SIProgramInfo();
422 
424 
425  // The starting address of all shader programs must be 256 bytes aligned.
426  // Regular functions just need the basic required instruction alignment.
427  MF.setAlignment(MFI->isEntryFunction() ? Align(256) : Align(4));
428 
430 
431  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
433  // FIXME: This should be an explicit check for Mesa.
434  if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
435  MCSectionELF *ConfigSection =
436  Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
437  OutStreamer->SwitchSection(ConfigSection);
438  }
439 
440  if (MFI->isEntryFunction()) {
441  getSIProgramInfo(CurrentProgramInfo, MF);
442  } else {
443  auto I = CallGraphResourceInfo.insert(
444  std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
445  SIFunctionResourceInfo &Info = I.first->second;
446  assert(I.second && "should only be called once per function");
447  Info = analyzeResourceUsage(MF);
448  }
449 
450  if (STM.isAmdPalOS())
451  EmitPALMetadata(MF, CurrentProgramInfo);
452  else if (!STM.isAmdHsaOS()) {
453  EmitProgramInfoSI(MF, CurrentProgramInfo);
454  }
455 
456  DumpCodeInstEmitter = nullptr;
457  if (STM.dumpCode()) {
458  // For -dumpcode, get the assembler out of the streamer, even if it does
459  // not really want to let us have it. This only works with -filetype=obj.
460  bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();
461  OutStreamer->setUseAssemblerInfoForParsing(true);
462  MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
463  OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
464  if (Assembler)
465  DumpCodeInstEmitter = Assembler->getEmitterPtr();
466  }
467 
468  DisasmLines.clear();
469  HexLines.clear();
470  DisasmLineMaxLen = 0;
471 
473 
474  if (isVerbose()) {
475  MCSectionELF *CommentSection =
476  Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
477  OutStreamer->SwitchSection(CommentSection);
478 
479  if (!MFI->isEntryFunction()) {
480  OutStreamer->emitRawComment(" Function info:", false);
481  SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
482  emitCommonFunctionComments(
483  Info.NumVGPR,
484  STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(),
485  Info.getTotalNumVGPRs(STM),
486  Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
487  Info.PrivateSegmentSize,
488  getFunctionCodeSize(MF), MFI);
489  return false;
490  }
491 
492  OutStreamer->emitRawComment(" Kernel info:", false);
493  emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR,
494  STM.hasMAIInsts()
495  ? CurrentProgramInfo.NumAccVGPR
496  : Optional<uint32_t>(),
497  CurrentProgramInfo.NumVGPR,
498  CurrentProgramInfo.NumSGPR,
499  CurrentProgramInfo.ScratchSize,
500  getFunctionCodeSize(MF), MFI);
501 
502  OutStreamer->emitRawComment(
503  " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
504  OutStreamer->emitRawComment(
505  " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
506  OutStreamer->emitRawComment(
507  " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
508  " bytes/workgroup (compile time only)", false);
509 
510  OutStreamer->emitRawComment(
511  " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
512  OutStreamer->emitRawComment(
513  " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
514 
515  OutStreamer->emitRawComment(
516  " NumSGPRsForWavesPerEU: " +
517  Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
518  OutStreamer->emitRawComment(
519  " NumVGPRsForWavesPerEU: " +
520  Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
521 
522  OutStreamer->emitRawComment(
523  " Occupancy: " +
524  Twine(CurrentProgramInfo.Occupancy), false);
525 
526  OutStreamer->emitRawComment(
527  " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
528 
529  OutStreamer->emitRawComment(
530  " COMPUTE_PGM_RSRC2:USER_SGPR: " +
531  Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
532  OutStreamer->emitRawComment(
533  " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
534  Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
535  OutStreamer->emitRawComment(
536  " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
537  Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
538  OutStreamer->emitRawComment(
539  " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
540  Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
541  OutStreamer->emitRawComment(
542  " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
543  Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
544  OutStreamer->emitRawComment(
545  " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
546  Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
547  false);
548  }
549 
550  if (DumpCodeInstEmitter) {
551 
552  OutStreamer->SwitchSection(
553  Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
554 
555  for (size_t i = 0; i < DisasmLines.size(); ++i) {
556  std::string Comment = "\n";
557  if (!HexLines[i].empty()) {
558  Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
559  Comment += " ; " + HexLines[i] + "\n";
560  }
561 
562  OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
563  OutStreamer->EmitBytes(StringRef(Comment));
564  }
565  }
566 
567  return false;
568 }
569 
570 uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
571  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
572  const SIInstrInfo *TII = STM.getInstrInfo();
573 
574  uint64_t CodeSize = 0;
575 
576  for (const MachineBasicBlock &MBB : MF) {
577  for (const MachineInstr &MI : MBB) {
578  // TODO: CodeSize should account for multiple functions.
579 
580  // TODO: Should we count size of debug info?
581  if (MI.isDebugInstr())
582  continue;
583 
584  CodeSize += TII->getInstSizeInBytes(MI);
585  }
586  }
587 
588  return CodeSize;
589 }
590 
592  const SIInstrInfo &TII,
593  unsigned Reg) {
594  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
595  if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
596  return true;
597  }
598 
599  return false;
600 }
601 
603  const GCNSubtarget &ST) const {
604  return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
605  UsesVCC, UsesFlatScratch);
606 }
607 
609  const GCNSubtarget &ST) const {
610  return std::max(NumVGPR, NumAGPR);
611 }
612 
613 AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
614  const MachineFunction &MF) const {
615  SIFunctionResourceInfo Info;
616 
618  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
619  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
620  const MachineRegisterInfo &MRI = MF.getRegInfo();
621  const SIInstrInfo *TII = ST.getInstrInfo();
622  const SIRegisterInfo &TRI = TII->getRegisterInfo();
623 
624  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
625  MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI);
626 
627  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
628  // instructions aren't used to access the scratch buffer. Inline assembly may
629  // need it though.
630  //
631  // If we only have implicit uses of flat_scr on flat instructions, it is not
632  // really needed.
633  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
634  (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
635  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
636  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
637  Info.UsesFlatScratch = false;
638  }
639 
640  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
641  Info.PrivateSegmentSize = FrameInfo.getStackSize();
642  if (MFI->isStackRealigned())
643  Info.PrivateSegmentSize += FrameInfo.getMaxAlignment();
644 
645 
646  Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
647  MRI.isPhysRegUsed(AMDGPU::VCC_HI);
648 
649  // If there are no calls, MachineRegisterInfo can tell us the used register
650  // count easily.
651  // A tail call isn't considered a call for MachineFrameInfo's purposes.
652  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
653  MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
654  for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
655  if (MRI.isPhysRegUsed(Reg)) {
656  HighestVGPRReg = Reg;
657  break;
658  }
659  }
660 
661  if (ST.hasMAIInsts()) {
662  MCPhysReg HighestAGPRReg = AMDGPU::NoRegister;
663  for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) {
664  if (MRI.isPhysRegUsed(Reg)) {
665  HighestAGPRReg = Reg;
666  break;
667  }
668  }
669  Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 0 :
670  TRI.getHWRegIndex(HighestAGPRReg) + 1;
671  }
672 
673  MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
674  for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
675  if (MRI.isPhysRegUsed(Reg)) {
676  HighestSGPRReg = Reg;
677  break;
678  }
679  }
680 
681  // We found the maximum register index. They start at 0, so add one to get the
682  // number of registers.
683  Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 :
684  TRI.getHWRegIndex(HighestVGPRReg) + 1;
685  Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 :
686  TRI.getHWRegIndex(HighestSGPRReg) + 1;
687 
688  return Info;
689  }
690 
691  int32_t MaxVGPR = -1;
692  int32_t MaxAGPR = -1;
693  int32_t MaxSGPR = -1;
694  uint64_t CalleeFrameSize = 0;
695 
696  for (const MachineBasicBlock &MBB : MF) {
697  for (const MachineInstr &MI : MBB) {
698  // TODO: Check regmasks? Do they occur anywhere except calls?
699  for (const MachineOperand &MO : MI.operands()) {
700  unsigned Width = 0;
701  bool IsSGPR = false;
702  bool IsAGPR = false;
703 
704  if (!MO.isReg())
705  continue;
706 
707  Register Reg = MO.getReg();
708  switch (Reg) {
709  case AMDGPU::EXEC:
710  case AMDGPU::EXEC_LO:
711  case AMDGPU::EXEC_HI:
712  case AMDGPU::SCC:
713  case AMDGPU::M0:
714  case AMDGPU::SRC_SHARED_BASE:
715  case AMDGPU::SRC_SHARED_LIMIT:
716  case AMDGPU::SRC_PRIVATE_BASE:
717  case AMDGPU::SRC_PRIVATE_LIMIT:
718  case AMDGPU::SGPR_NULL:
719  continue;
720 
721  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
722  llvm_unreachable("src_pops_exiting_wave_id should not be used");
723 
724  case AMDGPU::NoRegister:
725  assert(MI.isDebugInstr());
726  continue;
727 
728  case AMDGPU::VCC:
729  case AMDGPU::VCC_LO:
730  case AMDGPU::VCC_HI:
731  Info.UsesVCC = true;
732  continue;
733 
734  case AMDGPU::FLAT_SCR:
735  case AMDGPU::FLAT_SCR_LO:
736  case AMDGPU::FLAT_SCR_HI:
737  continue;
738 
739  case AMDGPU::XNACK_MASK:
740  case AMDGPU::XNACK_MASK_LO:
741  case AMDGPU::XNACK_MASK_HI:
742  llvm_unreachable("xnack_mask registers should not be used");
743 
744  case AMDGPU::LDS_DIRECT:
745  llvm_unreachable("lds_direct register should not be used");
746 
747  case AMDGPU::TBA:
748  case AMDGPU::TBA_LO:
749  case AMDGPU::TBA_HI:
750  case AMDGPU::TMA:
751  case AMDGPU::TMA_LO:
752  case AMDGPU::TMA_HI:
753  llvm_unreachable("trap handler registers should not be used");
754 
755  case AMDGPU::SRC_VCCZ:
756  llvm_unreachable("src_vccz register should not be used");
757 
758  case AMDGPU::SRC_EXECZ:
759  llvm_unreachable("src_execz register should not be used");
760 
761  case AMDGPU::SRC_SCC:
762  llvm_unreachable("src_scc register should not be used");
763 
764  default:
765  break;
766  }
767 
768  if (AMDGPU::SReg_32RegClass.contains(Reg)) {
769  assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
770  "trap handler registers should not be used");
771  IsSGPR = true;
772  Width = 1;
773  } else if (AMDGPU::VGPR_32RegClass.contains(Reg)) {
774  IsSGPR = false;
775  Width = 1;
776  } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
777  IsSGPR = false;
778  IsAGPR = true;
779  Width = 1;
780  } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
781  assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
782  "trap handler registers should not be used");
783  IsSGPR = true;
784  Width = 2;
785  } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
786  IsSGPR = false;
787  Width = 2;
788  } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
789  IsSGPR = false;
790  IsAGPR = true;
791  Width = 2;
792  } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
793  IsSGPR = false;
794  Width = 3;
795  } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
796  Width = 3;
797  } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
798  assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
799  "trap handler registers should not be used");
800  IsSGPR = true;
801  Width = 4;
802  } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
803  IsSGPR = false;
804  Width = 4;
805  } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
806  IsSGPR = false;
807  IsAGPR = true;
808  Width = 4;
809  } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
810  assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
811  "trap handler registers should not be used");
812  IsSGPR = true;
813  Width = 8;
814  } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
815  IsSGPR = false;
816  Width = 8;
817  } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
818  assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
819  "trap handler registers should not be used");
820  IsSGPR = true;
821  Width = 16;
822  } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
823  IsSGPR = false;
824  Width = 16;
825  } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
826  IsSGPR = false;
827  IsAGPR = true;
828  Width = 16;
829  } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
830  IsSGPR = true;
831  Width = 32;
832  } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
833  IsSGPR = false;
834  Width = 32;
835  } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
836  IsSGPR = false;
837  IsAGPR = true;
838  Width = 32;
839  } else {
840  llvm_unreachable("Unknown register class");
841  }
842  unsigned HWReg = TRI.getHWRegIndex(Reg);
843  int MaxUsed = HWReg + Width - 1;
844  if (IsSGPR) {
845  MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
846  } else if (IsAGPR) {
847  MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
848  } else {
849  MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
850  }
851  }
852 
853  if (MI.isCall()) {
854  // Pseudo used just to encode the underlying global. Is there a better
855  // way to track this?
856 
857  const MachineOperand *CalleeOp
858  = TII->getNamedOperand(MI, AMDGPU::OpName::callee);
859  const Function *Callee = cast<Function>(CalleeOp->getGlobal());
860  if (Callee->isDeclaration()) {
861  // If this is a call to an external function, we can't do much. Make
862  // conservative guesses.
863 
864  // 48 SGPRs - vcc, - flat_scr, -xnack
865  int MaxSGPRGuess =
866  47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
867  MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
868  MaxVGPR = std::max(MaxVGPR, 23);
869  MaxAGPR = std::max(MaxAGPR, 23);
870 
871  CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384));
872  Info.UsesVCC = true;
873  Info.UsesFlatScratch = ST.hasFlatAddressSpace();
874  Info.HasDynamicallySizedStack = true;
875  } else {
876  // We force CodeGen to run in SCC order, so the callee's register
877  // usage etc. should be the cumulative usage of all callees.
878 
879  auto I = CallGraphResourceInfo.find(Callee);
880  if (I == CallGraphResourceInfo.end()) {
881  // Avoid crashing on undefined behavior with an illegal call to a
882  // kernel. If a callsite's calling convention doesn't match the
883  // function's, it's undefined behavior. If the callsite calling
884  // convention does match, that would have errored earlier.
885  // FIXME: The verifier shouldn't allow this.
887  report_fatal_error("invalid call to entry function");
888 
889  llvm_unreachable("callee should have been handled before caller");
890  }
891 
892  MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
893  MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
894  MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR);
895  CalleeFrameSize
896  = std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
897  Info.UsesVCC |= I->second.UsesVCC;
898  Info.UsesFlatScratch |= I->second.UsesFlatScratch;
899  Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
900  Info.HasRecursion |= I->second.HasRecursion;
901  }
902 
903  if (!Callee->doesNotRecurse())
904  Info.HasRecursion = true;
905  }
906  }
907  }
908 
909  Info.NumExplicitSGPR = MaxSGPR + 1;
910  Info.NumVGPR = MaxVGPR + 1;
911  Info.NumAGPR = MaxAGPR + 1;
912  Info.PrivateSegmentSize += CalleeFrameSize;
913 
914  return Info;
915 }
916 
917 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
918  const MachineFunction &MF) {
919  SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
920  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
921 
922  ProgInfo.NumArchVGPR = Info.NumVGPR;
923  ProgInfo.NumAccVGPR = Info.NumAGPR;
924  ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM);
925  ProgInfo.NumSGPR = Info.NumExplicitSGPR;
926  ProgInfo.ScratchSize = Info.PrivateSegmentSize;
927  ProgInfo.VCCUsed = Info.UsesVCC;
928  ProgInfo.FlatUsed = Info.UsesFlatScratch;
929  ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
930 
931  if (!isUInt<32>(ProgInfo.ScratchSize)) {
932  DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
933  ProgInfo.ScratchSize, DS_Error);
934  MF.getFunction().getContext().diagnose(DiagStackSize);
935  }
936 
938 
939  // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
940  // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
941  // unified.
942  unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
943  &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
944 
945  // Check the addressable register limit before we add ExtraSGPRs.
947  !STM.hasSGPRInitBug()) {
948  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
949  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
950  // This can happen due to a compiler bug or when using inline asm.
951  LLVMContext &Ctx = MF.getFunction().getContext();
953  "addressable scalar registers",
954  ProgInfo.NumSGPR, DS_Error,
956  MaxAddressableNumSGPRs);
957  Ctx.diagnose(Diag);
958  ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
959  }
960  }
961 
962  // Account for extra SGPRs and VGPRs reserved for debugger use.
963  ProgInfo.NumSGPR += ExtraSGPRs;
964 
965  // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
966  // dispatch registers are function args.
967  unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
968  for (auto &Arg : MF.getFunction().args()) {
969  unsigned NumRegs = (Arg.getType()->getPrimitiveSizeInBits() + 31) / 32;
970  if (Arg.hasAttribute(Attribute::InReg))
971  WaveDispatchNumSGPR += NumRegs;
972  else
973  WaveDispatchNumVGPR += NumRegs;
974  }
975  ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
976  ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
977 
978  // Adjust number of registers used to meet default/requested minimum/maximum
979  // number of waves per execution unit request.
980  ProgInfo.NumSGPRsForWavesPerEU = std::max(
981  std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
982  ProgInfo.NumVGPRsForWavesPerEU = std::max(
983  std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
984 
986  STM.hasSGPRInitBug()) {
987  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
988  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
989  // This can happen due to a compiler bug or when using inline asm to use
990  // the registers which are usually reserved for vcc etc.
991  LLVMContext &Ctx = MF.getFunction().getContext();
993  "scalar registers",
994  ProgInfo.NumSGPR, DS_Error,
996  MaxAddressableNumSGPRs);
997  Ctx.diagnose(Diag);
998  ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
999  ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
1000  }
1001  }
1002 
1003  if (STM.hasSGPRInitBug()) {
1004  ProgInfo.NumSGPR =
1006  ProgInfo.NumSGPRsForWavesPerEU =
1008  }
1009 
1010  if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
1011  LLVMContext &Ctx = MF.getFunction().getContext();
1012  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "user SGPRs",
1013  MFI->getNumUserSGPRs(), DS_Error);
1014  Ctx.diagnose(Diag);
1015  }
1016 
1017  if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
1018  LLVMContext &Ctx = MF.getFunction().getContext();
1019  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "local memory",
1020  MFI->getLDSSize(), DS_Error);
1021  Ctx.diagnose(Diag);
1022  }
1023 
1025  &STM, ProgInfo.NumSGPRsForWavesPerEU);
1027  &STM, ProgInfo.NumVGPRsForWavesPerEU);
1028 
1029  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
1030  // register.
1031  ProgInfo.FloatMode = getFPMode(MF);
1032 
1033  const SIModeRegisterDefaults Mode = MFI->getMode();
1034  ProgInfo.IEEEMode = Mode.IEEE;
1035 
1036  // Make the clamp modifier return 0 on NaN input.
1037  ProgInfo.DX10Clamp = Mode.DX10Clamp;
1038 
1039  unsigned LDSAlignShift;
1041  // LDS is allocated in 64 dword blocks.
1042  LDSAlignShift = 8;
1043  } else {
1044  // LDS is allocated in 128 dword blocks.
1045  LDSAlignShift = 9;
1046  }
1047 
1048  unsigned LDSSpillSize =
1050 
1051  ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
1052  ProgInfo.LDSBlocks =
1053  alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
1054 
1055  // Scratch is allocated in 256 dword blocks.
1056  unsigned ScratchAlignShift = 10;
1057  // We need to program the hardware with the amount of scratch memory that
1058  // is used by the entire wave. ProgInfo.ScratchSize is the amount of
1059  // scratch memory used per thread.
1060  ProgInfo.ScratchBlocks =
1061  alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
1062  1ULL << ScratchAlignShift) >>
1063  ScratchAlignShift;
1064 
1065  if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
1066  ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
1067  ProgInfo.MemOrdered = 1;
1068  }
1069 
1070  ProgInfo.ComputePGMRSrc1 =
1071  S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
1072  S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
1073  S_00B848_PRIORITY(ProgInfo.Priority) |
1074  S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
1075  S_00B848_PRIV(ProgInfo.Priv) |
1076  S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
1077  S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
1078  S_00B848_IEEE_MODE(ProgInfo.IEEEMode) |
1079  S_00B848_WGP_MODE(ProgInfo.WgpMode) |
1080  S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
1081 
1082  // 0 = X, 1 = XY, 2 = XYZ
1083  unsigned TIDIGCompCnt = 0;
1084  if (MFI->hasWorkItemIDZ())
1085  TIDIGCompCnt = 2;
1086  else if (MFI->hasWorkItemIDY())
1087  TIDIGCompCnt = 1;
1088 
1089  ProgInfo.ComputePGMRSrc2 =
1090  S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
1092  // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
1098  S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
1100  // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
1101  S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
1102  S_00B84C_EXCP_EN(0);
1103 
1104  ProgInfo.Occupancy = STM.computeOccupancy(MF, ProgInfo.LDSSize,
1105  ProgInfo.NumSGPRsForWavesPerEU,
1106  ProgInfo.NumVGPRsForWavesPerEU);
1107 }
1108 
// Returns the resource register for the given calling convention
// (presumably one of the R_*_PGM_RSRC1_* values; the cases are not visible).
// NOTE(review): this listing jumps from embedded line 1111 to 1119, so the
// case labels and return statements of the switch are missing here. Only the
// default label is shown, and it falls through to the first unseen case.
1109 static unsigned getRsrcReg(CallingConv::ID CallConv) {
1110  switch (CallConv) {
1111  default: LLVM_FALLTHROUGH;
1119  }
1120 }
1121 
// Writes the program info for MF to OutStreamer as a flat sequence of 4-byte
// integers: each register identifier is followed by the value for that
// register, and the sequence ends with the spilled SGPR and VGPR counts.
// NOTE(review): embedded lines 1124, 1127, 1149 and 1150 are missing from this
// listing, so the declaration of MFI and the conditions that open the two
// brace-closed branches below are not visible here.
1122 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
1123  const SIProgramInfo &CurrentProgramInfo) {
1125  unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
1126 
      // First branch: emit the precomputed COMPUTE_PGM_RSRC1 / RSRC2 words and
      // the compute TMPRING wave size derived from ScratchBlocks.
1128  OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
1129 
1130  OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc1, 4);
1131 
1132  OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
1133  OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc2, 4);
1134 
1135  OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
1136  OutStreamer->EmitIntValue(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
1137 
1138  // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
1139  // 0" comment but I don't see a corresponding field in the register spec.
1140  } else {
      // Second branch: emit the calling-convention specific register with only
      // the VGPR/SGPR block counts, then the SPI TMPRING wave size.
1141  OutStreamer->EmitIntValue(RsrcReg, 4);
1142  OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
1143  S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
1144  OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
1145  OutStreamer->EmitIntValue(
1146  S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
1147  }
1148 
      // The values below (extra LDS size and the PS input enable/address
      // masks) sit inside a conditional whose opening line is not shown.
1151  OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks), 4);
1152  OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
1153  OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
1154  OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
1155  OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
1156  }
1157 
      // Unconditionally report how many SGPRs and VGPRs were spilled.
1158  OutStreamer->EmitIntValue(R_SPILLED_SGPRS, 4);
1159  OutStreamer->EmitIntValue(MFI->getNumSpilledSGPRs(), 4);
1160  OutStreamer->EmitIntValue(R_SPILLED_VGPRS, 4);
1161  OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4);
1162 }
1163 
1164 // This is the equivalent of EmitProgramInfoSI above, but for when the OS type
1165 // is AMDPAL. It stores each compute/SPI register setting and other PAL
1166 // metadata items into the PALMD::Metadata, combining with any provided by the
1167 // frontend as LLVM metadata. Once all functions are written, the PAL metadata
1168 // is then written as a single block in the .note section.
// NOTE(review): embedded lines 1171, 1178 and 1189 are missing from this
// listing, so the declaration of MFI and the conditions that open the two
// brace-closed branches below are not visible here.
1169 void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
1170  const SIProgramInfo &CurrentProgramInfo) {
1172  auto CC = MF.getFunction().getCallingConv();
1173  auto MD = getTargetStreamer()->getPALMetadata();
1174 
      // Record the entry point name and the register counts, all keyed by the
      // function's calling convention.
1175  MD->setEntryPoint(CC, MF.getFunction().getName());
1176  MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1177  MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
      // First branch: store the precomputed RSRC1/RSRC2 words unchanged.
1179  MD->setRsrc1(CC, CurrentProgramInfo.ComputePGMRSrc1);
1180  MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
1181  } else {
      // Second branch: RSRC1 carries only the VGPR/SGPR block counts, and
      // RSRC2 gets the scratch-enable bit only when scratch is in use.
1182  MD->setRsrc1(CC, S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
1183  S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks));
1184  if (CurrentProgramInfo.ScratchBlocks > 0)
1185  MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
1186  }
1187  // ScratchSize is in bytes, 16 aligned.
1188  MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
      // The extra LDS size and PS input masks below sit inside a conditional
      // whose opening line is not shown.
1190  MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
1191  MD->setSpiPsInputEna(MFI->getPSInputEnable());
1192  MD->setSpiPsInputAddr(MFI->getPSInputAddr());
1193  }
1194 
      // Flag the calling convention as wave32 when the subtarget reports it.
1195  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1196  if (STM.isWave32())
1197  MD->setWave32(MF.getFunction().getCallingConv());
1198 }
1199 
1200 // This is supposed to be log2(Size)
// Maps an element size of 4, 8 or 16 to the matching AMD_ELEMENT_*_BYTES
// enumerator; any other size reaches llvm_unreachable.
// NOTE(review): embedded line 1201 (the function signature) is missing from
// this listing; the caller in getAmdKernelCode passes
// STM.getMaxPrivateElementSize() as Size.
1202  switch (Size) {
1203  case 4:
1204  return AMD_ELEMENT_4_BYTES;
1205  case 8:
1206  return AMD_ELEMENT_8_BYTES;
1207  case 16:
1208  return AMD_ELEMENT_16_BYTES;
1209  default:
1210  llvm_unreachable("invalid private_element_size");
1211  }
1212 }
1213 
// Populates the amd_kernel_code_t header Out for MF from the already computed
// CurrentProgramInfo, the function info MFI and the subtarget STM.
// The visible statements set the kernarg segment size and alignment, the
// SGPR/VGPR counts, and the private (scratch) and group (LDS) segment sizes.
// NOTE(review): many embedded lines are missing from this listing (for example
// 1218-1219, 1221, 1224, 1226, 1229, 1232, 1234-1235 and the statement after
// each single-line `if` below), so the declaration of MFI and the code
// property bits being set are not visible here.
1214 void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
1215  const SIProgramInfo &CurrentProgramInfo,
1216  const MachineFunction &MF) const {
1217  const Function &F = MF.getFunction();
1220 
1222  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1223 
1225 
      // RSRC1 occupies the low 32 bits and RSRC2 is shifted into the high 32
      // bits of one 64-bit value (the assignment target is on a missing line).
1227  CurrentProgramInfo.ComputePGMRSrc1 |
1228  (CurrentProgramInfo.ComputePGMRSrc2 << 32);
1230 
1231  if (CurrentProgramInfo.DynamicCallStack)
1233 
1236  getElementByteSizeValue(STM.getMaxPrivateElementSize()));
1237 
1238  if (MFI->hasPrivateSegmentBuffer()) {
1239  Out.code_properties |=
1241  }
1242 
1243  if (MFI->hasDispatchPtr())
1245 
1246  if (MFI->hasQueuePtr())
1248 
1249  if (MFI->hasKernargSegmentPtr())
1251 
1252  if (MFI->hasDispatchID())
1254 
1255  if (MFI->hasFlatScratchInit())
1257 
      // NOTE(review): hasDispatchPtr() was already tested on embedded line
      // 1243. The guarded statements are not visible in this listing, so
      // confirm whether this second test is redundant.
1258  if (MFI->hasDispatchPtr())
1260 
1261  if (STM.isXNACKEnabled())
1263 
      // getKernArgSegmentSize also reports the largest argument alignment
      // through MaxKernArgAlign, which is used for the alignment field below.
1264  Align MaxKernArgAlign;
1265  Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
1266  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1267  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1268  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
1269  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
1270 
1271  // kernarg_segment_alignment is specified as log of the alignment.
1272  // The minimum alignment is 16.
1273  Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
1274 }
1275 
// Prints operand OpNo of the inline-asm instruction MI to O.
// Returns false when the operand was printed and true when it could not be
// handled. The generic AsmPrinter implementation is tried first; after that
// only an empty modifier or the single-character modifier 'r' is accepted,
// and only register operands are printed.
// NOTE(review): embedded lines 1276 (the start of the signature) and 1297 (the
// call whose final argument appears on line 1298) are missing from this
// listing.
1277  const char *ExtraCode, raw_ostream &O) {
1278  // First try the generic code, which knows about modifiers like 'c' and 'n'.
1279  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
1280  return false;
1281 
      // A modifier is only accepted when it is exactly one character long.
1282  if (ExtraCode && ExtraCode[0]) {
1283  if (ExtraCode[1] != 0)
1284  return true; // Unknown modifier.
1285 
1286  switch (ExtraCode[0]) {
1287  case 'r':
1288  break;
1289  default:
1290  return true;
1291  }
1292  }
1293 
1294  // TODO: Should be able to support other operand types like globals.
1295  const MachineOperand &MO = MI->getOperand(OpNo);
1296  if (MO.isReg()) {
1298  *MF->getSubtarget().getRegisterInfo());
1299  return false;
1300  }
1301 
      // Any non-register operand is reported as not handled.
1302  return true;
1303 }
virtual void EmitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
Definition: AsmPrinter.cpp:454
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:204
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:385
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
Definition: AsmPrinter.cpp:216
unsigned getAlignment() const
Definition: GlobalObject.h:73
void EmitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Interface definition for SIRegisterInfo.
Target & getTheGCNTarget()
The target for GCN GPUs.
#define S_00B848_VGPRS(x)
Definition: SIDefines.h:552
#define S_00B848_PRIV(x)
Definition: SIDefines.h:564
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:232
LLVMContext & Context
AMDGPU specific subclass of TargetSubtarget.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:93
SI Whole Quad Mode
#define FP_DENORM_MODE_SP(x)
Definition: SIDefines.h:606
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition: MCSymbol.h:297
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition: SIDefines.h:503
#define G_00B84C_USER_SGPR(x)
Definition: SIDefines.h:516
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
const MCSubtargetInfo * getGlobalSTI() const
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:199
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:66
void redefineIfPossible()
Prepare this symbol to be redefined.
Definition: MCSymbol.h:231
bool doFinalization(Module &M) override
Shut down the asmprinter.
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:88
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:305
void EmitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
#define G_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:528
unsigned Reg
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition: SIDefines.h:499
iterator_range< reg_iterator > reg_operands(unsigned Reg) const
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK)=0
Instruction set architecture version.
Definition: TargetParser.h:136
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
#define S_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:512
const SIInstrInfo * getInstrInfo() const override
#define S_00B84C_TG_SIZE_EN(x)
Definition: SIDefines.h:530
#define S_00B848_DX10_CLAMP(x)
Definition: SIDefines.h:567
#define S_00B848_WGP_MODE(x)
Definition: SIDefines.h:576
uint32_t NumSGPRsForWavesPerEU
Definition: SIProgramInfo.h:51
unsigned const TargetRegisterInfo * TRI
F(f)
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:96
#define G_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:525
Interface definition for R600RegisterInfo.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
#define R_0286CC_SPI_PS_INPUT_ENA
Definition: SIDefines.h:548
#define S_00B028_SGPRS(x)
Definition: SIDefines.h:509
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:21
void setEntryPoint(unsigned CC, StringRef Name)
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:171
return AArch64::GPR64RegClass contains(Reg)
AMDGPU::SIModeRegisterDefaults getMode() const
const Triple & getTargetTriple() const
bool hasFP64Denormals() const
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
#define S_00B848_MEM_ORDERED(x)
Definition: SIDefines.h:579
#define FP_DENORM_FLUSH_NONE
Definition: SIDefines.h:601
uint32_t code_properties
Code properties.
void setAlignment(Align A)
setAlignment - Set the alignment of the function.
Definition: BitVector.h:937
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
static bool isFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:495
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:369
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
const HexagonInstrInfo * TII
int getLocalMemorySize() const
AMD Kernel Code Object (amd_kernel_code_t).
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:193
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
#define G_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:519
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
bool isTrapHandlerEnabled() const
#define FP_ROUND_MODE_SP(x)
Definition: SIDefines.h:595
Diagnostic information for stack size etc.
#define S_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:524
bool hasCodeObjectV3(const MCSubtargetInfo *STI)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Context object for machine code objects.
Definition: MCContext.h:65
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:261
#define S_00B848_FLOAT_MODE(x)
Definition: SIDefines.h:561
#define R_00B848_COMPUTE_PGM_RSRC1
Definition: SIDefines.h:551
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment...
void EmitFunctionBody()
This method emits the body and trailer for a function.
virtual bool EmitCodeEnd()=0
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getAddressableNumSGPRs() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
#define S_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:518
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:196
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:220
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
uint64_t ComputePGMRSrc2
Definition: SIProgramInfo.h:41
bool isVerbose() const
Return true if assembly output should contain comments.
Definition: AsmPrinter.h:203
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
bool dumpCode() const
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:19
uint32_t NumVGPRsForWavesPerEU
Definition: SIProgramInfo.h:54
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool hasFP32Denormals() const
#define S_00B848_IEEE_MODE(x)
Definition: SIDefines.h:573
bool isCompute(CallingConv::ID cc)
#define S_00B028_VGPRS(x)
Definition: SIDefines.h:508
static uint32_t getFPMode(const MachineFunction &F)
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
unsigned const MachineRegisterInfo * MRI
VisibilityTypes getVisibility() const
Definition: GlobalValue.h:236
bool isGFX10(const MCSubtargetInfo &STI)
AMDGPUPALMetadata * getPALMetadata()
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Address space for local memory.
Definition: AMDGPU.h:274
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
#define G_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:522
#define S_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:533
#define FP_ROUND_MODE_DP(x)
Definition: SIDefines.h:596
const GlobalValue * getGlobal() const
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:486
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool isEntryFunctionCC(CallingConv::ID CC)
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:81
This class is intended to be used as a driving class for all asm writers.
Definition: AsmPrinter.h:78
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
Definition: AsmPrinter.cpp:444
void EmitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
bool isCuModeEnabled() const
unsigned getAddressSpace() const
Definition: Globals.cpp:111
#define S_00B84C_EXCP_EN_MSB(x)
Definition: SIDefines.h:537
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void reportError(SMLoc L, const Twine &Msg)
Definition: MCContext.cpp:687
Generation getGeneration() const
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
const Triple & getTargetTriple() const
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition: SIDefines.h:500
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
Definition: SIDefines.h:598
#define S_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:527
The AMDGPU TargetMachine interface definition for hw codegen targets.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
Definition: SIDefines.h:504
#define S_00B84C_LDS_SIZE(x)
Definition: SIDefines.h:541
#define R_SPILLED_SGPRS
Definition: SIDefines.h:624
virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:519
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:197
#define FP_ROUND_ROUND_TO_NEAREST
Definition: SIDefines.h:588
bool doesNotRecurse() const
Determine if the function is known not to recurse, directly or indirectly.
Definition: Function.h:582
#define S_00B84C_EXCP_EN(x)
Definition: SIDefines.h:544
void LLVMInitializeAMDGPUAsmPrinter()
IsaVersion getIsaVersion(StringRef GPU)
unsigned getWavefrontSize() const
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
Definition: SIDefines.h:505
AMDGPUTargetStreamer * getTargetStreamer() const
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:212
#define G_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:534
MCStreamer & getStreamer()
Definition: MCStreamer.h:99
void EmitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
std::vector< std::string > HexLines
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1146
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:390
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition: SIDefines.h:502
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel...
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:449
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
R600 Assembly printer class.
MachineOperand class - Representation of each machine instruction operand.
const MCSubtargetInfo * getMCSubtargetInfo() const
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:202
bool hasSGPRInitBug() const
#define S_00B848_DEBUG_MODE(x)
Definition: SIDefines.h:570
#define FP_DENORM_MODE_DP(x)
Definition: SIDefines.h:607
#define S_0286E8_WAVESIZE(x)
Definition: SIDefines.h:613
const Function & getFunction() const
Return the LLVM function that this machine code represents.
#define S_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:521
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
Definition: MCSymbol.h:249
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:141
#define AMD_HSA_BITS_SET(dst, mask, val)
amdgpu Simplify well known AMD library false FunctionCallee Callee
std::vector< std::string > DisasmLines
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, Optional< bool > EnableWavefrontSize32)
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Definition: CallingConv.h:225
void EmitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
#define Success
virtual bool EmitISAVersion(StringRef IsaVersionString)=0
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:63
void EmitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
virtual void EmitDirectiveAMDGCNTarget(StringRef Target)=0
bool doFinalization(Module &M) override
Shut down the asmprinter.
void EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target...
Definition: AsmPrinter.cpp:400
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:163
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
#define S_00B848_SGPRS(x)
Definition: SIDefines.h:555
bool hasXNACK(const MCSubtargetInfo &STI)
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
#define S_00B84C_USER_SGPR(x)
Definition: SIDefines.h:515
#define I(x, y, z)
Definition: MD5.cpp:58
AMDGPU Assembly printer class.
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition: SIDefines.h:609
Generic base class for all target subtargets.
bool isAmdHsaOrMesa(const Function &F) const
This represents a section on linux, lots of unix variants and some bare metal systems.
Definition: MCSectionELF.h:27
Type * getValueType() const
Definition: GlobalValue.h:279
uint32_t Size
Definition: Profile.cpp:46
#define R_SPILLED_VGPRS
Definition: SIDefines.h:625
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
virtual void EmitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AsmPrinter.cpp:753
bool isReg() const
isReg - Tests if this is a MO_Register operand.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:231
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:204
static unsigned getRsrcReg(CallingConv::ID CallConv)
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
unsigned getMaxNumUserSGPRs() const
#define S_00B860_WAVESIZE(x)
Definition: SIDefines.h:610
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition: SIDefines.h:511
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:212
#define S_00B848_PRIORITY(x)
Definition: SIDefines.h:558
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:503
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
void EmitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
uint64_t ComputePGMRSrc1
Definition: SIProgramInfo.h:35
bool hasInitializer() const
Definitions have initializers, declarations don't.
#define R_0286E8_SPI_TMPRING_SIZE
Definition: SIDefines.h:612
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:273
unsigned getHWRegIndex(unsigned Reg) const
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition: SIDefines.h:501
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
bool hasMAIInsts() const
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
IRTranslator LLVM IR MI
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
Definition: SIDefines.h:506
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
AMDGPU metadata definitions and in-memory representations.
virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition: SIDefines.h:549
Register getReg() const
getReg - Returns the register number.
void EmitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:205
const uint64_t Version
Definition: InstrProf.h:980
MCCodeEmitter * getEmitterPtr() const
Definition: MCAssembler.h:288
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream)
Streams isa version string for given subtarget STI into Stream.
iterator_range< arg_iterator > args()
Definition: Function.h:724
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool hasCodeObjectV3() const