LLVM  9.0.0svn
AMDGPUBaseInfo.cpp
Go to the documentation of this file.
1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUBaseInfo.h"
11 #include "AMDGPU.h"
12 #include "SIDefines.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/ADT/Triple.h"
15 #include "llvm/BinaryFormat/ELF.h"
17 #include "llvm/IR/Attributes.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/Function.h"
20 #include "llvm/IR/GlobalValue.h"
21 #include "llvm/IR/Instruction.h"
22 #include "llvm/IR/LLVMContext.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCInstrDesc.h"
26 #include "llvm/MC/MCInstrInfo.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCSectionELF.h"
31 #include "llvm/Support/Casting.h"
34 #include <algorithm>
35 #include <cassert>
36 #include <cstdint>
37 #include <cstring>
38 #include <utility>
39 
41 
42 #define GET_INSTRINFO_NAMED_OPS
43 #define GET_INSTRMAP_INFO
44 #include "AMDGPUGenInstrInfo.inc"
45 #undef GET_INSTRMAP_INFO
46 #undef GET_INSTRINFO_NAMED_OPS
47 
48 namespace {
49 
50 /// \returns Bit mask for given bit \p Shift and bit \p Width.
/// \returns Bit mask for given bit \p Shift and bit \p Width.
/// Requires Shift + Width <= 32. Uses an unsigned literal so that
/// Width == 31 does not overflow a signed int (UB with `1 << 31`).
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1u << Width) - 1) << Shift;
}
54 
55 /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
56 ///
57 /// \returns Packed \p Dst.
58 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
59  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
60  Dst |= (Src << Shift) & getBitMask(Shift, Width);
61  return Dst;
62 }
63 
64 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
65 ///
66 /// \returns Unpacked bits.
/// Extracts the \p Width-bit field at bit \p Shift from \p Src.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src >> Shift) & ((1u << Width) - 1);
}
70 
/// \returns Bit position of the low (pre-gfx9) part of the vmcnt field.
unsigned getVmcntBitShiftLo() { return 0; }
73 
/// \returns Width in bits of the low part of the vmcnt field.
unsigned getVmcntBitWidthLo() { return 4; }
76 
/// \returns Bit position of the expcnt field in the s_waitcnt encoding.
unsigned getExpcntBitShift() { return 4; }
79 
/// \returns Width in bits of the expcnt field.
unsigned getExpcntBitWidth() { return 3; }
82 
/// \returns Bit position of the lgkmcnt field in the s_waitcnt encoding.
unsigned getLgkmcntBitShift() { return 8; }
85 
/// \returns Width in bits of the lgkmcnt field.
unsigned getLgkmcntBitWidth() { return 4; }
88 
/// \returns Bit position of the high (gfx9+) part of the vmcnt field.
unsigned getVmcntBitShiftHi() { return 14; }
91 
/// \returns Width in bits of the high part of the vmcnt field.
unsigned getVmcntBitWidthHi() { return 2; }
94 
95 } // end namespace anonymous
96 
97 namespace llvm {
98 
99 namespace AMDGPU {
100 
/// Table row describing one concrete MIMG instruction variant; rows are
/// generated by TableGen (AMDGPUGenSearchableTables.inc) and looked up by
/// getMIMGOpcodeHelper / getMIMGInfo.
struct MIMGInfo {
  uint16_t Opcode;       // concrete MC opcode of this variant
  uint16_t BaseOpcode;   // generic base opcode shared by all variants
  uint8_t MIMGEncoding;  // encoding family selector
  uint8_t VDataDwords;   // dwords in the vdata operand
  uint8_t VAddrDwords;   // dwords in the vaddr operand
};
108 
109 #define GET_MIMGBaseOpcodesTable_IMPL
110 #define GET_MIMGDimInfoTable_IMPL
111 #define GET_MIMGInfoTable_IMPL
112 #define GET_MIMGLZMappingTable_IMPL
113 #include "AMDGPUGenSearchableTables.inc"
114 
115 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
116  unsigned VDataDwords, unsigned VAddrDwords) {
117  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
118  VDataDwords, VAddrDwords);
119  return Info ? Info->Opcode : -1;
120 }
121 
/// \returns the MIMG opcode equivalent to \p Opc but writing \p NewChannels
/// data dwords, or -1 when no such variant exists.
/// NOTE(review): assumes \p Opc is a valid MIMG opcode — getMIMGInfo's result
/// is dereferenced without a null check; confirm against callers.
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  // Same base opcode/encoding/address size, different data size.
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}
129 
/// Table row describing one MUBUF instruction variant (TableGen-generated).
/// NOTE(review): the `has_soffset` field was lost in extraction — it is
/// required by getMUBUFHasSoffset() below; restored here.
struct MUBUFInfo {
  uint16_t Opcode;      // concrete MC opcode
  uint16_t BaseOpcode;  // generic base opcode
  uint8_t dwords;       // data size in dwords
  bool has_vaddr;       // variant has a vaddr operand
  bool has_srsrc;       // variant has an srsrc operand
  bool has_soffset;     // variant has an soffset operand
};
138 
139 #define GET_MUBUFInfoTable_DECL
140 #define GET_MUBUFInfoTable_IMPL
141 #include "AMDGPUGenSearchableTables.inc"
142 
143 int getMUBUFBaseOpcode(unsigned Opc) {
144  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
145  return Info ? Info->BaseOpcode : -1;
146 }
147 
148 int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords) {
149  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndDwords(BaseOpc, Dwords);
150  return Info ? Info->Opcode : -1;
151 }
152 
153 int getMUBUFDwords(unsigned Opc) {
154  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
155  return Info ? Info->dwords : 0;
156 }
157 
158 bool getMUBUFHasVAddr(unsigned Opc) {
159  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
160  return Info ? Info->has_vaddr : false;
161 }
162 
163 bool getMUBUFHasSrsrc(unsigned Opc) {
164  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
165  return Info ? Info->has_srsrc : false;
166 }
167 
168 bool getMUBUFHasSoffset(unsigned Opc) {
169  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
170  return Info ? Info->has_soffset : false;
171 }
172 
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
/// \returns the MC opcode for pseudo \p Opcode on generation \p Gen.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
179 
180 namespace IsaInfo {
181 
182 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
183  auto TargetTriple = STI->getTargetTriple();
184  auto Version = getIsaVersion(STI->getCPU());
185 
186  Stream << TargetTriple.getArchName() << '-'
187  << TargetTriple.getVendorName() << '-'
188  << TargetTriple.getOSName() << '-'
189  << TargetTriple.getEnvironmentName() << '-'
190  << "gfx"
191  << Version.Major
192  << Version.Minor
193  << Version.Stepping;
194 
195  if (hasXNACK(*STI))
196  Stream << "+xnack";
197  if (hasSRAMECC(*STI))
198  Stream << "+sram-ecc";
199 
200  Stream.flush();
201 }
202 
204  return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
205  STI->getFeatureBits().test(FeatureCodeObjectV3);
206 }
207 
208 unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
209  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
210  return 16;
211  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
212  return 32;
213 
214  return 64;
215 }
216 
217 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
218  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
219  return 32768;
220  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
221  return 65536;
222 
223  return 0;
224 }
225 
/// \returns number of execution units per compute unit (constant 4 for all
/// subtargets handled here; \p STI is unused).
unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  return 4;
}
229 
/// \returns maximum number of work groups of size \p FlatWorkGroupSize that
/// can be resident on one compute unit.
/// NOTE(review): the first signature line was lost in extraction; restored
/// from the hint "unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
/// unsigned FlatWorkGroupSize)".
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  if (!STI->getFeatureBits().test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1)
    return 40; // single-wave groups are limited only by the wave slots
  N = 40 / N;  // 40 wave slots per CU, divided among the group's waves
  return std::min(N, 16u);
}
240 
/// \returns maximum number of waves per compute unit (waves/EU times EUs/CU).
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
  return getMaxWavesPerEU() * getEUsPerCU(STI);
}
244 
/// \returns maximum number of waves per compute unit for a work group of
/// size \p FlatWorkGroupSize (one group's worth of waves).
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
  unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
}
249 
/// \returns minimum number of waves per execution unit (always 1).
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}
253 
/// \returns maximum number of waves per execution unit.
unsigned getMaxWavesPerEU() {
  // FIXME: Need to take scratch memory into account.
  return 10;
}
258 
259 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
260  unsigned FlatWorkGroupSize) {
261  return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
262  getEUsPerCU(STI)) / getEUsPerCU(STI);
263 }
264 
266  return 1;
267 }
268 
270  return 2048;
271 }
272 
274  unsigned FlatWorkGroupSize) {
275  return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
276  getWavefrontSize(STI);
277 }
278 
279 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
281  if (Version.Major >= 8)
282  return 16;
283  return 8;
284 }
285 
287  return 8;
288 }
289 
290 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
292  if (Version.Major >= 8)
293  return 800;
294  return 512;
295 }
296 
298  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
300 
302  if (Version.Major >= 8)
303  return 102;
304  return 104;
305 }
306 
/// \returns minimum number of SGPRs that must be allocated per wave so that
/// at most \p WavesPerEU waves fit on an execution unit.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  // At maximum occupancy any SGPR count is acceptable — no lower bound.
  if (WavesPerEU >= getMaxWavesPerEU())
    return 0;

  // Largest per-wave budget that would still admit WavesPerEU + 1 waves.
  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  // The trap handler reserves TRAP_NUM_SGPRS out of that budget.
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  // One more than the granule-aligned budget forces the extra wave out.
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}
319 
/// \returns maximum number of SGPRs a wave may use while still allowing
/// \p WavesPerEU waves per EU. When \p Addressable is false the gfx8+
/// addressable limit is relaxed to 112.
/// NOTE(review): the `Version` declaration was lost in extraction; restored.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  // The trap handler reserves TRAP_NUM_SGPRS out of the budget.
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
334 
335 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
336  bool FlatScrUsed, bool XNACKUsed) {
337  unsigned ExtraSGPRs = 0;
338  if (VCCUsed)
339  ExtraSGPRs = 2;
340 
342  if (Version.Major < 8) {
343  if (FlatScrUsed)
344  ExtraSGPRs = 4;
345  } else {
346  if (XNACKUsed)
347  ExtraSGPRs = 4;
348 
349  if (FlatScrUsed)
350  ExtraSGPRs = 6;
351  }
352 
353  return ExtraSGPRs;
354 }
355 
/// Convenience overload: derives XNACK usage from the subtarget feature bits.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
  bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
  STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}
361 
/// \returns the SGPR block count encoding for \p NumSGPRs: the number of
/// encoding-granule-sized blocks needed, minus one (at least one SGPR is
/// always counted).
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  const unsigned Granule = getSGPREncodingGranule(STI);
  const unsigned Rounded = alignTo(std::max(1u, NumSGPRs), Granule);
  return Rounded / Granule - 1;
}
367 
/// \returns VGPR allocation granule (4 for all subtargets handled here).
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
  return 4;
}
371 
373  return getVGPRAllocGranule(STI);
374 }
375 
/// \returns total number of physical VGPRs (256 on all handled subtargets).
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  return 256;
}
379 
381  return getTotalNumVGPRs(STI);
382 }
383 
/// \returns minimum number of VGPRs that must be allocated per wave so that
/// at most \p WavesPerEU waves fit on an execution unit.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  // At maximum occupancy any VGPR count is acceptable — no lower bound.
  if (WavesPerEU >= getMaxWavesPerEU())
    return 0;
  // One more than the granule-aligned budget that would admit an extra wave.
  const unsigned Budget = getTotalNumVGPRs(STI) / (WavesPerEU + 1);
  const unsigned MinNumVGPRs = alignDown(Budget, getVGPRAllocGranule(STI)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}
394 
/// \returns maximum number of VGPRs a wave may use while still allowing
/// \p WavesPerEU waves per EU.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  const unsigned Budget = getTotalNumVGPRs(STI) / WavesPerEU;
  const unsigned MaxNumVGPRs = alignDown(Budget, getVGPRAllocGranule(STI));
  return std::min(MaxNumVGPRs, getAddressableNumVGPRs(STI));
}
403 
/// \returns the VGPR block count encoding for \p NumVGPRs: the number of
/// encoding-granule-sized blocks needed, minus one (at least one VGPR is
/// always counted).
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
  const unsigned Granule = getVGPREncodingGranule(STI);
  const unsigned Rounded = alignTo(std::max(1u, NumVGPRs), Granule);
  return Rounded / Granule - 1;
}
409 
410 } // end namespace IsaInfo
411 
/// Zero-initializes \p Header and fills in the subtarget-independent and
/// ISA-version fields of an amd_kernel_code_t.
/// NOTE(review): the first signature line and the code-object version
/// assignments were lost in extraction; restored from upstream conventions —
/// verify against AMDKernelCodeT.h.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}
438 
/// \returns a zeroed amdhsa kernel descriptor with the default PGM_RSRC bits
/// set (flush-none fp16/64 denorm mode, DX10 clamp, IEEE mode, workgroup id X).
/// NOTE(review): the signature and the AMDHSA_BITS_SET invocations were
/// partially lost in extraction; restored from upstream — verify against
/// AMDHSAKernelDescriptor.h.
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  return KD;
}
453 
454 bool isGroupSegment(const GlobalValue *GV) {
456 }
457 
458 bool isGlobalSegment(const GlobalValue *GV) {
460 }
461 
465 }
466 
468  return TT.getOS() != Triple::AMDHSA;
469 }
470 
471 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
472  Attribute A = F.getFnAttribute(Name);
473  int Result = Default;
474 
475  if (A.isStringAttribute()) {
476  StringRef Str = A.getValueAsString();
477  if (Str.getAsInteger(0, Result)) {
478  LLVMContext &Ctx = F.getContext();
479  Ctx.emitError("can't parse integer attribute " + Name);
480  }
481  }
482 
483  return Result;
484 }
485 
/// Reads function attribute \p Name as a comma-separated pair of integers.
/// The second integer is optional when \p OnlyFirstRequired is set. Emits a
/// context error and returns \p Default on malformed input.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
  StringRef Name,
  std::pair<int, int> Default,
  bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    // A missing second value is only an error when it was required or when
    // something non-empty (and unparsable) was actually present.
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
510 
512  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
513  if (Version.Major < 9)
514  return VmcntLo;
515 
516  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
517  return VmcntLo | VmcntHi;
518 }
519 
521  return (1 << getExpcntBitWidth()) - 1;
522 }
523 
525  return (1 << getLgkmcntBitWidth()) - 1;
526 }
527 
529  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
530  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
531  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
532  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
533  if (Version.Major < 9)
534  return Waitcnt;
535 
536  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
537  return Waitcnt | VmcntHi;
538 }
539 
540 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
541  unsigned VmcntLo =
542  unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
543  if (Version.Major < 9)
544  return VmcntLo;
545 
546  unsigned VmcntHi =
547  unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
548  VmcntHi <<= getVmcntBitWidthLo();
549  return VmcntLo | VmcntHi;
550 }
551 
552 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
553  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
554 }
555 
556 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
557  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
558 }
559 
560 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
561  unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
562  Vmcnt = decodeVmcnt(Version, Waitcnt);
563  Expcnt = decodeExpcnt(Version, Waitcnt);
564  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
565 }
566 
567 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
568  Waitcnt Decoded;
569  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
570  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
571  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
572  return Decoded;
573 }
574 
575 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
576  unsigned Vmcnt) {
577  Waitcnt =
578  packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
579  if (Version.Major < 9)
580  return Waitcnt;
581 
582  Vmcnt >>= getVmcntBitWidthLo();
583  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
584 }
585 
586 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
587  unsigned Expcnt) {
588  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
589 }
590 
591 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
592  unsigned Lgkmcnt) {
593  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
594 }
595 
597  unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
598  unsigned Waitcnt = getWaitcntBitMask(Version);
599  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
600  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
601  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
602  return Waitcnt;
603 }
604 
/// Encodes a decoded Waitcnt value object back into the operand form.
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
}
608 
/// \returns the "InitialPSInputAddr" function attribute as an integer, or 0.
unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
612 
614  switch(cc) {
622  return true;
623  default:
624  return false;
625  }
626 }
627 
629  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
630 }
631 
633  switch (CC) {
643  return true;
644  default:
645  return false;
646  }
647 }
648 
/// \returns true when FeatureXNACK is enabled on \p STI.
bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}
652 
/// \returns true when FeatureSRAMECC is enabled on \p STI.
bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
}
656 
/// \returns true when FeatureMIMG_R128 is enabled on \p STI.
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}
660 
/// \returns true when the subtarget uses packed d16 VMEM (i.e. the
/// UnpackedD16VMem feature is NOT set).
bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}
664 
/// \returns true for Southern Islands (gfx6) subtargets.
bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}
668 
/// \returns true for Sea Islands (gfx7) subtargets.
bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}
672 
/// \returns true for Volcanic Islands (gfx8) subtargets.
bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}
676 
/// \returns true for gfx9 subtargets.
bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}
680 
/// \returns true when the subtarget uses the GCN3 instruction encoding.
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}
684 
/// \returns true when \p Reg is a scalar register: SCC, or a register whose
/// 32-bit form (itself, or its sub-register at index 1 for wider registers)
/// is in the SReg_32 class.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  // For multi-dword registers, classify by the first 32-bit sub-register.
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}
691 
692 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
693  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
694  if (*R == Reg1) return true;
695  }
696  return false;
697 }
698 
699 #define MAP_REG2REG \
700  using namespace AMDGPU; \
701  switch(Reg) { \
702  default: return Reg; \
703  CASE_CI_VI(FLAT_SCR) \
704  CASE_CI_VI(FLAT_SCR_LO) \
705  CASE_CI_VI(FLAT_SCR_HI) \
706  CASE_VI_GFX9(TTMP0) \
707  CASE_VI_GFX9(TTMP1) \
708  CASE_VI_GFX9(TTMP2) \
709  CASE_VI_GFX9(TTMP3) \
710  CASE_VI_GFX9(TTMP4) \
711  CASE_VI_GFX9(TTMP5) \
712  CASE_VI_GFX9(TTMP6) \
713  CASE_VI_GFX9(TTMP7) \
714  CASE_VI_GFX9(TTMP8) \
715  CASE_VI_GFX9(TTMP9) \
716  CASE_VI_GFX9(TTMP10) \
717  CASE_VI_GFX9(TTMP11) \
718  CASE_VI_GFX9(TTMP12) \
719  CASE_VI_GFX9(TTMP13) \
720  CASE_VI_GFX9(TTMP14) \
721  CASE_VI_GFX9(TTMP15) \
722  CASE_VI_GFX9(TTMP0_TTMP1) \
723  CASE_VI_GFX9(TTMP2_TTMP3) \
724  CASE_VI_GFX9(TTMP4_TTMP5) \
725  CASE_VI_GFX9(TTMP6_TTMP7) \
726  CASE_VI_GFX9(TTMP8_TTMP9) \
727  CASE_VI_GFX9(TTMP10_TTMP11) \
728  CASE_VI_GFX9(TTMP12_TTMP13) \
729  CASE_VI_GFX9(TTMP14_TTMP15) \
730  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
731  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
732  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
733  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
734  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
735  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
736  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
737  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
738  }
739 
740 #define CASE_CI_VI(node) \
741  assert(!isSI(STI)); \
742  case node: return isCI(STI) ? node##_ci : node##_vi;
743 
744 #define CASE_VI_GFX9(node) \
745  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
746 
747 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
748  if (STI.getTargetTriple().getArch() == Triple::r600)
749  return Reg;
751 }
752 
753 #undef CASE_CI_VI
754 #undef CASE_VI_GFX9
755 
756 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
757 #define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
758 
759 unsigned mc2PseudoReg(unsigned Reg) {
761 }
762 
763 #undef CASE_CI_VI
764 #undef CASE_VI_GFX9
765 #undef MAP_REG2REG
766 
767 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
768  assert(OpNo < Desc.NumOperands);
769  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
770  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
771  OpType <= AMDGPU::OPERAND_SRC_LAST;
772 }
773 
774 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
775  assert(OpNo < Desc.NumOperands);
776  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
777  switch (OpType) {
785  return true;
786  default:
787  return false;
788  }
789 }
790 
791 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
792  assert(OpNo < Desc.NumOperands);
793  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
794  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
796 }
797 
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
/// \returns the width in bits of register class \p RCID; asserts on classes
/// not listed here.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}
830 
/// \returns the width in bits of register class \p RC (by class ID).
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}
834 
835 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
836  unsigned OpNo) {
837  assert(OpNo < Desc.NumOperands);
838  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
839  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
840 }
841 
/// \returns true when the 64-bit value \p Literal can be encoded as an
/// inline constant: a small integer in [-16, 64], one of the fixed fp64
/// values, or (with \p HasInv2Pi) the bit pattern of 1/(2*pi).
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
    (Val == DoubleToBits(1.0)) ||
    (Val == DoubleToBits(-1.0)) ||
    (Val == DoubleToBits(0.5)) ||
    (Val == DoubleToBits(-0.5)) ||
    (Val == DoubleToBits(2.0)) ||
    (Val == DoubleToBits(-2.0)) ||
    (Val == DoubleToBits(4.0)) ||
    (Val == DoubleToBits(-4.0)) ||
    (Val == 0x3fc45f306dc9c882 && HasInv2Pi); // double 1/(2*pi)
}
858 
/// \returns true when the 32-bit value \p Literal can be encoded as an
/// inline constant: a small integer in [-16, 64], one of the fixed fp32
/// values, or (with \p HasInv2Pi) the bit pattern of 1/(2*pi).
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
    (Val == FloatToBits(1.0f)) ||
    (Val == FloatToBits(-1.0f)) ||
    (Val == FloatToBits(0.5f)) ||
    (Val == FloatToBits(-0.5f)) ||
    (Val == FloatToBits(2.0f)) ||
    (Val == FloatToBits(-2.0f)) ||
    (Val == FloatToBits(4.0f)) ||
    (Val == FloatToBits(-4.0f)) ||
    (Val == 0x3e22f983 && HasInv2Pi); // float 1/(2*pi)
}
884 
/// \returns true when the 16-bit value \p Literal can be encoded as an
/// inline constant. Requires \p HasInv2Pi (fp16 inline constants appeared
/// together with the inv-2pi constant).
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  switch (static_cast<uint16_t>(Literal)) {
  case 0x3C00: // 1.0
  case 0xBC00: // -1.0
  case 0x3800: // 0.5
  case 0xB800: // -0.5
  case 0x4000: // 2.0
  case 0xC000: // -2.0
  case 0x4400: // 4.0
  case 0xC400: // -4.0
  case 0x3118: // 1/2pi
    return true;
  default:
    return false;
  }
}
903 
/// \returns true when the packed v2f16 literal \p Literal is inlinable:
/// both 16-bit halves must be equal and themselves inlinable.
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  const int16_t Lo = static_cast<int16_t>(Literal);
  const int16_t Hi = static_cast<int16_t>(Literal >> 16);
  return Lo == Hi && isInlinableLiteral16(Lo, HasInv2Pi);
}
911 
912 bool isArgPassedInSGPR(const Argument *A) {
913  const Function *F = A->getParent();
914 
915  // Arguments to compute shaders are never a source of divergence.
917  switch (CC) {
920  return true;
928  // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
929  // Everything else is in VGPRs.
930  return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
931  F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
932  default:
933  // TODO: Should calls support inreg for SGPR inputs?
934  return false;
935  }
936 }
937 
938 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
939  if (isGCN3Encoding(ST))
940  return ByteOffset;
941  return ByteOffset >> 2;
942 }
943 
/// \returns true when \p ByteOffset fits the SMRD immediate-offset field:
/// 20 bits on GCN3-encoded subtargets, 8 bits (in dwords) before.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
949 
950 // Given Imm, split it into the values to put into the SOffset and ImmOffset
951 // fields in an MUBUF instruction. Return false if it is not possible (due to a
952 // hardware bug needing a workaround).
953 //
954 // The required alignment ensures that individual address components remain
955 // aligned if they are aligned to begin with. It also ensures that additional
956 // offsets within the given alignment can be added to the resulting ImmOffset.
/// Splits \p Imm into the SOffset and ImmOffset fields of an MUBUF
/// instruction (see the comment block above for the alignment contract).
/// \returns false when the workaround for the SI/CI SOffset hardware bug
/// makes the split impossible.
/// NOTE(review): the generation check in the bug guard was lost in
/// extraction; restored as "older than Volcanic Islands" to match the
/// SI/CI comment — verify against upstream.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, uint32_t Align) {
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}
995 
996 namespace {
997 
/// Table row (TableGen-generated) marking one intrinsic whose result may
/// diverge across lanes; presence in the table is the signal.
struct SourceOfDivergence {
  unsigned Intr;  // intrinsic ID
};
1001 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
1002 
1003 #define GET_SourcesOfDivergence_IMPL
1004 #include "AMDGPUGenSearchableTables.inc"
1005 
1006 } // end anonymous namespace
1007 
1008 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
1009  return lookupSourceOfDivergence(IntrID);
1010 }
1011 } // namespace AMDGPU
1012 } // namespace llvm
int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
This file a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine...
bool hasPackedD16(const MCSubtargetInfo &STI)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
bool getMUBUFHasSrsrc(unsigned Opc)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getExpcntBitMask(const IsaVersion &Version)
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:298
Represents the counter values to wait for in an s_waitcnt instruction.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned Reg
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
Address space for 32-bit constant memory.
Definition: AMDGPU.h:262
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Instruction set architecture version.
Definition: TargetParser.h:131
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned const TargetRegisterInfo * TRI
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:684
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
int getMUBUFDwords(unsigned Opc)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
uint64_t High
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
std::pair< int, int > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< int, int > Default, bool OnlyFirstRequired)
const Triple & getTargetTriple() const
bool isGlobalSegment(const GlobalValue *GV)
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor()
unsigned getID() const
getID() - Return the register class ID number.
uint32_t amd_kernel_code_version_major
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:717
amdgpu Simplify well known AMD library false Value Value const Twine & Name
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Address space for constant memory (VTX2)
Definition: AMDGPU.h:258
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:197
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI)
Is there any intersection between registers.
bool isStringAttribute() const
Return true if the attribute is a string (target-dependent) attribute.
Definition: Attributes.cpp:169
bool contains(unsigned Reg) const
contains - Return true if the specified register is included in this register class.
const FeatureBitset & getFeatureBits() const
AMD Kernel Code Object (amd_kernel_code_t).
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:136
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
uint16_t amd_machine_version_major
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs)
This file contains the simple types necessary to represent the attributes associated with functions a...
unsigned Intr
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasCodeObjectV3(const MCSubtargetInfo *STI)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
void emitError(unsigned LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
uint8_t OperandType
Information about the type of the operand.
Definition: MCInstrDesc.h:78
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment...
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Definition: CallingConv.h:220
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, const GCNSubtarget *Subtarget, uint32_t Align)
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:289
uint8_t group_segment_alignment
bool isGroupSegment(const GlobalValue *GV)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
bool isReadOnlySegment(const GlobalValue *GV)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:223
uint16_t amd_machine_version_minor
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
uint32_t amd_kernel_code_version_minor
MCRegisterClass - Base class of TargetRegisterClass.
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:188
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
unsigned short NumOperands
Definition: MCInstrDesc.h:166
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:600
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool hasSRAMECC(const MCSubtargetInfo &STI)
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isCompute(CallingConv::ID cc)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isSI(const MCSubtargetInfo &STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:342
unsigned const MachineRegisterInfo * MRI
bool getMUBUFHasSoffset(unsigned Opc)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
bool hasMIMG_R128(const MCSubtargetInfo &STI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getSubReg(unsigned Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo...
int getMUBUFBaseOpcode(unsigned Opc)
uint8_t private_segment_alignment
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
MCRegAliasIterator enumerates all registers aliasing Reg.
Address space for local memory.
Definition: AMDGPU.h:259
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:215
Generation getGeneration() const
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:494
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:192
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:255
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
#define AMDHSA_BITS_SET(DST, MSK, VAL)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:478
IsaVersion getIsaVersion(StringRef GPU)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI)
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:696
uint16_t amd_machine_version_stepping
uint8_t wavefront_size
Wavefront size expressed as a power of two.
bool isArgPassedInSGPR(const Argument *A)
uint64_t DoubleToBits(double Double)
This function takes a double and returns the bit equivalent 64-bit integer.
Definition: MathExtras.h:590
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed)
StringRef getCPU() const
bool isShader(CallingConv::ID cc)
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition: Argument.h:47
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
bool isGCN3Encoding(const MCSubtargetInfo &STI)
#define MAP_REG2REG
bool isCI(const MCSubtargetInfo &STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getInitialPSInputAddr(const Function &F)
bool isGFX9(const MCSubtargetInfo &STI)
Provides AMDGPU specific target descriptions.
const Function * getParent() const
Definition: Argument.h:41
bool isVI(const MCSubtargetInfo &STI)
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:194
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:72
bool hasXNACK(const MCSubtargetInfo &STI)
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:207
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
#define N
Generic base class for all target subtargets.
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool shouldEmitConstantsToTextSection(const Triple &TT)
int64_t kernel_code_entry_byte_offset
Byte offset (possibly negative) from start of amd_kernel_code_t object to kernel's entry point instru...
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Can this operand also contain immediate values?
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:174
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:330
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg...
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
uint16_t amd_machine_kind
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
unsigned getLgkmcntBitMask(const IsaVersion &Version)
bool getMUBUFHasVAddr(unsigned Opc)
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
const uint64_t Version
Definition: InstrProf.h:894
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:200
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:273
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream)
Streams isa version string for given subtarget STI into Stream.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
unsigned getVmcntBitMask(const IsaVersion &Version)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...