//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRMAP_INFO
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
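
// Worked example (illustrative, not part of the original source): with
// Shift = 4 and Width = 3, getBitMask returns ((1 << 3) - 1) << 4 = 0x70.
// Then packBits(0x5, 0xFFFF, 4, 3) clears bits 6:4 of the destination
// (0xFFFF -> 0xFF8F) and ORs in (0x5 << 4) & 0x70 = 0x50, yielding 0xFFDF;
// unpackBits(0xFFDF, 4, 3) recovers (0xFFDF & 0x70) >> 4 = 0x5.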

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
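
// Taken together, the getters above describe the s_waitcnt simm16 layout
// assumed throughout this file: vmcnt low bits in [3:0], expcnt in [6:4],
// lgkmcnt in [11:8], and (gfx9+) the two high vmcnt bits in [15:14].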

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}
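
// Usage note (illustrative): given an existing MIMG opcode, getMaskedMIMGOp
// looks up the variant with the same base opcode and encoding but a
// different number of data dwords, e.g. when a narrower dmask means an image
// load needs fewer result channels; it returns -1 if the searchable table
// has no such variant.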

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

namespace IsaInfo {

void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto Version = getIsaVersion(STI->getCPU());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << Version.Major
         << Version.Minor
         << Version.Stepping;

  if (hasXNACK(*STI))
    Stream << "+xnack";

  Stream.flush();
}
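
// Example (illustrative): for an amdgcn-amd-amdhsa target with CPU "gfx900",
// this streams "amdgcn-amd-amdhsa--gfx900" (the empty environment component
// leaves consecutive dashes), with "+xnack" appended when the feature is on.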

bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
  return STI->getFeatureBits().test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  if (!STI->getFeatureBits().test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}
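
// Worked example (illustrative): a 256-lane flat work-group on a wave64 GCN
// target spans getWavesPerWorkGroup = alignTo(256, 64) / 64 = 4 waves, so at
// most N = 40 / 4 = 10 work-groups fit per CU (capped at 16; a single-wave
// group gets the full 40-slot limit).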

unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
  return getMaxWavesPerEU() * getEUsPerCU(STI);
}

unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU() {
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
                 getEUsPerCU(STI)) / getEUsPerCU(STI);
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
         getWavefrontSize(STI);
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU())
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
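
// Worked example (illustrative): on a gfx8+ target without the trap handler
// feature, 10 waves/EU give getMaxNumSGPRs = alignDown(800 / 10, 16) = 80
// and getMinNumSGPRs = alignDown(800 / 11, 16) + 1 = 65, i.e. a kernel using
// 65..80 SGPRs can still reach 10 waves per EU.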

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed)
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
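
// Worked example (illustrative): with the 8-register encoding granule,
// NumSGPRs = 10 rounds up to 16, so getNumSGPRBlocks returns 16 / 8 - 1 = 1,
// the biased value the hardware expects in the program resource registers.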

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
  return 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
  return getVGPRAllocGranule(STI);
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  return 256;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  return getTotalNumVGPRs(STI);
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU())
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
                getVGPRAllocGranule(STI)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI) - 1;
}
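
// Worked example (illustrative): NumVGPRs = 33 rounds up to 36 at the
// 4-register granule, so getNumVGPRBlocks returns 36 / 4 - 1 = 8; and at
// 4 waves/EU, getMaxNumVGPRs = alignDown(256 / 4, 4) = 64 VGPRs per lane.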

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
         GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
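
// Example (illustrative): a string attribute in the form consumed here, e.g.
// "amdgpu-flat-work-group-size"="128,256", parses to {128, 256}; with
// OnlyFirstRequired set, a lone "128" is also accepted and the second
// element keeps its default.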

unsigned getVmcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
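
// Worked example (illustrative): on gfx9 (Version.Major == 9) the maximum
// vmcnt value is 63; encodeVmcnt splits it as 0xF into bits 3:0 and 0x3 into
// bits 15:14, so encodeVmcnt(Version, 0, 63) == 0xC00F, and
// decodeVmcnt(Version, 0xC00F) recovers 63. encodeWaitcnt(Version, 0, 0, 0)
// clears every counter field, i.e. "wait for all outstanding operations".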

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
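
// Example (illustrative): getRegBitWidth(AMDGPU::VReg_128RegClassID) is 128,
// so getRegOperandSize reports 128 / 8 = 16 bytes for a VReg_128 operand.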

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
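
// Example (illustrative): the packed literal 0x40004000 has both halves
// equal to 0x4000 (2.0 in FP16), so it is inlinable as a v2f16 operand,
// whereas 0x40003C00 (<2.0, 1.0>) mixes two different inline values and
// is not.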

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
      isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
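
// Worked example (illustrative): SMRD immediate offsets are encoded in
// dwords before GCN3 and in bytes from GCN3 on, so a byte offset of 1020
// encodes as 255 (the 8-bit maximum) on SI/CI, while GCN3 targets accept
// byte offsets up to 2^20 - 1 directly.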

// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to
// a hardware bug needing a workaround).
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget) {
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}
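
// Worked example (illustrative): Imm = 5000 exceeds MaxImm = 4092, so the
// else branch yields High = (5004 & ~4095) = 4096 and Low = 908, giving
// ImmOffset = 908 and SOffset = 4092 (note 908 + 4092 = 5000, with SOffset's
// alignment bits clear). A smaller Imm = 4100 instead takes the
// inline-constant path: ImmOffset = 4092, SOffset = 8.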

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

} // namespace AMDGPU
} // namespace llvm