//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRMAP_INFO
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
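
// Example: getBitMask(4, 3) == 0x70, so packBits(5, 0xFFFF, 4, 3) clears
// bits [6:4] of 0xFFFF and re-inserts 5 there, yielding 0xFFDF;
// unpackBits(0xFFDF, 4, 3) recovers 5.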

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
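
// Taken together, these getters describe the s_waitcnt field layout: vmcnt
// occupies bits [3:0], expcnt bits [6:4], lgkmcnt bits [11:8], and on gfx9+
// two extra high vmcnt bits live in bits [15:14].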

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

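// Map an MIMG opcode to the equivalent opcode with a different number of
// result/data dwords (e.g. after dead channels of an image load have been
// pruned), keeping the base opcode, encoding and address size unchanged.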
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}
130 
131 // Wrapper for Tablegen'd function. enum Subtarget is not defined in any
132 // header files, so we need to wrap it in a function that takes unsigned
133 // instead.
134 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
135  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
136 }
137 
138 namespace IsaInfo {
139 
140 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
141  auto TargetTriple = STI->getTargetTriple();
142  auto Version = getIsaVersion(STI->getCPU());
143 
144  Stream << TargetTriple.getArchName() << '-'
145  << TargetTriple.getVendorName() << '-'
146  << TargetTriple.getOSName() << '-'
147  << TargetTriple.getEnvironmentName() << '-'
148  << "gfx"
149  << Version.Major
150  << Version.Minor
151  << Version.Stepping;
152 
153  if (hasXNACK(*STI))
154  Stream << "+xnack";
155  if (hasSRAMECC(*STI))
156  Stream << "+sram-ecc";
157 
158  Stream.flush();
159 }
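
// For an amdhsa target at gfx900 with XNACK enabled, this streams a string of
// the form "amdgcn-amd-amdhsa--gfx900+xnack" (the empty environment component
// leaves a double '-' before the "gfx" part).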

bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
  return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
         STI->getFeatureBits().test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  if (!STI->getFeatureBits().test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
  return getMaxWavesPerEU() * getEUsPerCU(STI);
}

unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU() {
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
                 getEUsPerCU(STI)) / getEUsPerCU(STI);
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
         getWavefrontSize(STI);
}
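
// E.g. a flat work group size of 200 on a wave64 subtarget is rounded up to
// 256 / 64 = 4 waves per work group.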

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU())
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
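
// E.g. on a gfx8 subtarget without the trap handler feature, WavesPerEU = 10
// yields alignDown(800 / 10, 16) = 80 SGPRs, below the addressable limit of
// 102.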

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed)
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
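
// E.g. with an encoding granule of 8, NumSGPRs = 10 is rounded up to 16 and
// encoded as 16 / 8 - 1 = 1.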
325 
326 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
327  return 4;
328 }
329 
331  return getVGPRAllocGranule(STI);
332 }
333 
334 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
335  return 256;
336 }
337 
339  return getTotalNumVGPRs(STI);
340 }
341 
342 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
343  assert(WavesPerEU != 0);
344 
345  if (WavesPerEU >= getMaxWavesPerEU())
346  return 0;
347  unsigned MinNumVGPRs =
348  alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
349  getVGPRAllocGranule(STI)) + 1;
350  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
351 }
352 
353 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
354  assert(WavesPerEU != 0);
355 
356  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
357  getVGPRAllocGranule(STI));
358  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
359  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
360 }
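
// E.g. WavesPerEU = 8 yields alignDown(256 / 8, 4) = 32 VGPRs, already within
// the addressable limit of 256.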

unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI) - 1;
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
         GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
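
// E.g. for a gfx9 target, encodeWaitcnt(Version, /*Vmcnt=*/1, /*Expcnt=*/2,
// /*Lgkmcnt=*/3) yields 0x321, and decodeWaitcnt recovers the three counters
// from that encoding.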

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}
804 
805 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
806  if (Literal >= -16 && Literal <= 64)
807  return true;
808 
809  // The actual type of the operand does not seem to matter as long
810  // as the bits match one of the inline immediate values. For example:
811  //
812  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
813  // so it is a legal inline immediate.
814  //
815  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
816  // floating-point, so it is a legal inline immediate.
817 
818  uint32_t Val = static_cast<uint32_t>(Literal);
819  return (Val == FloatToBits(0.0f)) ||
820  (Val == FloatToBits(1.0f)) ||
821  (Val == FloatToBits(-1.0f)) ||
822  (Val == FloatToBits(0.5f)) ||
823  (Val == FloatToBits(-0.5f)) ||
824  (Val == FloatToBits(2.0f)) ||
825  (Val == FloatToBits(-2.0f)) ||
826  (Val == FloatToBits(4.0f)) ||
827  (Val == FloatToBits(-4.0f)) ||
828  (Val == 0x3e22f983 && HasInv2Pi);
829 }
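
// Note: 0x3e22f983 above is the single-precision bit pattern of 1/(2*pi), the
// extra inline constant available when HasInv2Pi is set; 0x3fc45f306dc9c882 in
// the 64-bit variant is the same value in double precision.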

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
      isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
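
// E.g. a byte offset of 16 is encoded as 4 (dwords) on SI/CI, but stays 16 on
// GCN3 (VI) targets, where the immediate is a 20-bit byte offset.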

// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget) {
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}
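
// E.g. Imm = 4100 exceeds MaxImm = 4092 but is within 64 of it, so on targets
// unaffected by the SI/CI SOffset bug it splits into ImmOffset = 4092 and an
// SOffset overflow of 8, which fits an SGPR inline constant.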

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

} // namespace AMDGPU
} // namespace llvm