LLVM  7.0.0svn
AMDGPUBaseInfo.cpp
Go to the documentation of this file.
1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPUBaseInfo.h"
12 #include "AMDGPU.h"
13 #include "SIDefines.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/ADT/Triple.h"
16 #include "llvm/BinaryFormat/ELF.h"
18 #include "llvm/IR/Attributes.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/GlobalValue.h"
22 #include "llvm/IR/Instruction.h"
23 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCInstrDesc.h"
27 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/Support/Casting.h"
35 #include <algorithm>
36 #include <cassert>
37 #include <cstdint>
38 #include <cstring>
39 #include <utility>
40 
42 
43 #define GET_INSTRINFO_NAMED_OPS
44 #define GET_INSTRMAP_INFO
45 #include "AMDGPUGenInstrInfo.inc"
46 #undef GET_INSTRMAP_INFO
47 #undef GET_INSTRINFO_NAMED_OPS
48 
49 namespace {
50 
/// \returns Bit mask for given bit \p Shift and bit \p Width.
///
/// Uses an unsigned literal for the shift: the original signed `1 << Width`
/// is undefined behavior once Width reaches 31 (signed overflow), while the
/// unsigned form is well-defined for all Width < 32.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1u << Width) - 1u) << Shift;
}
55 
56 /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
57 ///
58 /// \returns Packed \p Dst.
59 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
60  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
61  Dst |= (Src << Shift) & getBitMask(Shift, Width);
62  return Dst;
63 }
64 
65 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
66 ///
67 /// \returns Unpacked bits.
68 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
69  return (Src & getBitMask(Shift, Width)) >> Shift;
70 }
71 
// Field layout of the s_waitcnt immediate, as implied by the shift/width
// constants below: vmcnt low part in bits 3:0, expcnt in bits 6:4, lgkmcnt
// in bits 11:8, and a vmcnt high part in bits 15:14 (only used when
// Version.Major >= 9; see decodeVmcnt/encodeVmcnt).

/// \returns Vmcnt bit shift (lower bits), i.e. bit 0.
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits): 4 bits.
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift, i.e. bit 4.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width: 3 bits.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift, i.e. bit 8.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width: 4 bits.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits), i.e. bit 14.
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits): 2 extra bits.
unsigned getVmcntBitWidthHi() { return 2; }
95 
96 } // end namespace anonymous
97 
98 namespace llvm {
99 
100 namespace AMDGPU {
101 
/// One row of the TableGen-generated MIMG instruction table (see
/// GET_MIMGInfoTable_IMPL below); looked up via getMIMGInfo /
/// getMIMGOpcodeHelper.
struct MIMGInfo {
  uint16_t Opcode;      // Concrete target opcode of this variant.
  uint16_t BaseOpcode;  // Encoding-independent base opcode.
  uint8_t MIMGEncoding; // MIMG encoding family selector.
  uint8_t VDataDwords;  // Presumably dword count of vdata — confirm vs tablegen.
  uint8_t VAddrDwords;  // Presumably dword count of vaddr — confirm vs tablegen.
};
109 
110 #define GET_MIMGBaseOpcodesTable_IMPL
111 #define GET_MIMGDimInfoTable_IMPL
112 #define GET_MIMGInfoTable_IMPL
113 #include "AMDGPUGenSearchableTables.inc"
114 
115 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
116  unsigned VDataDwords, unsigned VAddrDwords) {
117  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
118  VDataDwords, VAddrDwords);
119  return Info ? Info->Opcode : -1;
120 }
121 
122 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
123  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
124  const MIMGInfo *NewInfo =
125  getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
126  NewChannels, OrigInfo->VAddrDwords);
127  return NewInfo ? NewInfo->Opcode : -1;
128 }
129 
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
//
// \p Opcode is the pseudo opcode; \p Gen is cast to the TableGen-internal
// Subtarget enum to select the generation-specific MC opcode.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
136 
137 namespace IsaInfo {
138 
140  // GCN GFX6 (Southern Islands (SI)).
141  if (Features.test(FeatureISAVersion6_0_0))
142  return {6, 0, 0};
143  if (Features.test(FeatureISAVersion6_0_1))
144  return {6, 0, 1};
145 
146  // GCN GFX7 (Sea Islands (CI)).
147  if (Features.test(FeatureISAVersion7_0_0))
148  return {7, 0, 0};
149  if (Features.test(FeatureISAVersion7_0_1))
150  return {7, 0, 1};
151  if (Features.test(FeatureISAVersion7_0_2))
152  return {7, 0, 2};
153  if (Features.test(FeatureISAVersion7_0_3))
154  return {7, 0, 3};
155  if (Features.test(FeatureISAVersion7_0_4))
156  return {7, 0, 4};
157  if (Features.test(FeatureSeaIslands))
158  return {7, 0, 0};
159 
160  // GCN GFX8 (Volcanic Islands (VI)).
161  if (Features.test(FeatureISAVersion8_0_1))
162  return {8, 0, 1};
163  if (Features.test(FeatureISAVersion8_0_2))
164  return {8, 0, 2};
165  if (Features.test(FeatureISAVersion8_0_3))
166  return {8, 0, 3};
167  if (Features.test(FeatureISAVersion8_1_0))
168  return {8, 1, 0};
169  if (Features.test(FeatureVolcanicIslands))
170  return {8, 0, 0};
171 
172  // GCN GFX9.
173  if (Features.test(FeatureISAVersion9_0_0))
174  return {9, 0, 0};
175  if (Features.test(FeatureISAVersion9_0_2))
176  return {9, 0, 2};
177  if (Features.test(FeatureISAVersion9_0_4))
178  return {9, 0, 4};
179  if (Features.test(FeatureISAVersion9_0_6))
180  return {9, 0, 6};
181  if (Features.test(FeatureGFX9))
182  return {9, 0, 0};
183 
184  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
185  return {0, 0, 0};
186  return {7, 0, 0};
187 }
188 
189 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
190  auto TargetTriple = STI->getTargetTriple();
191  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
192 
193  Stream << TargetTriple.getArchName() << '-'
194  << TargetTriple.getVendorName() << '-'
195  << TargetTriple.getOSName() << '-'
196  << TargetTriple.getEnvironmentName() << '-'
197  << "gfx"
198  << ISAVersion.Major
199  << ISAVersion.Minor
200  << ISAVersion.Stepping;
201 
202  if (hasXNACK(*STI))
203  Stream << "+xnack";
204 
205  Stream.flush();
206 }
207 
209  return STI->getFeatureBits().test(FeatureCodeObjectV3);
210 }
211 
213  if (Features.test(FeatureWavefrontSize16))
214  return 16;
215  if (Features.test(FeatureWavefrontSize32))
216  return 32;
217 
218  return 64;
219 }
220 
222  if (Features.test(FeatureLocalMemorySize32768))
223  return 32768;
224  if (Features.test(FeatureLocalMemorySize65536))
225  return 65536;
226 
227  return 0;
228 }
229 
231  return 4;
232 }
233 
235  unsigned FlatWorkGroupSize) {
236  if (!Features.test(FeatureGCN))
237  return 8;
238  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
239  if (N == 1)
240  return 40;
241  N = 40 / N;
242  return std::min(N, 16u);
243 }
244 
246  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
247 }
248 
250  unsigned FlatWorkGroupSize) {
251  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
252 }
253 
255  return 1;
256 }
257 
259  if (!Features.test(FeatureGCN))
260  return 8;
261  // FIXME: Need to take scratch memory into account.
262  return 10;
263 }
264 
266  unsigned FlatWorkGroupSize) {
267  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
268  getEUsPerCU(Features)) / getEUsPerCU(Features);
269 }
270 
272  return 1;
273 }
274 
276  return 2048;
277 }
278 
280  unsigned FlatWorkGroupSize) {
281  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
282  getWavefrontSize(Features);
283 }
284 
286  IsaVersion Version = getIsaVersion(Features);
287  if (Version.Major >= 8)
288  return 16;
289  return 8;
290 }
291 
293  return 8;
294 }
295 
297  IsaVersion Version = getIsaVersion(Features);
298  if (Version.Major >= 8)
299  return 800;
300  return 512;
301 }
302 
304  if (Features.test(FeatureSGPRInitBug))
306 
307  IsaVersion Version = getIsaVersion(Features);
308  if (Version.Major >= 8)
309  return 102;
310  return 104;
311 }
312 
/// \returns Minimum number of SGPRs that meets the given \p WavesPerEU
/// occupancy requirement, or 0 if there is no lower bound (the requested
/// occupancy is already the maximum).
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  // At (or above) maximum occupancy any SGPR count is acceptable.
  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;

  // Smallest count that rules out fitting WavesPerEU + 1 waves: one more
  // than the largest granule-aligned count that still fits them.
  unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
  if (Features.test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}
325 
/// \returns Maximum number of SGPRs that still allows \p WavesPerEU waves
/// per execution unit. If \p Addressable is false on GFX8+, the cap is the
/// allocatable (112) rather than addressable limit.
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  // GFX8+ non-addressable requests are capped at 112 SGPRs.
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
  // The trap handler reserves TRAP_NUM_SGPRS registers from the budget.
  if (Features.test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
340 
/// \returns Number of extra SGPRs implicitly reserved beyond the explicitly
/// used ones, for VCC, FLAT_SCRATCH and (GFX8+) XNACK.
///
/// NOTE(review): the assignments below overwrite rather than accumulate —
/// FLAT_SCRATCH use yields 4 (pre-GFX8) or 6 (GFX8+) total regardless of the
/// VCC/XNACK flags; presumably these registers share one reserved tail
/// block — confirm against the hardware register-allocation docs.
unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major < 8) {
    // Pre-GFX8: FLAT_SCRATCH raises the reservation to 4.
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    // GFX8+: XNACK requires 4; FLAT_SCRATCH requires 6 and dominates.
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed)
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}
361 
362 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
363  bool FlatScrUsed) {
364  return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
365  Features[AMDGPU::FeatureXNACK]);
366 }
367 
/// \returns Number of SGPR blocks for \p NumSGPRs, in the "blocks minus 1"
/// form the program resource registers expect.
unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
  // Round up to the encoding granule; at least one register is counted.
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(Features) - 1;
}
373 
375  return 4;
376 }
377 
379  return getVGPRAllocGranule(Features);
380 }
381 
383  return 256;
384 }
385 
387  return getTotalNumVGPRs(Features);
388 }
389 
/// \returns Minimum number of VGPRs that meets the given \p WavesPerEU
/// occupancy requirement, or 0 if there is no lower bound.
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  // At (or above) maximum occupancy any VGPR count is acceptable.
  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  // One more than the largest granule-aligned count that would still fit
  // WavesPerEU + 1 waves.
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}
400 
401 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
402  assert(WavesPerEU != 0);
403 
404  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
405  getVGPRAllocGranule(Features));
406  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
407  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
408 }
409 
/// \returns Number of VGPR blocks for \p NumVGPRs, in the "blocks minus 1"
/// form the program resource registers expect.
unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
  // Round up to the encoding granule; at least one register is counted.
  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(Features) - 1;
}
415 
416 } // end namespace IsaInfo
417 
419  const FeatureBitset &Features) {
421 
422  memset(&Header, 0, sizeof(Header));
423 
426  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
427  Header.amd_machine_version_major = ISA.Major;
428  Header.amd_machine_version_minor = ISA.Minor;
430  Header.kernel_code_entry_byte_offset = sizeof(Header);
431  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
432  Header.wavefront_size = 6;
433 
434  // If the code object does not support indirect functions, then the value must
435  // be 0xffffffff.
436  Header.call_convention = -1;
437 
438  // These alignment values are specified in powers of two, so alignment =
439  // 2^n. The minimum alignment is 2^4 = 16.
440  Header.kernarg_segment_alignment = 4;
441  Header.group_segment_alignment = 4;
442  Header.private_segment_alignment = 4;
443 }
444 
447  memset(&KD, 0, sizeof(KD));
449  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
452  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
454  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
456  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
457  return KD;
458 }
459 
460 bool isGroupSegment(const GlobalValue *GV) {
462 }
463 
464 bool isGlobalSegment(const GlobalValue *GV) {
466 }
467 
471 }
472 
474  return TT.getOS() != Triple::AMDHSA;
475 }
476 
477 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
478  Attribute A = F.getFnAttribute(Name);
479  int Result = Default;
480 
481  if (A.isStringAttribute()) {
482  StringRef Str = A.getValueAsString();
483  if (Str.getAsInteger(0, Result)) {
484  LLVMContext &Ctx = F.getContext();
485  Ctx.emitError("can't parse integer attribute " + Name);
486  }
487  }
488 
489  return Result;
490 }
491 
/// Parses the string attribute \p Name of the form "<int>[,<int>]" into a
/// pair of integers.
///
/// \returns \p Default if the attribute is absent or non-string, or if
/// parsing fails (an error is also emitted). When \p OnlyFirstRequired is
/// true, an *empty* second field is tolerated and Default's second value is
/// kept; a present-but-malformed second field is still an error.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    // getAsInteger fails on an empty string too — only report an error if
    // the second value was required or was present but malformed.
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
516 
518  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
519  if (Version.Major < 9)
520  return VmcntLo;
521 
522  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
523  return VmcntLo | VmcntHi;
524 }
525 
527  return (1 << getExpcntBitWidth()) - 1;
528 }
529 
531  return (1 << getLgkmcntBitWidth()) - 1;
532 }
533 
535  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
536  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
537  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
538  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
539  if (Version.Major < 9)
540  return Waitcnt;
541 
542  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
543  return Waitcnt | VmcntHi;
544 }
545 
546 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
547  unsigned VmcntLo =
548  unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
549  if (Version.Major < 9)
550  return VmcntLo;
551 
552  unsigned VmcntHi =
553  unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
554  VmcntHi <<= getVmcntBitWidthLo();
555  return VmcntLo | VmcntHi;
556 }
557 
/// \returns Decoded expcnt field of \p Waitcnt (bits 6:4). \p Version is
/// unused; the expcnt layout does not vary across the versions handled here.
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

/// \returns Decoded lgkmcnt field of \p Waitcnt (bits 11:8). \p Version is
/// unused; the lgkmcnt layout does not vary across the versions handled here.
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
565 
/// Decodes all three wait counters out of \p Waitcnt into \p Vmcnt,
/// \p Expcnt and \p Lgkmcnt. \p Version selects the vmcnt layout (GFX9+
/// carries two extra high vmcnt bits).
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
572 
/// Packs \p Vmcnt into \p Waitcnt and \returns the result. The low 4 bits go
/// into bits 3:0; on GFX9+ the value's remaining bits go into the separate
/// high field (bits 15:14).
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  // Shift off the low part already stored and pack the high part.
  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}
583 
/// Packs \p Expcnt into the expcnt field (bits 6:4) of \p Waitcnt and
/// \returns the result. \p Version is unused here.
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

/// Packs \p Lgkmcnt into the lgkmcnt field (bits 11:8) of \p Waitcnt and
/// \returns the result. \p Version is unused here.
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
593 
595  unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
596  unsigned Waitcnt = getWaitcntBitMask(Version);
597  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
598  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
599  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
600  return Waitcnt;
601 }
602 
/// \returns The value of the function's "InitialPSInputAddr" string
/// attribute parsed as an integer, defaulting to 0.
unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
606 
608  switch(cc) {
616  return true;
617  default:
618  return false;
619  }
620 }
621 
623  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
624 }
625 
627  switch (CC) {
637  return true;
638  default:
639  return false;
640  }
641 }
642 
/// \returns true if the subtarget has the XNACK feature enabled.
bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

/// \returns true if the subtarget supports 128-bit MIMG resource registers.
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}

/// \returns true if D16 VMEM data is packed (i.e. the subtarget does NOT
/// have the unpacked-D16 feature).
bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

/// \returns true for Southern Islands (GFX6) subtargets.
bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

/// \returns true for Sea Islands (GFX7) subtargets.
bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

/// \returns true for Volcanic Islands (GFX8) subtargets.
bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

/// \returns true for GFX9 subtargets.
bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

/// \returns true if the subtarget uses the GCN3 instruction encoding.
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}
674 
/// \returns true if \p Reg is a scalar register (or SCC).
///
/// Multi-dword registers are classified via their first 32-bit sub-register:
/// when Reg has sub-register index 1, that sub-register is tested for
/// SReg_32 membership instead of Reg itself.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}
681 
682 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
683  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
684  if (*R == Reg1) return true;
685  }
686  return false;
687 }
688 
689 #define MAP_REG2REG \
690  using namespace AMDGPU; \
691  switch(Reg) { \
692  default: return Reg; \
693  CASE_CI_VI(FLAT_SCR) \
694  CASE_CI_VI(FLAT_SCR_LO) \
695  CASE_CI_VI(FLAT_SCR_HI) \
696  CASE_VI_GFX9(TTMP0) \
697  CASE_VI_GFX9(TTMP1) \
698  CASE_VI_GFX9(TTMP2) \
699  CASE_VI_GFX9(TTMP3) \
700  CASE_VI_GFX9(TTMP4) \
701  CASE_VI_GFX9(TTMP5) \
702  CASE_VI_GFX9(TTMP6) \
703  CASE_VI_GFX9(TTMP7) \
704  CASE_VI_GFX9(TTMP8) \
705  CASE_VI_GFX9(TTMP9) \
706  CASE_VI_GFX9(TTMP10) \
707  CASE_VI_GFX9(TTMP11) \
708  CASE_VI_GFX9(TTMP12) \
709  CASE_VI_GFX9(TTMP13) \
710  CASE_VI_GFX9(TTMP14) \
711  CASE_VI_GFX9(TTMP15) \
712  CASE_VI_GFX9(TTMP0_TTMP1) \
713  CASE_VI_GFX9(TTMP2_TTMP3) \
714  CASE_VI_GFX9(TTMP4_TTMP5) \
715  CASE_VI_GFX9(TTMP6_TTMP7) \
716  CASE_VI_GFX9(TTMP8_TTMP9) \
717  CASE_VI_GFX9(TTMP10_TTMP11) \
718  CASE_VI_GFX9(TTMP12_TTMP13) \
719  CASE_VI_GFX9(TTMP14_TTMP15) \
720  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
721  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
722  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
723  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
724  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
725  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
726  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
727  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
728  }
729 
730 #define CASE_CI_VI(node) \
731  assert(!isSI(STI)); \
732  case node: return isCI(STI) ? node##_ci : node##_vi;
733 
734 #define CASE_VI_GFX9(node) \
735  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
736 
737 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
739 }
740 
741 #undef CASE_CI_VI
742 #undef CASE_VI_GFX9
743 
744 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
745 #define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
746 
747 unsigned mc2PseudoReg(unsigned Reg) {
749 }
750 
751 #undef CASE_CI_VI
752 #undef CASE_VI_GFX9
753 #undef MAP_REG2REG
754 
755 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
756  assert(OpNo < Desc.NumOperands);
757  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
758  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
759  OpType <= AMDGPU::OPERAND_SRC_LAST;
760 }
761 
762 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
763  assert(OpNo < Desc.NumOperands);
764  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
765  switch (OpType) {
773  return true;
774  default:
775  return false;
776  }
777 }
778 
779 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
780  assert(OpNo < Desc.NumOperands);
781  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
782  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
784 }
785 
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  // 32-bit (1 dword) classes.
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  // 64-bit (2 dword) classes.
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  // 96-bit class (VGPR only).
  case AMDGPU::VReg_96RegClassID:
    return 96;
  // 128-bit (4 dword) classes.
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  // 256-bit (8 dword) classes.
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  // 512-bit (16 dword) classes.
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}
817 
/// \returns Size in bits of register class \p RC, dispatching on its ID.
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}
821 
/// \returns Size in bytes of register operand \p OpNo of \p Desc, derived
/// from its register class's bit width.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
828 
829 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
830  if (Literal >= -16 && Literal <= 64)
831  return true;
832 
833  uint64_t Val = static_cast<uint64_t>(Literal);
834  return (Val == DoubleToBits(0.0)) ||
835  (Val == DoubleToBits(1.0)) ||
836  (Val == DoubleToBits(-1.0)) ||
837  (Val == DoubleToBits(0.5)) ||
838  (Val == DoubleToBits(-0.5)) ||
839  (Val == DoubleToBits(2.0)) ||
840  (Val == DoubleToBits(-2.0)) ||
841  (Val == DoubleToBits(4.0)) ||
842  (Val == DoubleToBits(-4.0)) ||
843  (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
844 }
845 
846 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
847  if (Literal >= -16 && Literal <= 64)
848  return true;
849 
850  // The actual type of the operand does not seem to matter as long
851  // as the bits match one of the inline immediate values. For example:
852  //
853  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
854  // so it is a legal inline immediate.
855  //
856  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
857  // floating-point, so it is a legal inline immediate.
858 
859  uint32_t Val = static_cast<uint32_t>(Literal);
860  return (Val == FloatToBits(0.0f)) ||
861  (Val == FloatToBits(1.0f)) ||
862  (Val == FloatToBits(-1.0f)) ||
863  (Val == FloatToBits(0.5f)) ||
864  (Val == FloatToBits(-0.5f)) ||
865  (Val == FloatToBits(2.0f)) ||
866  (Val == FloatToBits(-2.0f)) ||
867  (Val == FloatToBits(4.0f)) ||
868  (Val == FloatToBits(-4.0f)) ||
869  (Val == 0x3e22f983 && HasInv2Pi);
870 }
871 
/// \returns true if the 16-bit literal can be encoded as an inline constant:
/// a small integer in [-16, 64] or one of the inlinable f16 bit patterns.
/// 16-bit inline constants require \p HasInv2Pi support.
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  switch (static_cast<uint16_t>(Literal)) {
  case 0x3C00: // 1.0
  case 0xBC00: // -1.0
  case 0x3800: // 0.5
  case 0xB800: // -0.5
  case 0x4000: // 2.0
  case 0xC000: // -2.0
  case 0x4400: // 4.0
  case 0xC400: // -4.0
  case 0x3118: // 1/2pi
    return true;
  default:
    return false;
  }
}
890 
891 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
892  assert(HasInv2Pi);
893 
894  int16_t Lo16 = static_cast<int16_t>(Literal);
895  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
896  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
897 }
898 
899 bool isArgPassedInSGPR(const Argument *A) {
900  const Function *F = A->getParent();
901 
902  // Arguments to compute shaders are never a source of divergence.
904  switch (CC) {
907  return true;
915  // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
916  // Everything else is in VGPRs.
917  return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
918  F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
919  default:
920  // TODO: Should calls support inreg for SGPR inputs?
921  return false;
922  }
923 }
924 
925 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
926  if (isGCN3Encoding(ST))
927  return ByteOffset;
928  return ByteOffset >> 2;
929 }
930 
931 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
932  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
933  return isGCN3Encoding(ST) ?
934  isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
935 }
936 
937 } // end namespace AMDGPU
938 
939 } // end namespace llvm
940 
941 namespace llvm {
942 namespace AMDGPU {
943 
945  AMDGPUAS AS;
946  AS.FLAT_ADDRESS = 0;
947  AS.PRIVATE_ADDRESS = 5;
948  AS.REGION_ADDRESS = 2;
949  return AS;
950 }
951 
953  return getAMDGPUAS(M.getTargetTriple());
954 }
955 
957  return getAMDGPUAS(Triple(M.getTargetTriple()));
958 }
959 
960 namespace {
961 
/// Row type of the TableGen-generated SourcesOfDivergence table (see
/// GET_SourcesOfDivergence_IMPL below); holds just the intrinsic ID used as
/// the lookup key.
struct SourceOfDivergence {
  unsigned Intr; // Intrinsic ID.
};
965 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
966 
967 #define GET_SourcesOfDivergence_IMPL
968 #include "AMDGPUGenSearchableTables.inc"
969 
970 } // end anonymous namespace
971 
972 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
973  return lookupSourceOfDivergence(IntrID);
974 }
975 } // namespace AMDGPU
976 } // namespace llvm
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
const std::string & getTargetTriple() const
Get the target triple which is a string describing the target host.
Definition: Module.h:238
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
This file a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine...
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getAddressableNumVGPRs(const FeatureBitset &Features)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:292
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:161
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:230
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
unsigned Reg
AMDGPUAS getAMDGPUAS(const Module &M)
unsigned getMaxWavesPerEU(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Definition: CallingConv.h:221
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
unsigned const TargetRegisterInfo * TRI
F(f)
unsigned getTotalNumVGPRs(const FeatureBitset &Features)
unsigned getMinWavesPerEU(const FeatureBitset &Features)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:683
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi)
unsigned getWavesPerWorkGroup(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
std::pair< int, int > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< int, int > Default, bool OnlyFirstRequired)
const Triple & getTargetTriple() const
unsigned getVGPREncodingGranule(const FeatureBitset &Features)
bool isGlobalSegment(const GlobalValue *GV)
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor()
unsigned getID() const
getID() - Return the register class ID number.
uint32_t amd_kernel_code_version_major
unsigned getEUsPerCU(const FeatureBitset &Features)
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:716
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:198
unsigned getWavefrontSize(const FeatureBitset &Features)
unsigned getVGPRAllocGranule(const FeatureBitset &Features)
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI)
Is there any intersection between registers.
bool isStringAttribute() const
Return true if the attribute is a string (target-dependent) attribute.
Definition: Attributes.cpp:170
OpenCL uses address spaces to differentiate between various memory regions on the hardware...
Definition: AMDGPU.h:220
bool contains(unsigned Reg) const
contains - Return true if the specified register is included in this register class.
const FeatureBitset & getFeatureBits() const
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:189
AMD Kernel Code Object (amd_kernel_code_t).
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
uint16_t amd_machine_version_major
This file contains the simple types necessary to represent the attributes associated with functions a...
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features)
unsigned Intr
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed)
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, bool Addressable)
bool hasCodeObjectV3(const MCSubtargetInfo *STI)
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
void emitError(unsigned LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
uint8_t OperandType
Information about the type of the operand.
Definition: MCInstrDesc.h:79
Address space for 32-bit constant memory.
Definition: AMDGPU.h:234
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment...
uint8_t group_segment_alignment
bool isGroupSegment(const GlobalValue *GV)
bool isReadOnlySegment(const GlobalValue *GV)
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:210
uint16_t amd_machine_version_minor
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features)
uint32_t amd_kernel_code_version_minor
MCRegisterClass - Base class of TargetRegisterClass.
unsigned short NumOperands
Definition: MCInstrDesc.h:164
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:599
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs)
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isCompute(CallingConv::ID cc)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isSI(const MCSubtargetInfo &STI)
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:341
unsigned const MachineRegisterInfo * MRI
Container class for subtarget features.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
unsigned getTotalNumSGPRs(const FeatureBitset &Features)
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
bool hasMIMG_R128(const MCSubtargetInfo &STI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getSubReg(unsigned Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo...
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
uint8_t private_segment_alignment
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
MCRegAliasIterator enumerates all registers aliasing Reg.
Instruction set architecture version.
const Triple & getTargetTriple() const
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:495
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:216
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:193
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
unsigned REGION_ADDRESS
Address space for region memory.
Definition: AMDGPU.h:224
const AMDGPUAS & AS
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
#define AMDHSA_BITS_SET(DST, MSK, VAL)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:497
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt)
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version)
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:199
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:727
uint16_t amd_machine_version_stepping
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version)
uint8_t wavefront_size
Wavefront size expressed as a power of two.
bool isArgPassedInSGPR(const Argument *A)
uint64_t DoubleToBits(double Double)
This function takes a double and returns the bit equivalent 64-bit integer.
Definition: MathExtras.h:589
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:137
unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs)
bool isShader(CallingConv::ID cc)
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition: Argument.h:48
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
bool isGCN3Encoding(const MCSubtargetInfo &STI)
#define MAP_REG2REG
bool isCI(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
bool isGFX9(const MCSubtargetInfo &STI)
Provides AMDGPU specific target descriptions.
const Function * getParent() const
Definition: Argument.h:42
bool isVI(const MCSubtargetInfo &STI)
unsigned getMaxWavesPerCU(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:195
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:73
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt)
bool hasXNACK(const MCSubtargetInfo &STI)
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
unsigned FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:223
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
#define N
Generic base class for all target subtargets.
bool shouldEmitConstantsToTextSection(const Triple &TT)
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version)
unsigned getAddressableNumSGPRs(const FeatureBitset &Features)
unsigned getSGPREncodingGranule(const FeatureBitset &Features)
int64_t kernel_code_entry_byte_offset
Byte offset (possibly negative) from start of amd_kernel_code_t object to kernel's entry point instru...
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Can this operand also contain immediate values?
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const FeatureBitset Features
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:172
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:317
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg...
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
uint16_t amd_machine_kind
unsigned getSGPRAllocGranule(const FeatureBitset &Features)
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
unsigned getLocalMemorySize(const FeatureBitset &Features)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
Address space for local memory.
Definition: AMDGPU.h:232
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
Address space for constant memory (VTX2)
Definition: AMDGPU.h:231
const uint64_t Version
Definition: InstrProf.h:895
IsaVersion getIsaVersion(const FeatureBitset &Features)
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:273
unsigned PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:222
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream)
Streams isa version string for given subtarget STI into Stream.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.