LLVM  6.0.0svn
AMDGPUBaseInfo.cpp
Go to the documentation of this file.
1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPUBaseInfo.h"
11 #include "AMDGPU.h"
12 #include "SIDefines.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/ADT/Triple.h"
15 #include "llvm/BinaryFormat/ELF.h"
17 #include "llvm/IR/Attributes.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/Function.h"
20 #include "llvm/IR/GlobalValue.h"
21 #include "llvm/IR/Instruction.h"
22 #include "llvm/IR/LLVMContext.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCInstrDesc.h"
26 #include "llvm/MC/MCRegisterInfo.h"
27 #include "llvm/MC/MCSectionELF.h"
30 #include "llvm/Support/Casting.h"
33 #include <algorithm>
34 #include <cassert>
35 #include <cstdint>
36 #include <cstring>
37 #include <utility>
38 
40 
41 #define GET_INSTRINFO_NAMED_OPS
42 #include "AMDGPUGenInstrInfo.inc"
43 #undef GET_INSTRINFO_NAMED_OPS
44 
45 namespace {
46 
/// Builds a mask of \p Width consecutive set bits whose lowest set bit is at
/// position \p Shift.
///
/// All arithmetic is done on unsigned values so that masks touching the top
/// bit are well defined. A \p Width that covers the whole word yields an
/// all-ones field, and a \p Shift past the top bit yields an empty mask.
///
/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  // Number of value bits in the mask type (8-bit bytes assumed).
  const unsigned NumBits = static_cast<unsigned>(sizeof(unsigned) * 8);

  // Shifting by the full word width or more is undefined; nothing fits there.
  if (Shift >= NumBits)
    return 0;

  // "(1u << Width) - 1" is only defined for Width < NumBits.
  const unsigned Field = Width >= NumBits ? ~0u : (1u << Width) - 1u;
  return Field << Shift;
}
51 
52 /// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
53 ///
54 /// \returns Packed \p Dst.
55 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
56  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
57  Dst |= (Src << Shift) & getBitMask(Shift, Width);
58  return Dst;
59 }
60 
61 /// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
62 ///
63 /// \returns Unpacked bits.
64 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
65  return (Src & getBitMask(Shift, Width)) >> Shift;
66 }
67 
/// \returns Position of the lowest bit of the lower Vmcnt field.
unsigned getVmcntBitShiftLo() {
  const unsigned Shift = 0;
  return Shift;
}

/// \returns Number of bits in the lower Vmcnt field.
unsigned getVmcntBitWidthLo() {
  const unsigned Width = 4;
  return Width;
}

/// \returns Position of the lowest bit of the Expcnt field.
unsigned getExpcntBitShift() {
  const unsigned Shift = 4;
  return Shift;
}

/// \returns Number of bits in the Expcnt field.
unsigned getExpcntBitWidth() {
  const unsigned Width = 3;
  return Width;
}

/// \returns Position of the lowest bit of the Lgkmcnt field.
unsigned getLgkmcntBitShift() {
  const unsigned Shift = 8;
  return Shift;
}

/// \returns Number of bits in the Lgkmcnt field.
unsigned getLgkmcntBitWidth() {
  const unsigned Width = 4;
  return Width;
}

/// \returns Position of the lowest bit of the higher Vmcnt field.
unsigned getVmcntBitShiftHi() {
  const unsigned Shift = 14;
  return Shift;
}

/// \returns Number of bits in the higher Vmcnt field.
unsigned getVmcntBitWidthHi() {
  const unsigned Width = 2;
  return Width;
}
91 
92 } // end namespace anonymous
93 
94 namespace llvm {
95 
96 static cl::opt<bool> EnablePackedInlinableLiterals(
97  "enable-packed-inlinable-literals",
98  cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
99  cl::init(false));
100 
101 namespace AMDGPU {
102 
103 namespace IsaInfo {
104 
106  // SI.
107  if (Features.test(FeatureISAVersion6_0_0))
108  return {6, 0, 0};
109  if (Features.test(FeatureISAVersion6_0_1))
110  return {6, 0, 1};
111  // CI.
112  if (Features.test(FeatureISAVersion7_0_0))
113  return {7, 0, 0};
114  if (Features.test(FeatureISAVersion7_0_1))
115  return {7, 0, 1};
116  if (Features.test(FeatureISAVersion7_0_2))
117  return {7, 0, 2};
118  if (Features.test(FeatureISAVersion7_0_3))
119  return {7, 0, 3};
120 
121  // VI.
122  if (Features.test(FeatureISAVersion8_0_0))
123  return {8, 0, 0};
124  if (Features.test(FeatureISAVersion8_0_1))
125  return {8, 0, 1};
126  if (Features.test(FeatureISAVersion8_0_2))
127  return {8, 0, 2};
128  if (Features.test(FeatureISAVersion8_0_3))
129  return {8, 0, 3};
130  if (Features.test(FeatureISAVersion8_0_4))
131  return {8, 0, 4};
132  if (Features.test(FeatureISAVersion8_1_0))
133  return {8, 1, 0};
134 
135  // GFX9.
136  if (Features.test(FeatureISAVersion9_0_0))
137  return {9, 0, 0};
138  if (Features.test(FeatureISAVersion9_0_1))
139  return {9, 0, 1};
140  if (Features.test(FeatureISAVersion9_0_2))
141  return {9, 0, 2};
142  if (Features.test(FeatureISAVersion9_0_3))
143  return {9, 0, 3};
144 
145  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
146  return {0, 0, 0};
147  return {7, 0, 0};
148 }
149 
150 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
151  auto TargetTriple = STI->getTargetTriple();
152  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
153 
154  Stream << TargetTriple.getArchName() << '-'
155  << TargetTriple.getVendorName() << '-'
156  << TargetTriple.getOSName() << '-'
157  << TargetTriple.getEnvironmentName() << '-'
158  << "gfx"
159  << ISAVersion.Major
160  << ISAVersion.Minor
161  << ISAVersion.Stepping;
162  Stream.flush();
163 }
164 
166  return Features.test(FeatureCodeObjectV3);
167 }
168 
170  if (Features.test(FeatureWavefrontSize16))
171  return 16;
172  if (Features.test(FeatureWavefrontSize32))
173  return 32;
174 
175  return 64;
176 }
177 
179  if (Features.test(FeatureLocalMemorySize32768))
180  return 32768;
181  if (Features.test(FeatureLocalMemorySize65536))
182  return 65536;
183 
184  return 0;
185 }
186 
188  return 4;
189 }
190 
192  unsigned FlatWorkGroupSize) {
193  if (!Features.test(FeatureGCN))
194  return 8;
195  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
196  if (N == 1)
197  return 40;
198  N = 40 / N;
199  return std::min(N, 16u);
200 }
201 
203  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
204 }
205 
207  unsigned FlatWorkGroupSize) {
208  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
209 }
210 
212  return 1;
213 }
214 
216  if (!Features.test(FeatureGCN))
217  return 8;
218  // FIXME: Need to take scratch memory into account.
219  return 10;
220 }
221 
223  unsigned FlatWorkGroupSize) {
224  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
225  getEUsPerCU(Features)) / getEUsPerCU(Features);
226 }
227 
229  return 1;
230 }
231 
233  return 2048;
234 }
235 
237  unsigned FlatWorkGroupSize) {
238  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
239  getWavefrontSize(Features);
240 }
241 
243  IsaVersion Version = getIsaVersion(Features);
244  if (Version.Major >= 8)
245  return 16;
246  return 8;
247 }
248 
250  return 8;
251 }
252 
254  IsaVersion Version = getIsaVersion(Features);
255  if (Version.Major >= 8)
256  return 800;
257  return 512;
258 }
259 
261  if (Features.test(FeatureSGPRInitBug))
263 
264  IsaVersion Version = getIsaVersion(Features);
265  if (Version.Major >= 8)
266  return 102;
267  return 104;
268 }
269 
270 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
271  assert(WavesPerEU != 0);
272 
273  if (WavesPerEU >= getMaxWavesPerEU(Features))
274  return 0;
275  unsigned MinNumSGPRs =
276  alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
277  getSGPRAllocGranule(Features)) + 1;
278  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
279 }
280 
281 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
282  bool Addressable) {
283  assert(WavesPerEU != 0);
284 
285  IsaVersion Version = getIsaVersion(Features);
286  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
287  getSGPRAllocGranule(Features));
288  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
289  if (Version.Major >= 8 && !Addressable)
290  AddressableNumSGPRs = 112;
291  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
292 }
293 
295  return 4;
296 }
297 
299  return getVGPRAllocGranule(Features);
300 }
301 
303  return 256;
304 }
305 
307  return getTotalNumVGPRs(Features);
308 }
309 
310 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
311  assert(WavesPerEU != 0);
312 
313  if (WavesPerEU >= getMaxWavesPerEU(Features))
314  return 0;
315  unsigned MinNumVGPRs =
316  alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
317  getVGPRAllocGranule(Features)) + 1;
318  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
319 }
320 
321 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
322  assert(WavesPerEU != 0);
323 
324  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
325  getVGPRAllocGranule(Features));
326  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
327  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
328 }
329 
330 } // end namespace IsaInfo
331 
333  const FeatureBitset &Features) {
335 
336  memset(&Header, 0, sizeof(Header));
337 
340  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
341  Header.amd_machine_version_major = ISA.Major;
342  Header.amd_machine_version_minor = ISA.Minor;
344  Header.kernel_code_entry_byte_offset = sizeof(Header);
345  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
346  Header.wavefront_size = 6;
347 
348  // If the code object does not support indirect functions, then the value must
349  // be 0xffffffff.
350  Header.call_convention = -1;
351 
352  // These alignment values are specified in powers of two, so alignment =
353  // 2^n. The minimum alignment is 2^4 = 16.
354  Header.kernarg_segment_alignment = 4;
355  Header.group_segment_alignment = 4;
356  Header.private_segment_alignment = 4;
357 }
358 
359 bool isGroupSegment(const GlobalValue *GV) {
361 }
362 
363 bool isGlobalSegment(const GlobalValue *GV) {
365 }
366 
369 }
370 
372  return TT.getOS() != Triple::AMDHSA;
373 }
374 
375 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
376  Attribute A = F.getFnAttribute(Name);
377  int Result = Default;
378 
379  if (A.isStringAttribute()) {
380  StringRef Str = A.getValueAsString();
381  if (Str.getAsInteger(0, Result)) {
382  LLVMContext &Ctx = F.getContext();
383  Ctx.emitError("can't parse integer attribute " + Name);
384  }
385  }
386 
387  return Result;
388 }
389 
390 std::pair<int, int> getIntegerPairAttribute(const Function &F,
391  StringRef Name,
392  std::pair<int, int> Default,
393  bool OnlyFirstRequired) {
394  Attribute A = F.getFnAttribute(Name);
395  if (!A.isStringAttribute())
396  return Default;
397 
398  LLVMContext &Ctx = F.getContext();
399  std::pair<int, int> Ints = Default;
400  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
401  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
402  Ctx.emitError("can't parse first integer attribute " + Name);
403  return Default;
404  }
405  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
406  if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
407  Ctx.emitError("can't parse second integer attribute " + Name);
408  return Default;
409  }
410  }
411 
412  return Ints;
413 }
414 
416  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
417  if (Version.Major < 9)
418  return VmcntLo;
419 
420  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
421  return VmcntLo | VmcntHi;
422 }
423 
425  return (1 << getExpcntBitWidth()) - 1;
426 }
427 
429  return (1 << getLgkmcntBitWidth()) - 1;
430 }
431 
433  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
434  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
435  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
436  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
437  if (Version.Major < 9)
438  return Waitcnt;
439 
440  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
441  return Waitcnt | VmcntHi;
442 }
443 
444 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
445  unsigned VmcntLo =
446  unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
447  if (Version.Major < 9)
448  return VmcntLo;
449 
450  unsigned VmcntHi =
451  unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
452  VmcntHi <<= getVmcntBitWidthLo();
453  return VmcntLo | VmcntHi;
454 }
455 
456 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
457  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
458 }
459 
460 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
461  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
462 }
463 
464 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
465  unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
466  Vmcnt = decodeVmcnt(Version, Waitcnt);
467  Expcnt = decodeExpcnt(Version, Waitcnt);
468  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
469 }
470 
471 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
472  unsigned Vmcnt) {
473  Waitcnt =
474  packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
475  if (Version.Major < 9)
476  return Waitcnt;
477 
478  Vmcnt >>= getVmcntBitWidthLo();
479  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
480 }
481 
482 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
483  unsigned Expcnt) {
484  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
485 }
486 
487 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
488  unsigned Lgkmcnt) {
489  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
490 }
491 
493  unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
494  unsigned Waitcnt = getWaitcntBitMask(Version);
495  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
496  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
497  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
498  return Waitcnt;
499 }
500 
501 unsigned getInitialPSInputAddr(const Function &F) {
502  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
503 }
504 
506  switch(cc) {
514  return true;
515  default:
516  return false;
517  }
518 }
519 
521  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
522 }
523 
525  switch (CC) {
535  return true;
536  default:
537  return false;
538  }
539 }
540 
541 bool isSI(const MCSubtargetInfo &STI) {
542  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
543 }
544 
545 bool isCI(const MCSubtargetInfo &STI) {
546  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
547 }
548 
549 bool isVI(const MCSubtargetInfo &STI) {
550  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
551 }
552 
553 bool isGFX9(const MCSubtargetInfo &STI) {
554  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
555 }
556 
557 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
558  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
559 }
560 
561 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
562  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
563  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
564  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
565  Reg == AMDGPU::SCC;
566 }
567 
568 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
569  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
570  if (*R == Reg1) return true;
571  }
572  return false;
573 }
574 
575 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
576 
577  switch(Reg) {
578  default: break;
579  case AMDGPU::FLAT_SCR:
580  assert(!isSI(STI));
581  return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
582 
583  case AMDGPU::FLAT_SCR_LO:
584  assert(!isSI(STI));
585  return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
586 
587  case AMDGPU::FLAT_SCR_HI:
588  assert(!isSI(STI));
589  return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
590  }
591  return Reg;
592 }
593 
594 unsigned mc2PseudoReg(unsigned Reg) {
595  switch (Reg) {
596  case AMDGPU::FLAT_SCR_ci:
597  case AMDGPU::FLAT_SCR_vi:
598  return FLAT_SCR;
599 
600  case AMDGPU::FLAT_SCR_LO_ci:
601  case AMDGPU::FLAT_SCR_LO_vi:
602  return AMDGPU::FLAT_SCR_LO;
603 
604  case AMDGPU::FLAT_SCR_HI_ci:
605  case AMDGPU::FLAT_SCR_HI_vi:
606  return AMDGPU::FLAT_SCR_HI;
607 
608  default:
609  return Reg;
610  }
611 }
612 
613 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
614  assert(OpNo < Desc.NumOperands);
615  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
616  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
617  OpType <= AMDGPU::OPERAND_SRC_LAST;
618 }
619 
620 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
621  assert(OpNo < Desc.NumOperands);
622  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
623  switch (OpType) {
631  return true;
632  default:
633  return false;
634  }
635 }
636 
637 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
638  assert(OpNo < Desc.NumOperands);
639  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
640  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
642 }
643 
644 // Avoid using MCRegisterClass::getSize, since that function will go away
645 // (move from MC* level to Target* level). Return size in bits.
646 unsigned getRegBitWidth(unsigned RCID) {
647  switch (RCID) {
648  case AMDGPU::SGPR_32RegClassID:
649  case AMDGPU::VGPR_32RegClassID:
650  case AMDGPU::VS_32RegClassID:
651  case AMDGPU::SReg_32RegClassID:
652  case AMDGPU::SReg_32_XM0RegClassID:
653  return 32;
654  case AMDGPU::SGPR_64RegClassID:
655  case AMDGPU::VS_64RegClassID:
656  case AMDGPU::SReg_64RegClassID:
657  case AMDGPU::VReg_64RegClassID:
658  return 64;
659  case AMDGPU::VReg_96RegClassID:
660  return 96;
661  case AMDGPU::SGPR_128RegClassID:
662  case AMDGPU::SReg_128RegClassID:
663  case AMDGPU::VReg_128RegClassID:
664  return 128;
665  case AMDGPU::SReg_256RegClassID:
666  case AMDGPU::VReg_256RegClassID:
667  return 256;
668  case AMDGPU::SReg_512RegClassID:
669  case AMDGPU::VReg_512RegClassID:
670  return 512;
671  default:
672  llvm_unreachable("Unexpected register class");
673  }
674 }
675 
676 unsigned getRegBitWidth(const MCRegisterClass &RC) {
677  return getRegBitWidth(RC.getID());
678 }
679 
680 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
681  unsigned OpNo) {
682  assert(OpNo < Desc.NumOperands);
683  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
684  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
685 }
686 
687 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
688  if (Literal >= -16 && Literal <= 64)
689  return true;
690 
691  uint64_t Val = static_cast<uint64_t>(Literal);
692  return (Val == DoubleToBits(0.0)) ||
693  (Val == DoubleToBits(1.0)) ||
694  (Val == DoubleToBits(-1.0)) ||
695  (Val == DoubleToBits(0.5)) ||
696  (Val == DoubleToBits(-0.5)) ||
697  (Val == DoubleToBits(2.0)) ||
698  (Val == DoubleToBits(-2.0)) ||
699  (Val == DoubleToBits(4.0)) ||
700  (Val == DoubleToBits(-4.0)) ||
701  (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
702 }
703 
704 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
705  if (Literal >= -16 && Literal <= 64)
706  return true;
707 
708  // The actual type of the operand does not seem to matter as long
709  // as the bits match one of the inline immediate values. For example:
710  //
711  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
712  // so it is a legal inline immediate.
713  //
714  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
715  // floating-point, so it is a legal inline immediate.
716 
717  uint32_t Val = static_cast<uint32_t>(Literal);
718  return (Val == FloatToBits(0.0f)) ||
719  (Val == FloatToBits(1.0f)) ||
720  (Val == FloatToBits(-1.0f)) ||
721  (Val == FloatToBits(0.5f)) ||
722  (Val == FloatToBits(-0.5f)) ||
723  (Val == FloatToBits(2.0f)) ||
724  (Val == FloatToBits(-2.0f)) ||
725  (Val == FloatToBits(4.0f)) ||
726  (Val == FloatToBits(-4.0f)) ||
727  (Val == 0x3e22f983 && HasInv2Pi);
728 }
729 
/// \returns False whenever \p HasInv2Pi is not set. Otherwise true if the
/// 16-bit \p Literal is an integer in [-16, 64] or if its bit pattern is one
/// of the half-precision encodings listed below.
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  const bool IsSmallInteger = Literal >= -16 && Literal <= 64;
  if (IsSmallInteger)
    return true;

  switch (static_cast<uint16_t>(Literal)) {
  case 0x3C00: // 1.0
  case 0xBC00: // -1.0
  case 0x3800: // 0.5
  case 0xB800: // -0.5
  case 0x4000: // 2.0
  case 0xC000: // -2.0
  case 0x4400: // 4.0
  case 0xC400: // -4.0
  case 0x3118: // 1/2pi
    return true;
  default:
    return false;
  }
}
748 
749 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
750  assert(HasInv2Pi);
751 
752  if (!EnablePackedInlinableLiterals)
753  return false;
754 
755  int16_t Lo16 = static_cast<int16_t>(Literal);
756  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
757  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
758 }
759 
760 bool isArgPassedInSGPR(const Argument *A) {
761  const Function *F = A->getParent();
762 
763  // Arguments to compute shaders are never a source of divergence.
765  switch (CC) {
768  return true;
776  // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
777  // Everything else is in VGPRs.
778  return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
779  F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
780  default:
781  // TODO: Should calls support inreg for SGPR inputs?
782  return false;
783  }
784 }
785 
786 // TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
787 bool isUniformMMO(const MachineMemOperand *MMO) {
788  const Value *Ptr = MMO->getValue();
789  // UndefValue means this is a load of a kernel input. These are uniform.
790  // Sometimes LDS instructions have constant pointers.
791  // If Ptr is null, then that means this mem operand contains a
792  // PseudoSourceValue like GOT.
793  if (!Ptr || isa<UndefValue>(Ptr) ||
794  isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
795  return true;
796 
797  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
798  return isArgPassedInSGPR(Arg);
799 
800  const Instruction *I = dyn_cast<Instruction>(Ptr);
801  return I && I->getMetadata("amdgpu.uniform");
802 }
803 
804 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
805  if (isGCN3Encoding(ST))
806  return ByteOffset;
807  return ByteOffset >> 2;
808 }
809 
810 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
811  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
812  return isGCN3Encoding(ST) ?
813  isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
814 }
815 } // end namespace AMDGPU
816 
817 } // end namespace llvm
818 
819 namespace llvm {
820 namespace AMDGPU {
821 
823  auto Env = T.getEnvironmentName();
824  AMDGPUAS AS;
825  if (Env == "amdgiz" || Env == "amdgizcl") {
826  AS.FLAT_ADDRESS = 0;
827  AS.PRIVATE_ADDRESS = 5;
828  AS.REGION_ADDRESS = 4;
829  }
830  else {
831  AS.FLAT_ADDRESS = 4;
832  AS.PRIVATE_ADDRESS = 0;
833  AS.REGION_ADDRESS = 5;
834  }
835  return AS;
836 }
837 
839  return getAMDGPUAS(M.getTargetTriple());
840 }
841 
843  return getAMDGPUAS(Triple(M.getTargetTriple()));
844 }
845 } // namespace AMDGPU
846 } // namespace llvm
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
const std::string & getTargetTriple() const
Get the target triple which is a string describing the target host.
Definition: Module.h:233
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
unsigned getAddressableNumVGPRs(const FeatureBitset &Features)
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:294
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
AMDGPUAS getAMDGPUAS(const Module &M)
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
F(f)
unsigned getTotalNumVGPRs(const FeatureBitset &Features)
unsigned getMinWavesPerEU(const FeatureBitset &Features)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:677
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi)
unsigned getWavesPerWorkGroup(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:224
std::pair< int, int > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< int, int > Default, bool OnlyFirstRequired)
const Triple & getTargetTriple() const
getTargetTriple - Return the target triple string.
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:198
unsigned getVGPREncodingGranule(const FeatureBitset &Features)
bool isGlobalSegment(const GlobalValue *GV)
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
unsigned getID() const
getID() - Return the register class ID number.
uint32_t amd_kernel_code_version_major
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:216
unsigned getEUsPerCU(const FeatureBitset &Features)
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:710
A description of a memory reference used in the backend.
unsigned getWavefrontSize(const FeatureBitset &Features)
unsigned getVGPRAllocGranule(const FeatureBitset &Features)
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI)
Is there any intersection between registers.
bool isStringAttribute() const
Return true if the attribute is a string (target-dependent) attribute.
Definition: Attributes.cpp:170
OpenCL uses address spaces to differentiate between various memory regions on the hardware...
Definition: AMDGPU.h:214
bool contains(unsigned Reg) const
contains - Return true if the specified register is included in this register class.
const FeatureBitset & getFeatureBits() const
getFeatureBits - Return the feature bits.
AMD Kernel Code Object (amd_kernel_code_t).
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
uint16_t amd_machine_version_major
Reg
All possible values of the reg field in the ModR/M byte.
This file contains the simple types necessary to represent the attributes associated with functions a...
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features)
bool hasCodeObjectV3(const FeatureBitset &Features)
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt)
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
unsigned getMaxWavesPerCU(const FeatureBitset &Features)
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, bool Addressable)
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version)
void emitError(unsigned LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
uint8_t OperandType
Information about the type of the operand.
Definition: MCInstrDesc.h:82
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment...
uint8_t group_segment_alignment
bool isGroupSegment(const GlobalValue *GV)
unsigned getMaxWavesPerEU(const FeatureBitset &Features)
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:194
bool isReadOnlySegment(const GlobalValue *GV)
static cl::opt< bool > EnablePackedInlinableLiterals("enable-packed-inlinable-literals", cl::desc("Enable packed inlinable literals (v2f16, v2i16)"), cl::init(false))
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:205
uint16_t amd_machine_version_minor
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features)
uint32_t amd_kernel_code_version_minor
MCRegisterClass - Base class of TargetRegisterClass.
unsigned short NumOperands
Definition: MCInstrDesc.h:166
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:593
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
const Value * getValue() const
Return the base address of the memory access.
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Definition: CallingConv.h:221
bool isCompute(CallingConv::ID cc)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isSI(const MCSubtargetInfo &STI)
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:335
unsigned const MachineRegisterInfo * MRI
Container class for subtarget features.
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:137
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
unsigned getTotalNumSGPRs(const FeatureBitset &Features)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getSubReg(unsigned Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo...
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
uint8_t private_segment_alignment
MCRegAliasIterator enumerates all registers aliasing Reg.
Instruction set architecture version.
const Triple & getTargetTriple() const
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:495
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:194
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
unsigned REGION_ADDRESS
Address space for region memory.
Definition: AMDGPU.h:218
const AMDGPUAS & AS
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:189
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:497
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt)
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version)
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:194
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:727
uint16_t amd_machine_version_stepping
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version)
uint8_t wavefront_size
Wavefront size expressed as a power of two.
bool isArgPassedInSGPR(const Argument *A)
uint64_t DoubleToBits(double Double)
This function takes a double and returns the bit equivalent 64-bit integer.
Definition: MathExtras.h:583
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
StringRef getEnvironmentName() const
getEnvironmentName - Get the optional environment (fourth) component of the triple, or "" if empty.
Definition: Triple.cpp:955
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
bool isShader(CallingConv::ID cc)
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition: Argument.h:48
bool isGCN3Encoding(const MCSubtargetInfo &STI)
Address space for constant memory (VTX2)
Definition: AMDGPU.h:225
bool isCI(const MCSubtargetInfo &STI)
amdgpu Simplify well known AMD library false Value Value * Arg
unsigned getInitialPSInputAddr(const Function &F)
bool isGFX9(const MCSubtargetInfo &STI)
Provides AMDGPU specific target descriptions.
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
const Function * getParent() const
Definition: Argument.h:42
bool isVI(const MCSubtargetInfo &STI)
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:195
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:76
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt)
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
unsigned FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:217
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
MCSubtargetInfo - Generic base class for all target subtargets.
bool shouldEmitConstantsToTextSection(const Triple &TT)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version)
Address space for local memory.
Definition: AMDGPU.h:226
bool isUniformMMO(const MachineMemOperand *MMO)
unsigned getAddressableNumSGPRs(const FeatureBitset &Features)
unsigned getSGPREncodingGranule(const FeatureBitset &Features)
int64_t kernel_code_entry_byte_offset
Byte offset (possibly negative) from start of amd_kernel_code_t object to kernel's entry point instru...
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Can this operand also contain immediate values?
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM Value Representation.
Definition: Value.h:73
const FeatureBitset Features
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:174
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:270
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg...
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
uint16_t amd_machine_kind
unsigned getSGPRAllocGranule(const FeatureBitset &Features)
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:57
unsigned getLocalMemorySize(const FeatureBitset &Features)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
const uint64_t Version
Definition: InstrProf.h:867
IsaVersion getIsaVersion(const FeatureBitset &Features)
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:265
unsigned PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:216
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream)
Streams isa version string for given subtarget STI into Stream.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.