LLVM  10.0.0svn
AMDGPUBaseInfo.cpp
Go to the documentation of this file.
1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUBaseInfo.h"
11 #include "AMDGPU.h"
12 #include "SIDefines.h"
13 #include "AMDGPUAsmUtils.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/ADT/Triple.h"
16 #include "llvm/BinaryFormat/ELF.h"
18 #include "llvm/IR/Attributes.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/GlobalValue.h"
22 #include "llvm/IR/Instruction.h"
23 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCInstrDesc.h"
27 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSectionELF.h"
32 #include "llvm/Support/Casting.h"
35 #include <algorithm>
36 #include <cassert>
37 #include <cstdint>
38 #include <cstring>
39 #include <utility>
40 
42 
43 #define GET_INSTRINFO_NAMED_OPS
44 #define GET_INSTRMAP_INFO
45 #include "AMDGPUGenInstrInfo.inc"
46 #undef GET_INSTRMAP_INFO
47 #undef GET_INSTRINFO_NAMED_OPS
48 
49 namespace {
50 
/// Builds a mask of \p Width consecutive set bits whose lowest set bit sits at
/// position \p Shift.
///
/// The computation is carried out on unsigned operands so that wide fields do
/// not run into signed overflow. Both \p Shift and \p Width must be smaller
/// than the number of bits in unsigned.
///
/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1u << Width) - 1u) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// Only the bits covered by the field are modified; bits of \p Src that do not
/// fit into the field are dropped, and a zero-width field leaves \p Dst
/// untouched.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  const unsigned Mask = getBitMask(Shift, Width);
  Dst &= ~Mask;                 // Clear the old field contents.
  Dst |= (Src << Shift) & Mask; // Insert the new value, truncated to the field.
  return Dst;
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits, right-aligned.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  const unsigned Mask = getBitMask(Shift, Width);
  return (Src & Mask) >> Shift;
}
71 
/// \returns Bit position at which the low part of the Vmcnt field starts.
unsigned getVmcntBitShiftLo() {
  const unsigned ShiftLo = 0;
  return ShiftLo;
}
74 
/// \returns Number of bits occupied by the low part of the Vmcnt field.
unsigned getVmcntBitWidthLo() {
  const unsigned WidthLo = 4;
  return WidthLo;
}
77 
/// \returns Bit position at which the Expcnt field starts.
unsigned getExpcntBitShift() {
  const unsigned Shift = 4;
  return Shift;
}
80 
/// \returns Number of bits occupied by the Expcnt field.
unsigned getExpcntBitWidth() {
  const unsigned Width = 3;
  return Width;
}
83 
/// \returns Bit position at which the Lgkmcnt field starts.
unsigned getLgkmcntBitShift() {
  const unsigned Shift = 8;
  return Shift;
}
86 
/// \returns Number of bits occupied by the Lgkmcnt field: 6 when
/// \p VersionMajor is 10 or newer, 4 otherwise.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 10)
    return 6;
  return 4;
}
91 
/// \returns Bit position at which the high part of the Vmcnt field starts.
unsigned getVmcntBitShiftHi() {
  const unsigned ShiftHi = 14;
  return ShiftHi;
}
94 
/// \returns Number of bits occupied by the high part of the Vmcnt field.
unsigned getVmcntBitWidthHi() {
  const unsigned WidthHi = 2;
  return WidthHi;
}
97 
98 } // end namespace anonymous
99 
100 namespace llvm {
101 
102 namespace AMDGPU {
103 
104 #define GET_MIMGBaseOpcodesTable_IMPL
105 #define GET_MIMGDimInfoTable_IMPL
106 #define GET_MIMGInfoTable_IMPL
107 #define GET_MIMGLZMappingTable_IMPL
108 #define GET_MIMGMIPMappingTable_IMPL
109 #include "AMDGPUGenSearchableTables.inc"
110 
111 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
112  unsigned VDataDwords, unsigned VAddrDwords) {
113  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
114  VDataDwords, VAddrDwords);
115  return Info ? Info->Opcode : -1;
116 }
117 
118 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
119  const MIMGInfo *Info = getMIMGInfo(Opc);
120  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
121 }
122 
123 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
124  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
125  const MIMGInfo *NewInfo =
126  getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
127  NewChannels, OrigInfo->VAddrDwords);
128  return NewInfo ? NewInfo->Opcode : -1;
129 }
130 
/// One row of the MUBUF opcode table included from
/// AMDGPUGenSearchableTables.inc.
struct MUBUFInfo {
  uint16_t Opcode;     // Concrete opcode described by this row.
  uint16_t BaseOpcode; // Base opcode shared by related variants.
  uint8_t elements;    // Element count reported by getMUBUFElements.
  bool has_vaddr;      // Reported by getMUBUFHasVAddr.
  bool has_srsrc;      // Reported by getMUBUFHasSrsrc.
  bool has_soffset;    // Reported by getMUBUFHasSoffset.
};
139 
/// One row of the MTBUF opcode table included from
/// AMDGPUGenSearchableTables.inc.
struct MTBUFInfo {
  uint16_t Opcode;     // Concrete opcode described by this row.
  uint16_t BaseOpcode; // Base opcode shared by related variants.
  uint8_t elements;    // Element count reported by getMTBUFElements.
  bool has_vaddr;      // Reported by getMTBUFHasVAddr.
  bool has_srsrc;      // Reported by getMTBUFHasSrsrc.
  bool has_soffset;    // Reported by getMTBUFHasSoffset.
};
148 
149 #define GET_MTBUFInfoTable_DECL
150 #define GET_MTBUFInfoTable_IMPL
151 #define GET_MUBUFInfoTable_DECL
152 #define GET_MUBUFInfoTable_IMPL
153 #include "AMDGPUGenSearchableTables.inc"
154 
155 int getMTBUFBaseOpcode(unsigned Opc) {
156  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
157  return Info ? Info->BaseOpcode : -1;
158 }
159 
160 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
161  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
162  return Info ? Info->Opcode : -1;
163 }
164 
165 int getMTBUFElements(unsigned Opc) {
166  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
167  return Info ? Info->elements : 0;
168 }
169 
170 bool getMTBUFHasVAddr(unsigned Opc) {
171  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
172  return Info ? Info->has_vaddr : false;
173 }
174 
175 bool getMTBUFHasSrsrc(unsigned Opc) {
176  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
177  return Info ? Info->has_srsrc : false;
178 }
179 
180 bool getMTBUFHasSoffset(unsigned Opc) {
181  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
182  return Info ? Info->has_soffset : false;
183 }
184 
185 int getMUBUFBaseOpcode(unsigned Opc) {
186  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
187  return Info ? Info->BaseOpcode : -1;
188 }
189 
190 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
191  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
192  return Info ? Info->Opcode : -1;
193 }
194 
195 int getMUBUFElements(unsigned Opc) {
196  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
197  return Info ? Info->elements : 0;
198 }
199 
200 bool getMUBUFHasVAddr(unsigned Opc) {
201  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
202  return Info ? Info->has_vaddr : false;
203 }
204 
205 bool getMUBUFHasSrsrc(unsigned Opc) {
206  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
207  return Info ? Info->has_srsrc : false;
208 }
209 
210 bool getMUBUFHasSoffset(unsigned Opc) {
211  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
212  return Info ? Info->has_soffset : false;
213 }
214 
215 // Wrapper for Tablegen'd function. enum Subtarget is not defined in any
216 // header files, so we need to wrap it in a function that takes unsigned
217 // instead.
218 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
219  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
220 }
221 
222 namespace IsaInfo {
223 
224 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
225  auto TargetTriple = STI->getTargetTriple();
226  auto Version = getIsaVersion(STI->getCPU());
227 
228  Stream << TargetTriple.getArchName() << '-'
229  << TargetTriple.getVendorName() << '-'
230  << TargetTriple.getOSName() << '-'
231  << TargetTriple.getEnvironmentName() << '-'
232  << "gfx"
233  << Version.Major
234  << Version.Minor
235  << Version.Stepping;
236 
237  if (hasXNACK(*STI))
238  Stream << "+xnack";
239  if (hasSRAMECC(*STI))
240  Stream << "+sram-ecc";
241 
242  Stream.flush();
243 }
244 
246  return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
247  STI->getFeatureBits().test(FeatureCodeObjectV3);
248 }
249 
250 unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
251  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
252  return 16;
253  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
254  return 32;
255 
256  return 64;
257 }
258 
259 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
260  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
261  return 32768;
262  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
263  return 65536;
264 
265  return 0;
266 }
267 
268 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
269  return 4;
270 }
271 
273  unsigned FlatWorkGroupSize) {
274  assert(FlatWorkGroupSize != 0);
275  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
276  return 8;
277  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
278  if (N == 1)
279  return 40;
280  N = 40 / N;
281  return std::min(N, 16u);
282 }
283 
284 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
285  return getMaxWavesPerEU(STI) * getEUsPerCU(STI);
286 }
287 
288 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
289  unsigned FlatWorkGroupSize) {
290  return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
291 }
292 
293 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
294  return 1;
295 }
296 
297 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
298  // FIXME: Need to take scratch memory into account.
299  if (!isGFX10(*STI))
300  return 10;
301  return 20;
302 }
303 
304 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
305  unsigned FlatWorkGroupSize) {
306  return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
307  getEUsPerCU(STI)) / getEUsPerCU(STI);
308 }
309 
311  return 1;
312 }
313 
315  return 2048;
316 }
317 
319  unsigned FlatWorkGroupSize) {
320  return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
321  getWavefrontSize(STI);
322 }
323 
324 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
326  if (Version.Major >= 10)
327  return getAddressableNumSGPRs(STI);
328  if (Version.Major >= 8)
329  return 16;
330  return 8;
331 }
332 
334  return 8;
335 }
336 
337 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
339  if (Version.Major >= 8)
340  return 800;
341  return 512;
342 }
343 
345  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
347 
349  if (Version.Major >= 10)
350  return 106;
351  if (Version.Major >= 8)
352  return 102;
353  return 104;
354 }
355 
356 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
357  assert(WavesPerEU != 0);
358 
360  if (Version.Major >= 10)
361  return 0;
362 
363  if (WavesPerEU >= getMaxWavesPerEU(STI))
364  return 0;
365 
366  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
367  if (STI->getFeatureBits().test(FeatureTrapHandler))
368  MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
369  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
370  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
371 }
372 
373 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
374  bool Addressable) {
375  assert(WavesPerEU != 0);
376 
377  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
379  if (Version.Major >= 10)
380  return Addressable ? AddressableNumSGPRs : 108;
381  if (Version.Major >= 8 && !Addressable)
382  AddressableNumSGPRs = 112;
383  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
384  if (STI->getFeatureBits().test(FeatureTrapHandler))
385  MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
386  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
387  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
388 }
389 
390 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
391  bool FlatScrUsed, bool XNACKUsed) {
392  unsigned ExtraSGPRs = 0;
393  if (VCCUsed)
394  ExtraSGPRs = 2;
395 
397  if (Version.Major >= 10)
398  return ExtraSGPRs;
399 
400  if (Version.Major < 8) {
401  if (FlatScrUsed)
402  ExtraSGPRs = 4;
403  } else {
404  if (XNACKUsed)
405  ExtraSGPRs = 4;
406 
407  if (FlatScrUsed)
408  ExtraSGPRs = 6;
409  }
410 
411  return ExtraSGPRs;
412 }
413 
414 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
415  bool FlatScrUsed) {
416  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
417  STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
418 }
419 
420 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
421  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
422  // SGPRBlocks is actual number of SGPR blocks minus 1.
423  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
424 }
425 
427  Optional<bool> EnableWavefrontSize32) {
428  bool IsWave32 = EnableWavefrontSize32 ?
429  *EnableWavefrontSize32 :
430  STI->getFeatureBits().test(FeatureWavefrontSize32);
431  return IsWave32 ? 8 : 4;
432 }
433 
435  Optional<bool> EnableWavefrontSize32) {
436  return getVGPRAllocGranule(STI, EnableWavefrontSize32);
437 }
438 
439 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
440  if (!isGFX10(*STI))
441  return 256;
442  return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
443 }
444 
446  return 256;
447 }
448 
449 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
450  assert(WavesPerEU != 0);
451 
452  if (WavesPerEU >= getMaxWavesPerEU(STI))
453  return 0;
454  unsigned MinNumVGPRs =
455  alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
456  getVGPRAllocGranule(STI)) + 1;
457  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
458 }
459 
460 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
461  assert(WavesPerEU != 0);
462 
463  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
464  getVGPRAllocGranule(STI));
465  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
466  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
467 }
468 
469 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
470  Optional<bool> EnableWavefrontSize32) {
471  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
472  getVGPREncodingGranule(STI, EnableWavefrontSize32));
473  // VGPRBlocks is actual number of VGPR blocks minus 1.
474  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
475 }
476 
477 } // end namespace IsaInfo
478 
480  const MCSubtargetInfo *STI) {
482 
483  memset(&Header, 0, sizeof(Header));
484 
487  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
488  Header.amd_machine_version_major = Version.Major;
489  Header.amd_machine_version_minor = Version.Minor;
490  Header.amd_machine_version_stepping = Version.Stepping;
491  Header.kernel_code_entry_byte_offset = sizeof(Header);
492  Header.wavefront_size = 6;
493 
494  // If the code object does not support indirect functions, then the value must
495  // be 0xffffffff.
496  Header.call_convention = -1;
497 
498  // These alignment values are specified in powers of two, so alignment =
499  // 2^n. The minimum alignment is 2^4 = 16.
500  Header.kernarg_segment_alignment = 4;
501  Header.group_segment_alignment = 4;
502  Header.private_segment_alignment = 4;
503 
504  if (Version.Major >= 10) {
505  if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
506  Header.wavefront_size = 5;
508  }
510  S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
512  }
513 }
514 
516  const MCSubtargetInfo *STI) {
518 
520  memset(&KD, 0, sizeof(KD));
521 
522  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
523  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
525  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
526  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
527  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
528  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
529  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
530  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
531  if (Version.Major >= 10) {
532  AMDHSA_BITS_SET(KD.kernel_code_properties,
533  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
534  STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
535  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
536  amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
537  STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
538  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
539  amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
540  }
541  return KD;
542 }
543 
544 bool isGroupSegment(const GlobalValue *GV) {
546 }
547 
548 bool isGlobalSegment(const GlobalValue *GV) {
550 }
551 
555 }
556 
558  return TT.getOS() == Triple::AMDPAL;
559 }
560 
561 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
562  Attribute A = F.getFnAttribute(Name);
563  int Result = Default;
564 
565  if (A.isStringAttribute()) {
566  StringRef Str = A.getValueAsString();
567  if (Str.getAsInteger(0, Result)) {
568  LLVMContext &Ctx = F.getContext();
569  Ctx.emitError("can't parse integer attribute " + Name);
570  }
571  }
572 
573  return Result;
574 }
575 
576 std::pair<int, int> getIntegerPairAttribute(const Function &F,
577  StringRef Name,
578  std::pair<int, int> Default,
579  bool OnlyFirstRequired) {
580  Attribute A = F.getFnAttribute(Name);
581  if (!A.isStringAttribute())
582  return Default;
583 
584  LLVMContext &Ctx = F.getContext();
585  std::pair<int, int> Ints = Default;
586  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
587  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
588  Ctx.emitError("can't parse first integer attribute " + Name);
589  return Default;
590  }
591  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
592  if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
593  Ctx.emitError("can't parse second integer attribute " + Name);
594  return Default;
595  }
596  }
597 
598  return Ints;
599 }
600 
602  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
603  if (Version.Major < 9)
604  return VmcntLo;
605 
606  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
607  return VmcntLo | VmcntHi;
608 }
609 
611  return (1 << getExpcntBitWidth()) - 1;
612 }
613 
615  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
616 }
617 
619  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
620  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
621  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
622  getLgkmcntBitWidth(Version.Major));
623  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
624  if (Version.Major < 9)
625  return Waitcnt;
626 
627  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
628  return Waitcnt | VmcntHi;
629 }
630 
631 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
632  unsigned VmcntLo =
633  unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
634  if (Version.Major < 9)
635  return VmcntLo;
636 
637  unsigned VmcntHi =
638  unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
639  VmcntHi <<= getVmcntBitWidthLo();
640  return VmcntLo | VmcntHi;
641 }
642 
643 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
644  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
645 }
646 
647 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
648  return unpackBits(Waitcnt, getLgkmcntBitShift(),
649  getLgkmcntBitWidth(Version.Major));
650 }
651 
652 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
653  unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
654  Vmcnt = decodeVmcnt(Version, Waitcnt);
655  Expcnt = decodeExpcnt(Version, Waitcnt);
656  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
657 }
658 
659 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
660  Waitcnt Decoded;
661  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
662  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
663  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
664  return Decoded;
665 }
666 
667 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
668  unsigned Vmcnt) {
669  Waitcnt =
670  packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
671  if (Version.Major < 9)
672  return Waitcnt;
673 
674  Vmcnt >>= getVmcntBitWidthLo();
675  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
676 }
677 
678 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
679  unsigned Expcnt) {
680  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
681 }
682 
683 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
684  unsigned Lgkmcnt) {
685  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
686  getLgkmcntBitWidth(Version.Major));
687 }
688 
690  unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
691  unsigned Waitcnt = getWaitcntBitMask(Version);
692  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
693  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
694  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
695  return Waitcnt;
696 }
697 
698 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
699  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
700 }
701 
702 //===----------------------------------------------------------------------===//
703 // hwreg
704 //===----------------------------------------------------------------------===//
705 
706 namespace Hwreg {
707 
708 int64_t getHwregId(const StringRef Name) {
709  for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
710  if (IdSymbolic[Id] && Name == IdSymbolic[Id])
711  return Id;
712  }
713  return ID_UNKNOWN_;
714 }
715 
716 static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
717  if (isSI(STI) || isCI(STI) || isVI(STI))
719  else if (isGFX9(STI))
721  else
722  return ID_SYMBOLIC_LAST_;
723 }
724 
725 bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
726  return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
727  IdSymbolic[Id];
728 }
729 
730 bool isValidHwreg(int64_t Id) {
731  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
732 }
733 
734 bool isValidHwregOffset(int64_t Offset) {
735  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
736 }
737 
738 bool isValidHwregWidth(int64_t Width) {
739  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
740 }
741 
742 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
743  return (Id << ID_SHIFT_) |
744  (Offset << OFFSET_SHIFT_) |
745  ((Width - 1) << WIDTH_M1_SHIFT_);
746 }
747 
748 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
749  return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
750 }
751 
752 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
753  Id = (Val & ID_MASK_) >> ID_SHIFT_;
754  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
755  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
756 }
757 
758 } // namespace Hwreg
759 
760 //===----------------------------------------------------------------------===//
761 // SendMsg
762 //===----------------------------------------------------------------------===//
763 
764 namespace SendMsg {
765 
766 int64_t getMsgId(const StringRef Name) {
767  for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
768  if (IdSymbolic[i] && Name == IdSymbolic[i])
769  return i;
770  }
771  return ID_UNKNOWN_;
772 }
773 
774 static bool isValidMsgId(int64_t MsgId) {
775  return (ID_GAPS_FIRST_ <= MsgId && MsgId < ID_GAPS_LAST_) && IdSymbolic[MsgId];
776 }
777 
778 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
779  if (Strict) {
780  if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
781  return isGFX9(STI) || isGFX10(STI);
782  else
783  return isValidMsgId(MsgId);
784  } else {
785  return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
786  }
787 }
788 
789 StringRef getMsgName(int64_t MsgId) {
790  return isValidMsgId(MsgId)? IdSymbolic[MsgId] : "";
791 }
792 
793 int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
794  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
795  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
796  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
797  for (int i = F; i < L; ++i) {
798  if (Name == S[i]) {
799  return i;
800  }
801  }
802  return OP_UNKNOWN_;
803 }
804 
805 bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict) {
806 
807  if (!Strict)
808  return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
809 
810  switch(MsgId)
811  {
812  case ID_GS:
813  return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
814  case ID_GS_DONE:
815  return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
816  case ID_SYSMSG:
817  return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
818  default:
819  return OpId == OP_NONE_;
820  }
821 }
822 
823 StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
824  assert(msgRequiresOp(MsgId));
825  return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
826 }
827 
828 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict) {
829 
830  if (!Strict)
831  return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
832 
833  switch(MsgId)
834  {
835  case ID_GS:
836  return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
837  case ID_GS_DONE:
838  return (OpId == OP_GS_NOP)?
839  (StreamId == STREAM_ID_NONE_) :
840  (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
841  default:
842  return StreamId == STREAM_ID_NONE_;
843  }
844 }
845 
846 bool msgRequiresOp(int64_t MsgId) {
847  return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
848 }
849 
850 bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
851  return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
852 }
853 
854 void decodeMsg(unsigned Val,
855  uint16_t &MsgId,
856  uint16_t &OpId,
857  uint16_t &StreamId) {
858  MsgId = Val & ID_MASK_;
859  OpId = (Val & OP_MASK_) >> OP_SHIFT_;
860  StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
861 }
862 
863 uint64_t encodeMsg(uint64_t MsgId,
864  uint64_t OpId,
865  uint64_t StreamId) {
866  return (MsgId << ID_SHIFT_) |
867  (OpId << OP_SHIFT_) |
868  (StreamId << STREAM_ID_SHIFT_);
869 }
870 
871 } // namespace SendMsg
872 
873 //===----------------------------------------------------------------------===//
874 //
875 //===----------------------------------------------------------------------===//
876 
877 unsigned getInitialPSInputAddr(const Function &F) {
878  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
879 }
880 
882  switch(cc) {
890  return true;
891  default:
892  return false;
893  }
894 }
895 
897  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
898 }
899 
901  switch (CC) {
911  return true;
912  default:
913  return false;
914  }
915 }
916 
917 bool hasXNACK(const MCSubtargetInfo &STI) {
918  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
919 }
920 
921 bool hasSRAMECC(const MCSubtargetInfo &STI) {
922  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
923 }
924 
925 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
926  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
927 }
928 
929 bool hasPackedD16(const MCSubtargetInfo &STI) {
930  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
931 }
932 
933 bool isSI(const MCSubtargetInfo &STI) {
934  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
935 }
936 
937 bool isCI(const MCSubtargetInfo &STI) {
938  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
939 }
940 
941 bool isVI(const MCSubtargetInfo &STI) {
942  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
943 }
944 
945 bool isGFX9(const MCSubtargetInfo &STI) {
946  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
947 }
948 
949 bool isGFX10(const MCSubtargetInfo &STI) {
950  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
951 }
952 
953 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
954  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
955 }
956 
957 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
958  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
959  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
960  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
961  Reg == AMDGPU::SCC;
962 }
963 
964 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
965  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
966  if (*R == Reg1) return true;
967  }
968  return false;
969 }
970 
// Body shared by getMCReg and mc2PseudoReg: a switch over `Reg` that returns
// Reg unchanged by default and otherwise expands one CASE_CI_VI or
// CASE_VI_GFX9_GFX10 entry per listed register. The two CASE_* macros must be
// defined before this macro is expanded; each user defines them with the
// mapping direction it needs and undefines them afterwards.
971 #define MAP_REG2REG \
972  using namespace AMDGPU; \
973  switch(Reg) { \
974  default: return Reg; \
975  CASE_CI_VI(FLAT_SCR) \
976  CASE_CI_VI(FLAT_SCR_LO) \
977  CASE_CI_VI(FLAT_SCR_HI) \
978  CASE_VI_GFX9_GFX10(TTMP0) \
979  CASE_VI_GFX9_GFX10(TTMP1) \
980  CASE_VI_GFX9_GFX10(TTMP2) \
981  CASE_VI_GFX9_GFX10(TTMP3) \
982  CASE_VI_GFX9_GFX10(TTMP4) \
983  CASE_VI_GFX9_GFX10(TTMP5) \
984  CASE_VI_GFX9_GFX10(TTMP6) \
985  CASE_VI_GFX9_GFX10(TTMP7) \
986  CASE_VI_GFX9_GFX10(TTMP8) \
987  CASE_VI_GFX9_GFX10(TTMP9) \
988  CASE_VI_GFX9_GFX10(TTMP10) \
989  CASE_VI_GFX9_GFX10(TTMP11) \
990  CASE_VI_GFX9_GFX10(TTMP12) \
991  CASE_VI_GFX9_GFX10(TTMP13) \
992  CASE_VI_GFX9_GFX10(TTMP14) \
993  CASE_VI_GFX9_GFX10(TTMP15) \
994  CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
995  CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
996  CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
997  CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
998  CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
999  CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
1000  CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
1001  CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
1002  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
1003  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
1004  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
1005  CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
1006  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
1007  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
1008  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
1009  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
1010  }
1011 
// Forward direction (pseudo register -> subtarget-specific MC register):
// a CI/VI entry picks the _ci or _vi variant depending on isCI(STI).
// NOTE(review): the assert expands ahead of the `case` label, i.e. directly
// after the previous entry's `return` inside the switch, so control flow never
// reaches it - confirm whether it was meant to sit after the label.
1012 #define CASE_CI_VI(node) \
1013  assert(!isSI(STI)); \
1014  case node: return isCI(STI) ? node##_ci : node##_vi;
1015 
// A VI/GFX9/GFX10 entry picks the _gfx9_gfx10 variant on GFX9 and GFX10
// subtargets and the _vi variant otherwise.
1016 #define CASE_VI_GFX9_GFX10(node) \
1017  case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;
1018 
/// \returns The subtarget-specific register for pseudo register \p Reg on
/// \p STI. Registers are returned unchanged for the r600 architecture and when
/// \p Reg is not listed in MAP_REG2REG.
1019 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
1020  if (STI.getTargetTriple().getArch() == Triple::r600)
1021  return Reg;
1022  MAP_REG2REG
1023 }
1024 
1025 #undef CASE_CI_VI
1026 #undef CASE_VI_GFX9_GFX10
1027 
// Reverse direction (subtarget-specific MC register -> pseudo register): both
// variants of an entry map back to the same pseudo register.
1028 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
1029 #define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;
1030 
/// \returns The pseudo register that the subtarget-specific register \p Reg
/// stands for, or \p Reg itself when it is not one of the variants listed in
/// MAP_REG2REG.
1031 unsigned mc2PseudoReg(unsigned Reg) {
1032  MAP_REG2REG
1033 }
1034 
1035 #undef CASE_CI_VI
1036 #undef CASE_VI_GFX9_GFX10
1037 #undef MAP_REG2REG
1038 
1039 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1040  assert(OpNo < Desc.NumOperands);
1041  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1042  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
1043  OpType <= AMDGPU::OPERAND_SRC_LAST;
1044 }
1045 
1046 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1047  assert(OpNo < Desc.NumOperands);
1048  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1049  switch (OpType) {
1064  return true;
1065  default:
1066  return false;
1067  }
1068 }
1069 
1070 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1071  assert(OpNo < Desc.NumOperands);
1072  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1073  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
1075 }
1076 
1077 // Avoid using MCRegisterClass::getSize, since that function will go away
1078 // (move from MC* level to Target* level). Return size in bits.
1079 unsigned getRegBitWidth(unsigned RCID) {
1080  switch (RCID) {
1081  case AMDGPU::SGPR_32RegClassID:
1082  case AMDGPU::VGPR_32RegClassID:
1083  case AMDGPU::VRegOrLds_32RegClassID:
1084  case AMDGPU::AGPR_32RegClassID:
1085  case AMDGPU::VS_32RegClassID:
1086  case AMDGPU::AV_32RegClassID:
1087  case AMDGPU::SReg_32RegClassID:
1088  case AMDGPU::SReg_32_XM0RegClassID:
1089  case AMDGPU::SRegOrLds_32RegClassID:
1090  return 32;
1091  case AMDGPU::SGPR_64RegClassID:
1092  case AMDGPU::VS_64RegClassID:
1093  case AMDGPU::AV_64RegClassID:
1094  case AMDGPU::SReg_64RegClassID:
1095  case AMDGPU::VReg_64RegClassID:
1096  case AMDGPU::AReg_64RegClassID:
1097  case AMDGPU::SReg_64_XEXECRegClassID:
1098  return 64;
1099  case AMDGPU::SGPR_96RegClassID:
1100  case AMDGPU::SReg_96RegClassID:
1101  case AMDGPU::VReg_96RegClassID:
1102  return 96;
1103  case AMDGPU::SGPR_128RegClassID:
1104  case AMDGPU::SReg_128RegClassID:
1105  case AMDGPU::VReg_128RegClassID:
1106  case AMDGPU::AReg_128RegClassID:
1107  return 128;
1108  case AMDGPU::SGPR_160RegClassID:
1109  case AMDGPU::SReg_160RegClassID:
1110  case AMDGPU::VReg_160RegClassID:
1111  return 160;
1112  case AMDGPU::SReg_256RegClassID:
1113  case AMDGPU::VReg_256RegClassID:
1114  return 256;
1115  case AMDGPU::SReg_512RegClassID:
1116  case AMDGPU::VReg_512RegClassID:
1117  case AMDGPU::AReg_512RegClassID:
1118  return 512;
1119  case AMDGPU::SReg_1024RegClassID:
1120  case AMDGPU::VReg_1024RegClassID:
1121  case AMDGPU::AReg_1024RegClassID:
1122  return 1024;
1123  default:
1124  llvm_unreachable("Unexpected register class");
1125  }
1126 }
1127 
1128 unsigned getRegBitWidth(const MCRegisterClass &RC) {
1129  return getRegBitWidth(RC.getID());
1130 }
1131 
1132 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
1133  unsigned OpNo) {
1134  assert(OpNo < Desc.NumOperands);
1135  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
1136  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
1137 }
1138 
1139 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
1140  if (Literal >= -16 && Literal <= 64)
1141  return true;
1142 
1143  uint64_t Val = static_cast<uint64_t>(Literal);
1144  return (Val == DoubleToBits(0.0)) ||
1145  (Val == DoubleToBits(1.0)) ||
1146  (Val == DoubleToBits(-1.0)) ||
1147  (Val == DoubleToBits(0.5)) ||
1148  (Val == DoubleToBits(-0.5)) ||
1149  (Val == DoubleToBits(2.0)) ||
1150  (Val == DoubleToBits(-2.0)) ||
1151  (Val == DoubleToBits(4.0)) ||
1152  (Val == DoubleToBits(-4.0)) ||
1153  (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
1154 }
1155 
1156 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
1157  if (Literal >= -16 && Literal <= 64)
1158  return true;
1159 
1160  // The actual type of the operand does not seem to matter as long
1161  // as the bits match one of the inline immediate values. For example:
1162  //
1163  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
1164  // so it is a legal inline immediate.
1165  //
1166  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
1167  // floating-point, so it is a legal inline immediate.
1168 
1169  uint32_t Val = static_cast<uint32_t>(Literal);
1170  return (Val == FloatToBits(0.0f)) ||
1171  (Val == FloatToBits(1.0f)) ||
1172  (Val == FloatToBits(-1.0f)) ||
1173  (Val == FloatToBits(0.5f)) ||
1174  (Val == FloatToBits(-0.5f)) ||
1175  (Val == FloatToBits(2.0f)) ||
1176  (Val == FloatToBits(-2.0f)) ||
1177  (Val == FloatToBits(4.0f)) ||
1178  (Val == FloatToBits(-4.0f)) ||
1179  (Val == 0x3e22f983 && HasInv2Pi);
1180 }
1181 
/// Is the 16-bit \p Literal inlinable?
///
/// Always false when \p HasInv2Pi is not set. Otherwise the integers -16..64
/// are accepted, together with the half-precision bit patterns listed in the
/// switch below.
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (-16 <= Literal && Literal <= 64)
    return true;

  // Compare on the raw 16-bit pattern.
  switch (static_cast<uint16_t>(Literal)) {
  case 0x3C00: // 1.0
  case 0xBC00: // -1.0
  case 0x3800: // 0.5
  case 0xB800: // -0.5
  case 0x4000: // 2.0
  case 0xC000: // -2.0
  case 0x4400: // 4.0
  case 0xC400: // -4.0
  case 0x3118: // 1/2pi
    return true;
  default:
    return false;
  }
}
1200 
1201 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
1202  assert(HasInv2Pi);
1203 
1204  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
1205  int16_t Trunc = static_cast<int16_t>(Literal);
1206  return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
1207  }
1208  if (!(Literal & 0xffff))
1209  return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
1210 
1211  int16_t Lo16 = static_cast<int16_t>(Literal);
1212  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
1213  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
1214 }
1215 
/// Decide, from the calling convention and parameter attributes of the
/// function owning \p A, whether argument \p A is passed in an SGPR.
///
/// \returns true unconditionally for one group of calling conventions, the
/// presence of `inreg` or `byval` on the parameter for a second group, and
/// false for every other calling convention.
1216 bool isArgPassedInSGPR(const Argument *A) {
1217  const Function *F = A->getParent();
1218 
1219  // Arguments to compute shaders are never a source of divergence.
1220  CallingConv::ID CC = F->getCallingConv();
1221  switch (CC) {
// NOTE(review): the case labels for this unconditional `return true` are
// missing from this listing (presumably the compute/kernel calling
// conventions, per the comment above) - confirm against the real source.
1224  return true;
// NOTE(review): the case labels for the attribute-based path below are also
// missing from this listing (presumably the graphics shader conventions).
1232  // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
1233  // Everything else is in VGPRs.
1234  return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
1235  F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
1236  default:
1237  // TODO: Should calls support inreg for SGPR inputs?
1238  return false;
1239  }
1240 }
1241 
1242 static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
1243  return isGCN3Encoding(ST) || isGFX10(ST);
1244 }
1245 
1246 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
1247  if (hasSMEMByteOffset(ST))
1248  return ByteOffset;
1249  return ByteOffset >> 2;
1250 }
1251 
1252 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
1253  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
1254  return (hasSMEMByteOffset(ST)) ?
1255  isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
1256 }
1257 
1258 // Given Imm, split it into the values to put into the SOffset and ImmOffset
1259 // fields in an MUBUF instruction. Return false if it is not possible (due to a
1260 // hardware bug needing a workaround).
1261 //
1262 // The required alignment ensures that individual address components remain
1263 // aligned if they are aligned to begin with. It also ensures that additional
1264 // offsets within the given alignment can be added to the resulting ImmOffset.
/// Split \p Imm into an immediate part and an overflow part.
///
/// \param Imm        Offset to split.
/// \param SOffset    Receives the overflow part on success.
/// \param ImmOffset  Receives the immediate part (at most 4095 rounded down
///                   to \p Align) on success.
/// \param Subtarget  Subtarget consulted for the hardware-bug check.
/// \param Align      Alignment applied when choosing the split point.
/// \returns false, leaving both outputs unwritten, when an overflow part is
///          needed but rejected by the hardware-bug check; true otherwise.
1265 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
1266  const GCNSubtarget *Subtarget, uint32_t Align) {
// Largest immediate usable for this alignment: 4095 rounded down to Align.
1267  const uint32_t MaxImm = alignDown(4095, Align);
1268  uint32_t Overflow = 0;
1269 
// Values up to MaxImm need no split: Overflow stays 0 and Imm is used as is.
1270  if (Imm > MaxImm) {
1271  if (Imm <= MaxImm + 64) {
1272  // Use an SOffset inline constant for 4..64
1273  Overflow = Imm - MaxImm;
1274  Imm = MaxImm;
1275  } else {
1276  // Try to keep the same value in SOffset for adjacent loads, so that
1277  // the corresponding register contents can be re-used.
1278  //
1279  // Load values with all low-bits (except for alignment bits) set into
1280  // SOffset, so that a larger range of values can be covered using
1281  // s_movk_i32.
1282  //
1283  // Atomic operations fail to work correctly when individual address
1284  // components are unaligned, even if their sum is aligned.
// Bias by Align, split at the 4096 boundary, then remove the bias from the
// high part so that Low + (High - Align) equals the original Imm.
1285  uint32_t High = (Imm + Align) & ~4095;
1286  uint32_t Low = (Imm + Align) & 4095;
1287  Imm = Low;
1288  Overflow = High - Align;
1289  }
1290  }
1291 
1292  // There is a hardware bug in SI and CI which prevents address clamping in
1293  // MUBUF instructions from working correctly with SOffsets. The immediate
1294  // offset is unaffected.
// NOTE(review): the second half of this condition (presumably a check of
// Subtarget's generation against SI/CI, per the comment above) is missing
// from this listing - confirm against the real source.
1295  if (Overflow > 0 &&
1297  return false;
1298 
// Outputs are written only on the success path.
1299  ImmOffset = Imm;
1300  SOffset = Overflow;
1301  return true;
1302 }
1303 
// NOTE(review): the signature line of this definition is missing from this
// listing; the body assigns to *this and reads a function `F`, so it is
// presumably a constructor taking the function - confirm in the real source.
//
// Start from the defaults for F's calling convention, then let the string
// function attributes below override individual fields.
1305  *this = getDefaultForCallingConv(F.getCallingConv());
1306 
// "amdgpu-ieee": when present (non-empty), IEEE is set iff the value is
// exactly "true"; any other non-empty value clears it.
1307  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
1308  if (!IEEEAttr.empty())
1309  IEEE = IEEEAttr == "true";
1310 
// "amdgpu-dx10-clamp": same rule, applied to DX10Clamp.
1311  StringRef DX10ClampAttr
1312  = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
1313  if (!DX10ClampAttr.empty())
1314  DX10Clamp = DX10ClampAttr == "true";
1315 }
1316 
// File-local declarations backing isIntrinsicSourceOfDivergence.
1317 namespace {
1318 
// One table record: holds a single intrinsic ID.
1319 struct SourceOfDivergence {
1320  unsigned Intr;
1321 };
// Forward declaration of the lookup used by isIntrinsicSourceOfDivergence;
// its result is tested for null there.
// NOTE(review): the definition is presumably supplied by the generated
// include below (enabled by GET_SourcesOfDivergence_IMPL) - confirm.
1322 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
1323 
1324 #define GET_SourcesOfDivergence_IMPL
1325 #include "AMDGPUGenSearchableTables.inc"
1326 
1327 } // end anonymous namespace
1328 
1329 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
1330  return lookupSourceOfDivergence(IntrID);
1331 }
1332 
1333 } // namespace AMDGPU
1334 } // namespace llvm
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
This file provides a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine...
bool hasPackedD16(const MCSubtargetInfo &STI)
int64_t getHwregId(const StringRef Name)
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width)
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
bool getMUBUFHasSrsrc(unsigned Opc)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool isValidHwregWidth(int64_t Width)
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:199
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getExpcntBitMask(const IsaVersion &Version)
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:305
Address space for 32-bit constant memory.
Definition: AMDGPU.h:277
Represents the counter values to wait for in an s_waitcnt instruction.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:179
unsigned Reg
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Instruction set architecture version.
Definition: TargetParser.h:136
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
#define S_00B848_WGP_MODE(x)
Definition: SIDefines.h:576
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:342
unsigned const TargetRegisterInfo * TRI
F(f)
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
uint64_t High
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
std::pair< int, int > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< int, int > Default, bool OnlyFirstRequired)
const Triple & getTargetTriple() const
bool isGlobalSegment(const GlobalValue *GV)
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
unsigned getID() const
getID() - Return the register class ID number.
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:270
#define S_00B848_MEM_ORDERED(x)
Definition: SIDefines.h:579
uint32_t amd_kernel_code_version_major
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:737
uint32_t code_properties
Code properties.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI)
Is there any intersection between registers.
bool isStringAttribute() const
Return true if the attribute is a string (target-dependent) attribute.
Definition: Attributes.cpp:191
const FeatureBitset & getFeatureBits() const
int64_t getMsgId(const StringRef Name)
AMD Kernel Code Object (amd_kernel_code_t).
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:193
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
uint16_t amd_machine_version_major
This file contains the simple types necessary to represent the attributes associated with functions a...
unsigned Intr
int getMUBUFElements(unsigned Opc)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
StringRef getMsgOpName(int64_t MsgId, int64_t OpId)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:140
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasCodeObjectV3(const MCSubtargetInfo *STI)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
void emitError(unsigned LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
uint8_t OperandType
Information about the type of the operand.
Definition: MCInstrDesc.h:82
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment...
constexpr bool test(unsigned I) const
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, const GCNSubtarget *Subtarget, uint32_t Align)
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:296
uint8_t group_segment_alignment
bool isGroupSegment(const GlobalValue *GV)
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
bool isReadOnlySegment(const GlobalValue *GV)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:223
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:196
uint16_t amd_machine_version_minor
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:220
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
uint32_t amd_kernel_code_version_minor
MCRegisterClass - Base class of TargetRegisterClass.
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
unsigned short NumOperands
Definition: MCInstrDesc.h:182
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:652
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width)
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isCompute(CallingConv::ID cc)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isSI(const MCSubtargetInfo &STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:379
unsigned const MachineRegisterInfo * MRI
bool getMUBUFHasSoffset(unsigned Opc)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
bool isGFX10(const MCSubtargetInfo &STI)
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
bool hasMIMG_R128(const MCSubtargetInfo &STI)
Address space for local memory.
Definition: AMDGPU.h:274
const char *const IdSymbolic[]
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isEntryFunctionCC(CallingConv::ID CC)
int getMUBUFBaseOpcode(unsigned Opc)
uint8_t private_segment_alignment
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isValidHwreg(int64_t Id)
MCRegAliasIterator enumerates all registers aliasing Reg.
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
int getMTBUFElements(unsigned Opc)
Generation getGeneration() const
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:607
bool isValidHwregOffset(int64_t Offset)
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
#define AMDHSA_BITS_SET(DST, MSK, VAL)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:492
IsaVersion getIsaVersion(StringRef GPU)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:390
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Module.h This file contains the declarations for the Module class.
bool getMTBUFHasSoffset(unsigned Opc)
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:710
bool getMTBUFHasSrsrc(unsigned Opc)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict)
uint16_t amd_machine_version_stepping
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:202
uint8_t wavefront_size
Wavefront size expressed as a power of two.
bool isArgPassedInSGPR(const Argument *A)
int64_t getMsgOpId(int64_t MsgId, const StringRef Name)
uint64_t DoubleToBits(double Double)
This function takes a double and returns the bit equivalent 64-bit integer.
Definition: MathExtras.h:642
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed)
StringRef getCPU() const
bool msgSupportsStream(int64_t MsgId, int64_t OpId)
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:141
bool isShader(CallingConv::ID cc)
const char *const OpSysSymbolic[]
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition: Argument.h:47
Address space for constant memory (VTX2).
Definition: AMDGPU.h:273
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool msgRequiresOp(int64_t MsgId)
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, Optional< bool > EnableWavefrontSize32)
bool isGCN3Encoding(const MCSubtargetInfo &STI)
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Definition: CallingConv.h:225
#define MAP_REG2REG
bool isCI(const MCSubtargetInfo &STI)
int getMTBUFBaseOpcode(unsigned Opc)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getInitialPSInputAddr(const Function &F)
bool isGFX9(const MCSubtargetInfo &STI)
const char *const OpGsSymbolic[]
Provides AMDGPU specific target descriptions.
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:163
const Function * getParent() const
Definition: Argument.h:41
bool isVI(const MCSubtargetInfo &STI)
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:220
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:76
bool hasXNACK(const MCSubtargetInfo &STI)
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
#define N
Generic base class for all target subtargets.
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool shouldEmitConstantsToTextSection(const Triple &TT)
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:382
int64_t kernel_code_entry_byte_offset
Byte offset (possibly negative) from start of amd_kernel_code_t object to kernel's entry point instru...
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Can this operand also contain immediate values?
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId)
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:212
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:190
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:333
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg...
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
uint16_t amd_machine_kind
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
unsigned getLgkmcntBitMask(const IsaVersion &Version)
bool getMUBUFHasVAddr(unsigned Opc)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:205
const uint64_t Version
Definition: InstrProf.h:980
StringRef getMsgName(int64_t MsgId)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:277
constexpr uint32_t VersionMajor
HSA metadata major version.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream)
Streams isa version string for given subtarget STI into Stream.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
unsigned getVmcntBitMask(const IsaVersion &Version)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...