LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU/Utils - AMDGPUBaseInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 350 369 94.9 %
Date: 2017-09-14 15:23:50 Functions: 71 76 93.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : 
      10             : #include "AMDGPUBaseInfo.h"
      11             : #include "AMDGPU.h"
      12             : #include "SIDefines.h"
      13             : #include "llvm/ADT/StringRef.h"
      14             : #include "llvm/ADT/Triple.h"
      15             : #include "llvm/BinaryFormat/ELF.h"
      16             : #include "llvm/CodeGen/MachineMemOperand.h"
      17             : #include "llvm/IR/Attributes.h"
      18             : #include "llvm/IR/Constants.h"
      19             : #include "llvm/IR/Function.h"
      20             : #include "llvm/IR/GlobalValue.h"
      21             : #include "llvm/IR/Instruction.h"
      22             : #include "llvm/IR/LLVMContext.h"
      23             : #include "llvm/IR/Module.h"
      24             : #include "llvm/MC/MCContext.h"
      25             : #include "llvm/MC/MCInstrDesc.h"
      26             : #include "llvm/MC/MCRegisterInfo.h"
      27             : #include "llvm/MC/MCSectionELF.h"
      28             : #include "llvm/MC/MCSubtargetInfo.h"
      29             : #include "llvm/MC/SubtargetFeature.h"
      30             : #include "llvm/Support/Casting.h"
      31             : #include "llvm/Support/ErrorHandling.h"
      32             : #include "llvm/Support/MathExtras.h"
      33             : #include <algorithm>
      34             : #include <cassert>
      35             : #include <cstdint>
      36             : #include <cstring>
      37             : #include <utility>
      38             : 
      39             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      40             : 
      41             : #define GET_INSTRINFO_NAMED_OPS
      42             : #include "AMDGPUGenInstrInfo.inc"
      43             : #undef GET_INSTRINFO_NAMED_OPS
      44             : 
      45             : namespace {
      46             : 
      47             : /// \returns Bit mask for given bit \p Shift and bit \p Width.
      48             : unsigned getBitMask(unsigned Shift, unsigned Width) {
      49             :   return ((1 << Width) - 1) << Shift;
      50             : }
      51             : 
      52             : /// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
      53             : ///
      54             : /// \returns Packed \p Dst.
      55             : unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
      56      151645 :   Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
      57      151645 :   Dst |= (Src << Shift) & getBitMask(Shift, Width);
      58             :   return Dst;
      59             : }
      60             : 
      61             : /// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
      62             : ///
      63             : /// \returns Unpacked bits.
      64             : unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
      65      115711 :   return (Src & getBitMask(Shift, Width)) >> Shift;
      66             : }
      67             : 
      68             : /// \returns Vmcnt bit shift (lower bits).
      69             : unsigned getVmcntBitShiftLo() { return 0; }
      70             : 
      71             : /// \returns Vmcnt bit width (lower bits).
      72             : unsigned getVmcntBitWidthLo() { return 4; }
      73             : 
      74             : /// \returns Expcnt bit shift.
      75             : unsigned getExpcntBitShift() { return 4; }
      76             : 
      77             : /// \returns Expcnt bit width.
      78             : unsigned getExpcntBitWidth() { return 3; }
      79             : 
      80             : /// \returns Lgkmcnt bit shift.
      81             : unsigned getLgkmcntBitShift() { return 8; }
      82             : 
      83             : /// \returns Lgkmcnt bit width.
      84             : unsigned getLgkmcntBitWidth() { return 4; }
      85             : 
      86             : /// \returns Vmcnt bit shift (higher bits).
      87             : unsigned getVmcntBitShiftHi() { return 14; }
      88             : 
      89             : /// \returns Vmcnt bit width (higher bits).
      90             : unsigned getVmcntBitWidthHi() { return 2; }
      91             : 
      92             : } // end namespace anonymous
      93             : 
      94             : namespace llvm {
      95             : 
      96       72306 : static cl::opt<bool> EnablePackedInlinableLiterals(
      97             :     "enable-packed-inlinable-literals",
      98      216918 :     cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
      99      289224 :     cl::init(false));
     100             : 
     101             : namespace AMDGPU {
     102             : 
     103             : namespace IsaInfo {
     104             : 
     105     1054326 : IsaVersion getIsaVersion(const FeatureBitset &Features) {
     106             :   // SI.
     107     2108652 :   if (Features.test(FeatureISAVersion6_0_0))
     108       96822 :     return {6, 0, 0};
     109     1915008 :   if (Features.test(FeatureISAVersion6_0_1))
     110       66519 :     return {6, 0, 1};
     111             :   // CI.
     112     1781970 :   if (Features.test(FeatureISAVersion7_0_0))
     113      117818 :     return {7, 0, 0};
     114     1546334 :   if (Features.test(FeatureISAVersion7_0_1))
     115       22514 :     return {7, 0, 1};
     116     1501306 :   if (Features.test(FeatureISAVersion7_0_2))
     117           1 :     return {7, 0, 2};
     118     1501304 :   if (Features.test(FeatureISAVersion7_0_3))
     119         448 :     return {7, 0, 3};
     120             : 
     121             :   // VI.
     122     1500408 :   if (Features.test(FeatureISAVersion8_0_0))
     123        6210 :     return {8, 0, 0};
     124     1487988 :   if (Features.test(FeatureISAVersion8_0_1))
     125        2480 :     return {8, 0, 1};
     126     1483028 :   if (Features.test(FeatureISAVersion8_0_2))
     127      232340 :     return {8, 0, 2};
     128     1018348 :   if (Features.test(FeatureISAVersion8_0_3))
     129      116658 :     return {8, 0, 3};
     130      785032 :   if (Features.test(FeatureISAVersion8_0_4))
     131         788 :     return {8, 0, 4};
     132      783456 :   if (Features.test(FeatureISAVersion8_1_0))
     133         870 :     return {8, 1, 0};
     134             : 
     135             :   // GFX9.
     136      781716 :   if (Features.test(FeatureISAVersion9_0_0))
     137       76179 :     return {9, 0, 0};
     138      629358 :   if (Features.test(FeatureISAVersion9_0_1))
     139       46366 :     return {9, 0, 1};
     140      536626 :   if (Features.test(FeatureISAVersion9_0_2))
     141           1 :     return {9, 0, 2};
     142      536624 :   if (Features.test(FeatureISAVersion9_0_3))
     143           1 :     return {9, 0, 3};
     144             : 
     145      804681 :   if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
     146         462 :     return {0, 0, 0};
     147      267849 :   return {7, 0, 0};
     148             : }
     149             : 
     150      330090 : unsigned getWavefrontSize(const FeatureBitset &Features) {
     151      660180 :   if (Features.test(FeatureWavefrontSize16))
     152             :     return 16;
     153      660180 :   if (Features.test(FeatureWavefrontSize32))
     154             :     return 32;
     155             : 
     156      330084 :   return 64;
     157             : }
     158             : 
     159           0 : unsigned getLocalMemorySize(const FeatureBitset &Features) {
     160           0 :   if (Features.test(FeatureLocalMemorySize32768))
     161             :     return 32768;
     162           0 :   if (Features.test(FeatureLocalMemorySize65536))
     163             :     return 65536;
     164             : 
     165           0 :   return 0;
     166             : }
     167             : 
     168       60358 : unsigned getEUsPerCU(const FeatureBitset &Features) {
     169       60358 :   return 4;
     170             : }
     171             : 
     172      138453 : unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
     173             :                                unsigned FlatWorkGroupSize) {
     174      276906 :   if (!Features.test(FeatureGCN))
     175             :     return 8;
     176      134866 :   unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
     177      134866 :   if (N == 1)
     178             :     return 40;
     179      128936 :   N = 40 / N;
     180      257872 :   return std::min(N, 16u);
     181             : }
     182             : 
     183           0 : unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
     184           0 :   return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
     185             : }
     186             : 
     187       30179 : unsigned getMaxWavesPerCU(const FeatureBitset &Features,
     188             :                           unsigned FlatWorkGroupSize) {
     189       30179 :   return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
     190             : }
     191             : 
     192       30179 : unsigned getMinWavesPerEU(const FeatureBitset &Features) {
     193       30179 :   return 1;
     194             : }
     195             : 
     196      272443 : unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
     197      544886 :   if (!Features.test(FeatureGCN))
     198             :     return 8;
     199             :   // FIXME: Need to take scratch memory into account.
     200      261641 :   return 10;
     201             : }
     202             : 
     203       30179 : unsigned getMaxWavesPerEU(const FeatureBitset &Features,
     204             :                           unsigned FlatWorkGroupSize) {
     205       60358 :   return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
     206       60358 :                  getEUsPerCU(Features)) / getEUsPerCU(Features);
     207             : }
     208             : 
     209      187391 : unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
     210      187391 :   return 1;
     211             : }
     212             : 
     213      187391 : unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
     214      187391 :   return 2048;
     215             : }
     216             : 
     217      165045 : unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
     218             :                               unsigned FlatWorkGroupSize) {
     219      495135 :   return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
     220      330090 :                  getWavefrontSize(Features);
     221             : }
     222             : 
     223      259302 : unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
     224      259302 :   IsaVersion Version = getIsaVersion(Features);
     225      259302 :   if (Version.Major >= 8)
     226             :     return 16;
     227      130826 :   return 8;
     228             : }
     229             : 
     230       28336 : unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
     231       28336 :   return 8;
     232             : }
     233             : 
     234      259302 : unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
     235      259302 :   IsaVersion Version = getIsaVersion(Features);
     236      259302 :   if (Version.Major >= 8)
     237             :     return 800;
     238      130826 :   return 512;
     239             : }
     240             : 
     241      288302 : unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
     242      576604 :   if (Features.test(FeatureSGPRInitBug))
     243             :     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
     244             : 
     245      207166 :   IsaVersion Version = getIsaVersion(Features);
     246      207166 :   if (Version.Major >= 8)
     247             :     return 102;
     248      145327 :   return 104;
     249             : }
     250             : 
     251       14208 : unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
     252             :   assert(WavesPerEU != 0);
     253             : 
     254       14208 :   if (WavesPerEU >= getMaxWavesPerEU(Features))
     255             :     return 0;
     256             :   unsigned MinNumSGPRs =
     257         230 :       alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
     258         230 :                 getSGPRAllocGranule(Features)) + 1;
     259         230 :   return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
     260             : }
     261             : 
     262      259187 : unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
     263             :                         bool Addressable) {
     264             :   assert(WavesPerEU != 0);
     265             : 
     266      259187 :   IsaVersion Version = getIsaVersion(Features);
     267      518374 :   unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
     268      518374 :                                    getSGPRAllocGranule(Features));
     269      259187 :   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
     270      259187 :   if (Version.Major >= 8 && !Addressable)
     271       52690 :     AddressableNumSGPRs = 112;
     272      259187 :   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
     273             : }
     274             : 
     275      151414 : unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
     276      151414 :   return 4;
     277             : }
     278             : 
     279       28336 : unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
     280       28336 :   return getVGPRAllocGranule(Features);
     281             : }
     282             : 
     283      260988 : unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
     284      260988 :   return 256;
     285             : }
     286             : 
     287      137910 : unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
     288      137910 :   return getTotalNumVGPRs(Features);
     289             : }
     290             : 
     291       14174 : unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
     292             :   assert(WavesPerEU != 0);
     293             : 
     294       14174 :   if (WavesPerEU >= getMaxWavesPerEU(Features))
     295             :     return 0;
     296             :   unsigned MinNumVGPRs =
     297         230 :       alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
     298         230 :                 getVGPRAllocGranule(Features)) + 1;
     299         230 :   return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
     300             : }
     301             : 
     302      122963 : unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
     303             :   assert(WavesPerEU != 0);
     304             : 
     305      245926 :   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
     306      245926 :                                    getVGPRAllocGranule(Features));
     307      122963 :   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
     308      122963 :   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
     309             : }
     310             : 
     311             : } // end namespace IsaInfo
     312             : 
     313        1757 : void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
     314             :                                const FeatureBitset &Features) {
     315        1757 :   IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
     316             : 
     317        1757 :   memset(&Header, 0, sizeof(Header));
     318             : 
     319        1757 :   Header.amd_kernel_code_version_major = 1;
     320        1757 :   Header.amd_kernel_code_version_minor = 1;
     321        1757 :   Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
     322        1757 :   Header.amd_machine_version_major = ISA.Major;
     323        1757 :   Header.amd_machine_version_minor = ISA.Minor;
     324        1757 :   Header.amd_machine_version_stepping = ISA.Stepping;
     325        1757 :   Header.kernel_code_entry_byte_offset = sizeof(Header);
     326             :   // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
     327        1757 :   Header.wavefront_size = 6;
     328             : 
     329             :   // If the code object does not support indirect functions, then the value must
     330             :   // be 0xffffffff.
     331        1757 :   Header.call_convention = -1;
     332             : 
     333             :   // These alignment values are specified in powers of two, so alignment =
     334             :   // 2^n.  The minimum alignment is 2^4 = 16.
     335        1757 :   Header.kernarg_segment_alignment = 4;
     336        1757 :   Header.group_segment_alignment = 4;
     337        1757 :   Header.private_segment_alignment = 4;
     338        1757 : }
     339             : 
     340         276 : bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
     341         552 :   return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
     342             : }
     343             : 
     344           0 : bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
     345           0 :   return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
     346             : }
     347             : 
     348          37 : bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
     349          74 :   return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
     350             : }
     351             : 
     352         116 : bool shouldEmitConstantsToTextSection(const Triple &TT) {
     353         116 :   return TT.getOS() != Triple::AMDHSA;
     354             : }
     355             : 
     356      187880 : int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
     357      187880 :   Attribute A = F.getFnAttribute(Name);
     358      187880 :   int Result = Default;
     359             : 
     360      187880 :   if (A.isStringAttribute()) {
     361         154 :     StringRef Str = A.getValueAsString();
     362         154 :     if (Str.getAsInteger(0, Result)) {
     363          18 :       LLVMContext &Ctx = F.getContext();
     364          18 :       Ctx.emitError("can't parse integer attribute " + Name);
     365             :     }
     366             :   }
     367             : 
     368      187880 :   return Result;
     369             : }
     370             : 
     371      217570 : std::pair<int, int> getIntegerPairAttribute(const Function &F,
     372             :                                             StringRef Name,
     373             :                                             std::pair<int, int> Default,
     374             :                                             bool OnlyFirstRequired) {
     375      217570 :   Attribute A = F.getFnAttribute(Name);
     376      217570 :   if (!A.isStringAttribute())
     377      216571 :     return Default;
     378             : 
     379         999 :   LLVMContext &Ctx = F.getContext();
     380         999 :   std::pair<int, int> Ints = Default;
     381         999 :   std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
     382        2997 :   if (Strs.first.trim().getAsInteger(0, Ints.first)) {
     383          20 :     Ctx.emitError("can't parse first integer attribute " + Name);
     384          20 :     return Default;
     385             :   }
     386        2901 :   if (Strs.second.trim().getAsInteger(0, Ints.second)) {
     387          42 :     if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
     388          30 :       Ctx.emitError("can't parse second integer attribute " + Name);
     389          30 :       return Default;
     390             :     }
     391             :   }
     392             : 
     393         949 :   return Ints;
     394             : }
     395             : 
     396       52182 : unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
     397       52182 :   unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
     398       52182 :   if (Version.Major < 9)
     399             :     return VmcntLo;
     400             : 
     401        5141 :   unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
     402        5141 :   return VmcntLo | VmcntHi;
     403             : }
     404             : 
     405       67014 : unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
     406       67014 :   return (1 << getExpcntBitWidth()) - 1;
     407             : }
     408             : 
     409       67014 : unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
     410       67014 :   return (1 << getLgkmcntBitWidth()) - 1;
     411             : }
     412             : 
     413       49151 : unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
     414       49151 :   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
     415       49151 :   unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
     416       49151 :   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
     417       49151 :   unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
     418       49151 :   if (Version.Major < 9)
     419             :     return Waitcnt;
     420             : 
     421        4387 :   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
     422        4387 :   return Waitcnt | VmcntHi;
     423             : }
     424             : 
     425       37401 : unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
     426             :   unsigned VmcntLo =
     427       74802 :       unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
     428       37401 :   if (Version.Major < 9)
     429             :     return VmcntLo;
     430             : 
     431             :   unsigned VmcntHi =
     432        7064 :       unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
     433        3532 :   VmcntHi <<= getVmcntBitWidthLo();
     434        3532 :   return VmcntLo | VmcntHi;
     435             : }
     436             : 
     437       37384 : unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
     438       74768 :   return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
     439             : }
     440             : 
     441       37394 : unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
     442       74788 :   return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
     443             : }
     444             : 
     445       37102 : void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
     446             :                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
     447       37102 :   Vmcnt = decodeVmcnt(Version, Waitcnt);
     448       37102 :   Expcnt = decodeExpcnt(Version, Waitcnt);
     449       37102 :   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
     450       37102 : }
     451             : 
     452       49096 : unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
     453             :                      unsigned Vmcnt) {
     454       49096 :   Waitcnt =
     455       49096 :       packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
     456       49096 :   if (Version.Major < 9)
     457             :     return Waitcnt;
     458             : 
     459        4381 :   Vmcnt >>= getVmcntBitWidthLo();
     460        8762 :   return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
     461             : }
     462             : 
     463       49079 : unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
     464             :                       unsigned Expcnt) {
     465       98158 :   return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
     466             : }
     467             : 
     468       49089 : unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
     469             :                        unsigned Lgkmcnt) {
     470       98178 :   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
     471             : }
     472             : 
     473       49027 : unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
     474             :                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
     475       49027 :   unsigned Waitcnt = getWaitcntBitMask(Version);
     476       49027 :   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
     477       49027 :   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
     478       49027 :   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
     479       49027 :   return Waitcnt;
     480             : }
     481             : 
     482         443 : unsigned getInitialPSInputAddr(const Function &F) {
     483         443 :   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
     484             : }
     485             : 
     486      626306 : bool isShader(CallingConv::ID cc) {
     487             :   switch(cc) {
     488             :     case CallingConv::AMDGPU_VS:
     489             :     case CallingConv::AMDGPU_HS:
     490             :     case CallingConv::AMDGPU_GS:
     491             :     case CallingConv::AMDGPU_PS:
     492             :     case CallingConv::AMDGPU_CS:
     493             :       return true;
     494      599916 :     default:
     495      599916 :       return false;
     496             :   }
     497             : }
     498             : 
     499      268456 : bool isCompute(CallingConv::ID cc) {
     500      268456 :   return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
     501             : }
     502             : 
     503       62079 : bool isEntryFunctionCC(CallingConv::ID CC) {
     504             :   switch (CC) {
     505             :   case CallingConv::AMDGPU_KERNEL:
     506             :   case CallingConv::SPIR_KERNEL:
     507             :   case CallingConv::AMDGPU_VS:
     508             :   case CallingConv::AMDGPU_GS:
     509             :   case CallingConv::AMDGPU_PS:
     510             :   case CallingConv::AMDGPU_CS:
     511             :   case CallingConv::AMDGPU_HS:
     512             :     return true;
     513        3345 :   default:
     514        3345 :     return false;
     515             :   }
     516             : }
     517             : 
     518      227716 : bool isSI(const MCSubtargetInfo &STI) {
     519      455432 :   return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
     520             : }
     521             : 
     522      328742 : bool isCI(const MCSubtargetInfo &STI) {
     523      657484 :   return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
     524             : }
     525             : 
     526       39292 : bool isVI(const MCSubtargetInfo &STI) {
     527       78584 :   return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
     528             : }
     529             : 
     530       21488 : bool isGFX9(const MCSubtargetInfo &STI) {
     531       42976 :   return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
     532             : }
     533             : 
     534       82056 : bool isGCN3Encoding(const MCSubtargetInfo &STI) {
     535      164112 :   return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
     536             : }
     537             : 
     538      180242 : bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
     539      180242 :   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
     540      180242 :   const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
     541      353984 :   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
     542      180242 :     Reg == AMDGPU::SCC;
     543             : }
     544             : 
     545        1084 : bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
     546      101280 :   for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
     547       49590 :     if (*R == Reg1) return true;
     548             :   }
     549        1050 :   return false;
     550             : }
     551             : 
     552     1016467 : unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
     553             : 
     554     1016467 :   switch(Reg) {
     555             :   default: break;
     556        3048 :   case AMDGPU::FLAT_SCR:
     557             :     assert(!isSI(STI));
     558        3048 :     return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
     559             : 
     560        5732 :   case AMDGPU::FLAT_SCR_LO:
     561             :     assert(!isSI(STI));
     562        5732 :     return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
     563             : 
     564        5722 :   case AMDGPU::FLAT_SCR_HI:
     565             :     assert(!isSI(STI));
     566        5722 :     return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
     567             :   }
     568             :   return Reg;
     569             : }
     570             : 
     571      219511 : unsigned mc2PseudoReg(unsigned Reg) {
     572             :   switch (Reg) {
     573             :   case AMDGPU::FLAT_SCR_ci:
     574             :   case AMDGPU::FLAT_SCR_vi:
     575             :     return FLAT_SCR;
     576             : 
     577        5453 :   case AMDGPU::FLAT_SCR_LO_ci:
     578             :   case AMDGPU::FLAT_SCR_LO_vi:
     579        5453 :     return AMDGPU::FLAT_SCR_LO;
     580             : 
     581        5435 :   case AMDGPU::FLAT_SCR_HI_ci:
     582             :   case AMDGPU::FLAT_SCR_HI_vi:
     583        5435 :     return AMDGPU::FLAT_SCR_HI;
     584             : 
     585      206868 :   default:
     586      206868 :     return Reg;
     587             :   }
     588             : }
     589             : 
     590      666177 : bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
     591             :   assert(OpNo < Desc.NumOperands);
     592      666177 :   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
     593      666177 :   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
     594      666177 :          OpType <= AMDGPU::OPERAND_SRC_LAST;
     595             : }
     596             : 
     597          62 : bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
     598             :   assert(OpNo < Desc.NumOperands);
     599          62 :   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
     600             :   switch (OpType) {
     601             :   case AMDGPU::OPERAND_REG_IMM_FP32:
     602             :   case AMDGPU::OPERAND_REG_IMM_FP64:
     603             :   case AMDGPU::OPERAND_REG_IMM_FP16:
     604             :   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
     605             :   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
     606             :   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
     607             :   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
     608             :     return true;
     609           0 :   default:
     610           0 :     return false;
     611             :   }
     612             : }
     613             : 
     614           0 : bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
     615             :   assert(OpNo < Desc.NumOperands);
     616           0 :   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
     617           0 :   return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
     618           0 :          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
     619             : }
     620             : 
     621             : // Avoid using MCRegisterClass::getSize, since that function will go away
     622             : // (move from MC* level to Target* level). Return size in bits.
     623       42842 : unsigned getRegBitWidth(unsigned RCID) {
     624       42842 :   switch (RCID) {
     625             :   case AMDGPU::SGPR_32RegClassID:
     626             :   case AMDGPU::VGPR_32RegClassID:
     627             :   case AMDGPU::VS_32RegClassID:
     628             :   case AMDGPU::SReg_32RegClassID:
     629             :   case AMDGPU::SReg_32_XM0RegClassID:
     630             :     return 32;
     631       10649 :   case AMDGPU::SGPR_64RegClassID:
     632             :   case AMDGPU::VS_64RegClassID:
     633             :   case AMDGPU::SReg_64RegClassID:
     634             :   case AMDGPU::VReg_64RegClassID:
     635       10649 :     return 64;
     636          12 :   case AMDGPU::VReg_96RegClassID:
     637          12 :     return 96;
     638       19488 :   case AMDGPU::SGPR_128RegClassID:
     639             :   case AMDGPU::SReg_128RegClassID:
     640             :   case AMDGPU::VReg_128RegClassID:
     641       19488 :     return 128;
     642          63 :   case AMDGPU::SReg_256RegClassID:
     643             :   case AMDGPU::VReg_256RegClassID:
     644          63 :     return 256;
     645          22 :   case AMDGPU::SReg_512RegClassID:
     646             :   case AMDGPU::VReg_512RegClassID:
     647          22 :     return 512;
     648           0 :   default:
     649           0 :     llvm_unreachable("Unexpected register class");
     650             :   }
     651             : }
     652             : 
     653        5420 : unsigned getRegBitWidth(const MCRegisterClass &RC) {
     654       10840 :   return getRegBitWidth(RC.getID());
     655             : }
     656             : 
     657           0 : unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
     658             :                            unsigned OpNo) {
     659             :   assert(OpNo < Desc.NumOperands);
     660           0 :   unsigned RCID = Desc.OpInfo[OpNo].RegClass;
     661           0 :   return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
     662             : }
     663             : 
     664       61331 : bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
     665       61331 :   if (Literal >= -16 && Literal <= 64)
     666             :     return true;
     667             : 
     668       30228 :   uint64_t Val = static_cast<uint64_t>(Literal);
     669       60456 :   return (Val == DoubleToBits(0.0)) ||
     670       49048 :          (Val == DoubleToBits(1.0)) ||
     671       36448 :          (Val == DoubleToBits(-1.0)) ||
     672       31280 :          (Val == DoubleToBits(0.5)) ||
     673       27130 :          (Val == DoubleToBits(-0.5)) ||
     674       23822 :          (Val == DoubleToBits(2.0)) ||
     675       20514 :          (Val == DoubleToBits(-2.0)) ||
     676       18886 :          (Val == DoubleToBits(4.0)) ||
     677       43823 :          (Val == DoubleToBits(-4.0)) ||
     678        4879 :          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
     679             : }
     680             : 
     681     3586023 : bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
     682     3586023 :   if (Literal >= -16 && Literal <= 64)
     683             :     return true;
     684             : 
     685             :   // The actual type of the operand does not seem to matter as long
     686             :   // as the bits match one of the inline immediate values.  For example:
     687             :   //
     688             :   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
     689             :   // so it is a legal inline immediate.
     690             :   //
     691             :   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
     692             :   // floating-point, so it is a legal inline immediate.
     693             : 
     694      316948 :   uint32_t Val = static_cast<uint32_t>(Literal);
     695      633896 :   return (Val == FloatToBits(0.0f)) ||
     696      592114 :          (Val == FloatToBits(1.0f)) ||
     697      545038 :          (Val == FloatToBits(-1.0f)) ||
     698      527606 :          (Val == FloatToBits(0.5f)) ||
     699      514198 :          (Val == FloatToBits(-0.5f)) ||
     700      494436 :          (Val == FloatToBits(2.0f)) ||
     701      472263 :          (Val == FloatToBits(-2.0f)) ||
     702      454445 :          (Val == FloatToBits(4.0f)) ||
     703      748021 :          (Val == FloatToBits(-4.0f)) ||
     704      210919 :          (Val == 0x3e22f983 && HasInv2Pi);
     705             : }
     706             : 
     707      125360 : bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
     708      125360 :   if (!HasInv2Pi)
     709             :     return false;
     710             : 
     711      125160 :   if (Literal >= -16 && Literal <= 64)
     712             :     return true;
     713             : 
     714       18170 :   uint16_t Val = static_cast<uint16_t>(Literal);
     715       36340 :   return Val == 0x3C00 || // 1.0
     716       18170 :          Val == 0xBC00 || // -1.0
     717       29942 :          Val == 0x3800 || // 0.5
     718       14971 :          Val == 0xB800 || // -0.5
     719       24982 :          Val == 0x4000 || // 2.0
     720       12491 :          Val == 0xC000 || // -2.0
     721       10780 :          Val == 0x4400 || // 4.0
     722       25873 :          Val == 0xC400 || // -4.0
     723             :          Val == 0x3118;   // 1/2pi
     724             : }
     725             : 
     726        3282 : bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
     727             :   assert(HasInv2Pi);
     728             : 
     729        3282 :   if (!EnablePackedInlinableLiterals)
     730             :     return false;
     731             : 
     732        3038 :   int16_t Lo16 = static_cast<int16_t>(Literal);
     733        3038 :   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
     734        3038 :   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
     735             : }
     736             : 
     737        2327 : bool isArgPassedInSGPR(const Argument *A) {
     738        2327 :   const Function *F = A->getParent();
     739             : 
     740             :   // Arguments to compute shaders are never a source of divergence.
     741        2327 :   CallingConv::ID CC = F->getCallingConv();
     742             :   switch (CC) {
     743             :   case CallingConv::AMDGPU_KERNEL:
     744             :   case CallingConv::SPIR_KERNEL:
     745             :     return true;
     746         150 :   case CallingConv::AMDGPU_VS:
     747             :   case CallingConv::AMDGPU_HS:
     748             :   case CallingConv::AMDGPU_GS:
     749             :   case CallingConv::AMDGPU_PS:
     750             :   case CallingConv::AMDGPU_CS:
     751             :     // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
     752             :     // Everything else is in VGPRs.
     753         300 :     return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
     754         150 :            F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
     755          84 :   default:
     756             :     // TODO: Should calls support inreg for SGPR inputs?
     757          84 :     return false;
     758             :   }
     759             : }
     760             : 
     761             : // TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
     762      114980 : bool isUniformMMO(const MachineMemOperand *MMO) {
     763      114980 :   const Value *Ptr = MMO->getValue();
     764             :   // UndefValue means this is a load of a kernel input.  These are uniform.
     765             :   // Sometimes LDS instructions have constant pointers.
     766             :   // If Ptr is null, then that means this mem operand contains a
     767             :   // PseudoSourceValue like GOT.
     768      229960 :   if (!Ptr || isa<UndefValue>(Ptr) ||
     769      142448 :       isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
     770             :     return true;
     771             : 
     772       27430 :   if (const Argument *Arg = dyn_cast<Argument>(Ptr))
     773        2327 :     return isArgPassedInSGPR(Arg);
     774             : 
     775       22776 :   const Instruction *I = dyn_cast<Instruction>(Ptr);
     776       14521 :   return I && I->getMetadata("amdgpu.uniform");
     777             : }
     778             : 
     779       54704 : int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
     780       54704 :   if (isGCN3Encoding(ST))
     781             :     return ByteOffset;
     782       27360 :   return ByteOffset >> 2;
     783             : }
     784             : 
     785       27352 : bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
     786       27352 :   int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
     787       54704 :   return isGCN3Encoding(ST) ?
     788       54704 :     isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
     789             : }
     790             : } // end namespace AMDGPU
     791             : 
     792             : } // end namespace llvm
     793             : 
                       // Namespace-scope definitions for the static const unsigned members of
                       // AMDGPUAS. No initializers are given here, so the values presumably come
                       // from in-class initializers in the declaring header (TODO confirm).
     794             : const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
     795             : const unsigned AMDGPUAS::GLOBAL_ADDRESS;
     796             : const unsigned AMDGPUAS::LOCAL_ADDRESS;
     797             : const unsigned AMDGPUAS::PARAM_D_ADDRESS;
     798             : const unsigned AMDGPUAS::PARAM_I_ADDRESS;
     799             : const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
     800             : const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
     801             : const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
     802             : const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
     803             : const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
     804             : const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
     805             : const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
     806             : const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
     807             : const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
     808             : const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
     809             : const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
     810             : const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
     811             : const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
     812             : const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
     813             : const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
     814             : const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
     815             : const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
     816             : 
     817             : namespace llvm {
     818             : namespace AMDGPU {
     819             : 
     820       29154 : AMDGPUAS getAMDGPUAS(Triple T) {
     821       29154 :   auto Env = T.getEnvironmentName();
     822             :   AMDGPUAS AS;
     823       58274 :   if (Env == "amdgiz" || Env == "amdgizcl") {
     824             :     AS.FLAT_ADDRESS     = 0;
     825             :     AS.PRIVATE_ADDRESS  = 5;
     826             :     AS.REGION_ADDRESS   = 4;
     827             :   }
     828             :   else {
     829             :     AS.FLAT_ADDRESS     = 4;
     830             :     AS.PRIVATE_ADDRESS  = 0;
     831             :     AS.REGION_ADDRESS   = 5;
     832             :    }
     833       29154 :   return AS;
     834             : }
     835             : 
     836        3756 : AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
     837        7512 :   return getAMDGPUAS(M.getTargetTriple());
     838             : }
     839             : 
     840       19538 : AMDGPUAS getAMDGPUAS(const Module &M) {
     841       58614 :   return getAMDGPUAS(Triple(M.getTargetTriple()));
     842             : }
     843             : } // namespace AMDGPU
     844      216918 : } // namespace llvm

Generated by: LCOV version 1.13