LLVM  6.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUCallLowering.h"
20 #include "R600FrameLowering.h"
21 #include "R600ISelLowering.h"
22 #include "R600InstrInfo.h"
23 #include "SIFrameLowering.h"
24 #include "SIISelLowering.h"
25 #include "SIInstrInfo.h"
26 #include "SIMachineFunctionInfo.h"
27 #include "Utils/AMDGPUBaseInfo.h"
28 #include "llvm/ADT/Triple.h"
36 #include <cassert>
37 #include <cstdint>
38 #include <memory>
39 #include <utility>
40 
41 #define GET_SUBTARGETINFO_HEADER
42 #include "AMDGPUGenSubtargetInfo.inc"
43 
44 namespace llvm {
45 
46 class StringRef;
47 
49 public:
50  enum Generation {
51  R600 = 0,
59  };
60 
61  enum {
79  };
80 
84  };
85 
86  enum TrapID {
95  };
96 
99  };
100 
101 protected:
102  // Basic subtarget description.
105  unsigned IsaVersion;
106  unsigned WavefrontSize;
110 
111  // Possibly statically set by tablegen, but may want to be overridden.
114 
115  // Dynamically set bits that enable features.
119  bool DX10Clamp;
131 
132  // Used as options.
139  bool DumpCode;
140 
141  // Subtarget properties statically set by tablegen
142  bool FP64;
143  bool IsGCN;
145  bool CIInsts;
146  bool GFX9Insts;
153  bool HasMovrel;
157  bool HasSDWA;
163  bool HasDPP;
170  bool CaymanISA;
171  bool CFALUBug;
175 
176  // Dummy feature to use for assembler in tablegen.
178 
182 
183 public:
184  AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
185  const TargetMachine &TM);
186  ~AMDGPUSubtarget() override;
187 
189  StringRef GPU, StringRef FS);
190 
191  const AMDGPUInstrInfo *getInstrInfo() const override = 0;
192  const AMDGPUFrameLowering *getFrameLowering() const override = 0;
193  const AMDGPUTargetLowering *getTargetLowering() const override = 0;
194  const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
195 
196  const InstrItineraryData *getInstrItineraryData() const override {
197  return &InstrItins;
198  }
199 
200  // Nothing implemented, just prevent crashes on use: this hands back the
     // address of the TSInfo member, so the returned pointer is never null.
201  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
202  return &TSInfo;
203  }
204 
206 
207  bool isAmdHsaOS() const {
208  return TargetTriple.getOS() == Triple::AMDHSA;
209  }
210 
211  bool isMesa3DOS() const {
212  return TargetTriple.getOS() == Triple::Mesa3D;
213  }
214 
/// \returns true if the target triple's parsed environment is Triple::OpenCL,
/// or if its environment name string is exactly "amdgizcl".
215  bool isOpenCLEnv() const {
216  return TargetTriple.getEnvironment() == Triple::OpenCL ||
217  TargetTriple.getEnvironmentName() == "amdgizcl";
218  }
219 
220  bool isAmdPalOS() const {
221  return TargetTriple.getOS() == Triple::AMDPAL;
222  }
223 
225  return Gen;
226  }
227 
228  unsigned getWavefrontSize() const {
229  return WavefrontSize;
230  }
231 
/// \returns Log2_32(WavefrontSize), i.e. the floor of the base-2 logarithm of
/// the wavefront size.
232  unsigned getWavefrontSizeLog2() const {
233  return Log2_32(WavefrontSize);
234  }
235 
236  int getLocalMemorySize() const {
237  return LocalMemorySize;
238  }
239 
240  int getLDSBankCount() const {
241  return LDSBankCount;
242  }
243 
244  unsigned getMaxPrivateElementSize() const {
245  return MaxPrivateElementSize;
246  }
247 
249  return AS;
250  }
251 
252  bool has16BitInsts() const {
253  return Has16BitInsts;
254  }
255 
256  bool hasIntClamp() const {
257  return HasIntClamp;
258  }
259 
260  bool hasVOP3PInsts() const {
261  return HasVOP3PInsts;
262  }
263 
264  bool hasHWFP64() const {
265  return FP64;
266  }
267 
268  bool hasFastFMAF32() const {
269  return FastFMAF32;
270  }
271 
272  bool hasHalfRate64Ops() const {
273  return HalfRate64Ops;
274  }
275 
276  bool hasAddr64() const {
277  return (getGeneration() < VOLCANIC_ISLANDS);
278  }
279 
280  bool hasBFE() const {
281  return (getGeneration() >= EVERGREEN);
282  }
283 
284  bool hasBFI() const {
285  return (getGeneration() >= EVERGREEN);
286  }
287 
288  bool hasBFM() const {
289  return hasBFE();
290  }
291 
/// Generation check keyed on the operand width \p Size (in bits).
///
/// \returns true for \p Size == 32 when the generation is at least EVERGREEN,
/// true for \p Size == 64 when the generation is at least SOUTHERN_ISLANDS,
/// and false for every other \p Size.
292  bool hasBCNT(unsigned Size) const {
293  if (Size == 32)
294  return (getGeneration() >= EVERGREEN);
295 
296  if (Size == 64)
297  return (getGeneration() >= SOUTHERN_ISLANDS);
298 
     // Any width other than 32 or 64 is reported as unsupported.
299  return false;
300  }
301 
302  bool hasMulU24() const {
303  return (getGeneration() >= EVERGREEN);
304  }
305 
/// \returns true if the generation is at least SOUTHERN_ISLANDS, or if
/// hasCaymanISA() reports true for an earlier generation.
306  bool hasMulI24() const {
307  return (getGeneration() >= SOUTHERN_ISLANDS ||
308  hasCaymanISA());
309  }
310 
311  bool hasFFBL() const {
312  return (getGeneration() >= EVERGREEN);
313  }
314 
315  bool hasFFBH() const {
316  return (getGeneration() >= EVERGREEN);
317  }
318 
319  bool hasMed3_16() const {
320  return getGeneration() >= GFX9;
321  }
322 
323  bool hasMin3Max3_16() const {
324  return getGeneration() >= GFX9;
325  }
326 
327  bool hasMadMixInsts() const {
328  return HasMadMixInsts;
329  }
330 
332  // Only use the "x1" variants on GFX9 or don't use the buffer variants.
333  // For x2 and higher variants, if the accessed region spans 2 VM pages and
334  // the second page is unmapped, the hw hangs.
335  // TODO: There is one future GFX9 chip that doesn't have this bug.
336  return getGeneration() != GFX9;
337  }
338 
339  bool hasCARRY() const {
340  return (getGeneration() >= EVERGREEN);
341  }
342 
343  bool hasBORROW() const {
344  return (getGeneration() >= EVERGREEN);
345  }
346 
347  bool hasCaymanISA() const {
348  return CaymanISA;
349  }
350 
353  }
354 
355  bool enableHugePrivateBuffer() const {
357  }
358 
359  bool isPromoteAllocaEnabled() const {
360  return EnablePromoteAlloca;
361  }
362 
365  }
366 
367  bool dumpCode() const {
368  return DumpCode;
369  }
370 
371  /// Return the amount of LDS that can be used that will not restrict the
372  /// occupancy lower than WaveCount.
373  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
374  const Function &) const;
375 
376  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wave count
377  /// if the given LDS memory size (\p Bytes) is the only constraint.
378  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
379 
/// Convenience overload taking a machine function: reads the LDS size from
/// \p MF's SIMachineFunctionInfo and forwards it, together with the function
/// returned by MF.getFunction(), to the (Bytes, Function) overload.
380  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
381  const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
382  return getOccupancyWithLocalMemSize(MFI->getLDSSize(), *MF.getFunction());
383  }
384 
/// \returns the FP64FP16Denormals flag. The same flag backs
/// hasFP64Denormals(), so the two queries always agree.
385  bool hasFP16Denormals() const {
386  return FP64FP16Denormals;
387  }
388 
389  bool hasFP32Denormals() const {
390  return FP32Denormals;
391  }
392 
/// \returns the FP64FP16Denormals flag. The same flag backs
/// hasFP16Denormals(), so the two queries always agree.
393  bool hasFP64Denormals() const {
394  return FP64FP16Denormals;
395  }
396 
399  }
400 
401  bool hasFPExceptions() const {
402  return FPExceptions;
403  }
404 
405  bool enableDX10Clamp() const {
406  return DX10Clamp;
407  }
408 
409  bool enableIEEEBit(const MachineFunction &MF) const {
411  }
412 
413  bool useFlatForGlobal() const {
414  return FlatForGlobal;
415  }
416 
419  }
420 
421  bool hasCodeObjectV3() const {
422  return CodeObjectV3;
423  }
424 
426  return UnalignedBufferAccess;
427  }
428 
430  return UnalignedScratchAccess;
431  }
432 
433  bool hasApertureRegs() const {
434  return HasApertureRegs;
435  }
436 
437  bool isTrapHandlerEnabled() const {
438  return TrapHandler;
439  }
440 
441  bool isXNACKEnabled() const {
442  return EnableXNACK;
443  }
444 
445  bool hasFlatAddressSpace() const {
446  return FlatAddressSpace;
447  }
448 
449  bool hasFlatInstOffsets() const {
450  return FlatInstOffsets;
451  }
452 
453  bool hasFlatGlobalInsts() const {
454  return FlatGlobalInsts;
455  }
456 
457  bool hasFlatScratchInsts() const {
458  return FlatScratchInsts;
459  }
460 
461  bool hasD16LoadStore() const {
462  return getGeneration() >= GFX9;
463  }
464 
465  bool hasAddNoCarry() const {
466  return AddNoCarryInsts;
467  }
468 
469  bool isMesaKernel(const MachineFunction &MF) const {
471  }
472 
473  // Covers VS/PS/CS graphics shaders
474  bool isMesaGfxShader(const MachineFunction &MF) const {
476  }
477 
/// \returns true if the target OS is AMDHSA (isAmdHsaOS()) or if
/// isMesaKernel(\p MF) holds for the given function.
478  bool isAmdCodeObjectV2(const MachineFunction &MF) const {
479  return isAmdHsaOS() || isMesaKernel(MF);
480  }
481 
482  bool hasMad64_32() const {
483  return getGeneration() >= SEA_ISLANDS;
484  }
485 
486  bool hasFminFmaxLegacy() const {
488  }
489 
490  bool hasSDWA() const {
491  return HasSDWA;
492  }
493 
494  bool hasSDWAOmod() const {
495  return HasSDWAOmod;
496  }
497 
498  bool hasSDWAScalar() const {
499  return HasSDWAScalar;
500  }
501 
502  bool hasSDWASdst() const {
503  return HasSDWASdst;
504  }
505 
506  bool hasSDWAMac() const {
507  return HasSDWAMac;
508  }
509 
510  bool hasSDWAOutModsVOPC() const {
511  return HasSDWAOutModsVOPC;
512  }
513 
514  /// \brief Returns the offset in bytes from the start of the input buffer
515  /// of the first explicit kernel argument.
     ///
     /// The offset is 0 when isAmdCodeObjectV2(\p MF) holds and 36 otherwise.
516  unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
517  return isAmdCodeObjectV2(MF) ? 0 : 36;
518  }
519 
/// \returns 8 when the target OS is AMDHSA and 4 otherwise.
/// NOTE(review): the unit is presumably bytes -- confirm against callers.
520  unsigned getAlignmentForImplicitArgPtr() const {
521  return isAmdHsaOS() ? 8 : 4;
522  }
523 
/// \returns the number of implicit-argument bytes for \p MF: 16 if
/// isMesaKernel(\p MF) holds, otherwise 32 if the OS is AMDHSA and the
/// environment is OpenCL (see isOpenCLEnv()), otherwise 0.
524  unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
     // The Mesa-kernel check comes first, so it wins over the AMDHSA/OpenCL case.
525  if (isMesaKernel(MF))
526  return 16;
527  if (isAmdHsaOS() && isOpenCLEnv())
528  return 32;
529  return 0;
530  }
531 
532  // Scratch is allocated in 256 dword per wave blocks for the entire
533  // wavefront. When viewed from the perspective of an arbitrary workitem, this
534  // is 4-byte aligned.
535  unsigned getStackAlignment() const {
536  return 4;
537  }
538 
539  bool enableMachineScheduler() const override {
540  return true;
541  }
542 
543  bool enableSubRegLiveness() const override {
544  return true;
545  }
546 
547  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
549 
550  /// \returns Number of execution units per compute unit supported by the
551  /// subtarget.
552  unsigned getEUsPerCU() const {
553  return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
554  }
555 
556  /// \returns Maximum number of work groups per compute unit supported by the
557  /// subtarget and limited by given \p FlatWorkGroupSize.
558  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
559  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
560  FlatWorkGroupSize);
561  }
562 
563  /// \returns Maximum number of waves per compute unit supported by the
564  /// subtarget without any kind of limitation.
565  unsigned getMaxWavesPerCU() const {
566  return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
567  }
568 
569  /// \returns Maximum number of waves per compute unit supported by the
570  /// subtarget and limited by given \p FlatWorkGroupSize.
571  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
572  return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
573  FlatWorkGroupSize);
574  }
575 
576  /// \returns Minimum number of waves per execution unit supported by the
577  /// subtarget.
578  unsigned getMinWavesPerEU() const {
579  return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
580  }
581 
582  /// \returns Maximum number of waves per execution unit supported by the
583  /// subtarget without any kind of limitation.
584  unsigned getMaxWavesPerEU() const {
585  return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
586  }
587 
588  /// \returns Maximum number of waves per execution unit supported by the
589  /// subtarget and limited by given \p FlatWorkGroupSize.
590  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
591  return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
592  FlatWorkGroupSize);
593  }
594 
595  /// \returns Minimum flat work group size supported by the subtarget.
596  unsigned getMinFlatWorkGroupSize() const {
597  return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
598  }
599 
600  /// \returns Maximum flat work group size supported by the subtarget.
601  unsigned getMaxFlatWorkGroupSize() const {
602  return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
603  }
604 
605  /// \returns Number of waves per work group supported by the subtarget and
606  /// limited by given \p FlatWorkGroupSize.
607  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
608  return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
609  FlatWorkGroupSize);
610  }
611 
612  /// \returns Default range flat work group size for a calling convention.
613  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
614 
615  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
616  /// for function \p F, or minimum/maximum flat work group sizes explicitly
617  /// requested using "amdgpu-flat-work-group-size" attribute attached to
618  /// function \p F.
619  ///
620  /// \returns Subtarget's default values if explicitly requested values cannot
621  /// be converted to integer, or violate subtarget's specifications.
622  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
623 
624  /// \returns Subtarget's default pair of minimum/maximum number of waves per
625  /// execution unit for function \p F, or minimum/maximum number of waves per
626  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
627  /// attached to function \p F.
628  ///
629  /// \returns Subtarget's default values if explicitly requested values cannot
630  /// be converted to integer, violate subtarget's specifications, or are not
631  /// compatible with minimum/maximum number of waves limited by flat work group
632  /// size, register usage, and/or lds usage.
633  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
634 
635  /// Creates value range metadata on a workitemid.* intrinsic call or load.
636  bool makeLIDRangeMetadata(Instruction *I) const;
637 };
638 
/// Concrete AMDGPUSubtarget built from the R600 instruction-info,
/// frame-lowering and target-lowering classes. The pure-virtual accessors of
/// the base class are overridden here to hand out pointers to the objects this
/// subtarget stores by value.
639 class R600Subtarget final : public AMDGPUSubtarget {
640 private:
     // Held by value; the accessors below return pointers into this object.
641  R600InstrInfo InstrInfo;
642  R600FrameLowering FrameLowering;
643  R600TargetLowering TLInfo;
644 
645 public:
646  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
647  const TargetMachine &TM);
648 
     /// \returns the address of the InstrInfo member.
649  const R600InstrInfo *getInstrInfo() const override {
650  return &InstrInfo;
651  }
652 
     /// \returns the address of the FrameLowering member.
653  const R600FrameLowering *getFrameLowering() const override {
654  return &FrameLowering;
655  }
656 
     /// \returns the address of the TLInfo member.
657  const R600TargetLowering *getTargetLowering() const override {
658  return &TLInfo;
659  }
660 
     /// \returns the register info obtained from InstrInfo.getRegisterInfo();
     /// no separate register-info member is stored in this class.
661  const R600RegisterInfo *getRegisterInfo() const override {
662  return &InstrInfo.getRegisterInfo();
663  }
664 
     /// \returns the CFALUBug flag inherited from AMDGPUSubtarget.
665  bool hasCFAluBug() const {
666  return CFALUBug;
667  }
668 
     /// \returns the HasVertexCache flag.
669  bool hasVertexCache() const {
670  return HasVertexCache;
671  }
672 
     /// \returns the TexVTXClauseSize value.
673  short getTexVTXClauseSize() const {
674  return TexVTXClauseSize;
675  }
676 };
677 
678 class SISubtarget final : public AMDGPUSubtarget {
679 private:
680  SIInstrInfo InstrInfo;
681  SIFrameLowering FrameLowering;
682  SITargetLowering TLInfo;
683 
684  /// GlobalISel related APIs.
685  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
686  std::unique_ptr<InstructionSelector> InstSelector;
687  std::unique_ptr<LegalizerInfo> Legalizer;
688  std::unique_ptr<RegisterBankInfo> RegBankInfo;
689 
690 public:
691  SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
692  const TargetMachine &TM);
693 
694  const SIInstrInfo *getInstrInfo() const override {
695  return &InstrInfo;
696  }
697 
698  const SIFrameLowering *getFrameLowering() const override {
699  return &FrameLowering;
700  }
701 
702  const SITargetLowering *getTargetLowering() const override {
703  return &TLInfo;
704  }
705 
706  const CallLowering *getCallLowering() const override {
707  return CallLoweringInfo.get();
708  }
709 
710  const InstructionSelector *getInstructionSelector() const override {
711  return InstSelector.get();
712  }
713 
714  const LegalizerInfo *getLegalizerInfo() const override {
715  return Legalizer.get();
716  }
717 
718  const RegisterBankInfo *getRegBankInfo() const override {
719  return RegBankInfo.get();
720  }
721 
722  const SIRegisterInfo *getRegisterInfo() const override {
723  return &InstrInfo.getRegisterInfo();
724  }
725 
726  // XXX - Why is this here if it isn't in the default pass set?
727  bool enableEarlyIfConversion() const override {
728  return true;
729  }
730 
731  void overrideSchedPolicy(MachineSchedPolicy &Policy,
732  unsigned NumRegionInstrs) const override;
733 
734  bool isVGPRSpillingEnabled(const Function& F) const;
735 
736  unsigned getMaxNumUserSGPRs() const {
737  return 16;
738  }
739 
740  bool hasSMemRealTime() const {
741  return HasSMemRealTime;
742  }
743 
744  bool hasMovrel() const {
745  return HasMovrel;
746  }
747 
748  bool hasVGPRIndexMode() const {
749  return HasVGPRIndexMode;
750  }
751 
/// \returns true if hasMovrel() is false, or if \p UserEnable is set and
/// hasVGPRIndexMode() is true. When movrel is available, index mode is
/// therefore only chosen on explicit request.
752  bool useVGPRIndexMode(bool UserEnable) const {
753  return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
754  }
755 
756  bool hasScalarCompareEq64() const {
757  return getGeneration() >= VOLCANIC_ISLANDS;
758  }
759 
760  bool hasScalarStores() const {
761  return HasScalarStores;
762  }
763 
764  bool hasInv2PiInlineImm() const {
765  return HasInv2PiInlineImm;
766  }
767 
768  bool hasDPP() const {
769  return HasDPP;
770  }
771 
772  bool enableSIScheduler() const {
773  return EnableSIScheduler;
774  }
775 
/// \returns true only when all three debugger options are enabled together:
/// debuggerInsertNops(), debuggerReserveRegs() and debuggerEmitPrologue().
776  bool debuggerSupported() const {
777  return debuggerInsertNops() && debuggerReserveRegs() &&
778  debuggerEmitPrologue();
779  }
780 
781  bool debuggerInsertNops() const {
782  return DebuggerInsertNops;
783  }
784 
785  bool debuggerReserveRegs() const {
786  return DebuggerReserveRegs;
787  }
788 
789  bool debuggerEmitPrologue() const {
790  return DebuggerEmitPrologue;
791  }
792 
793  bool loadStoreOptEnabled() const {
794  return EnableLoadStoreOpt;
795  }
796 
797  bool hasSGPRInitBug() const {
798  return SGPRInitBug;
799  }
800 
801  bool has12DWordStoreHazard() const {
803  }
804 
805  bool hasSMovFedHazard() const {
807  }
808 
811  }
812 
813  bool hasReadM0SendMsgHazard() const {
815  }
816 
817  unsigned getKernArgSegmentSize(const MachineFunction &MF,
818  unsigned ExplictArgBytes) const;
819 
820  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
821  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
822 
823  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
824  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
825 
826  /// \returns true if the flat_scratch register should be initialized with the
827  /// pointer to the wave's scratch memory rather than a size and offset.
828  bool flatScratchIsPointer() const {
829  return getGeneration() >= GFX9;
830  }
831 
832  /// \returns SGPR allocation granularity supported by the subtarget.
833  unsigned getSGPRAllocGranule() const {
834  return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
835  }
836 
837  /// \returns SGPR encoding granularity supported by the subtarget.
838  unsigned getSGPREncodingGranule() const {
839  return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
840  }
841 
842  /// \returns Total number of SGPRs supported by the subtarget.
843  unsigned getTotalNumSGPRs() const {
844  return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
845  }
846 
847  /// \returns Addressable number of SGPRs supported by the subtarget.
848  unsigned getAddressableNumSGPRs() const {
849  return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
850  }
851 
852  /// \returns Minimum number of SGPRs that meets the given number of waves per
853  /// execution unit requirement supported by the subtarget.
854  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
855  return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
856  }
857 
858  /// \returns Maximum number of SGPRs that meets the given number of waves per
859  /// execution unit requirement supported by the subtarget.
860  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
861  return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
862  Addressable);
863  }
864 
865  /// \returns Reserved number of SGPRs for given function \p MF.
866  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
867 
868  /// \returns Maximum number of SGPRs that meets number of waves per execution
869  /// unit requirement for function \p MF, or number of SGPRs explicitly
870  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
871  ///
872  /// \returns Value that meets number of waves per execution unit requirement
873  /// if explicitly requested value cannot be converted to integer, violates
874  /// subtarget's specifications, or does not meet number of waves per execution
875  /// unit requirement.
876  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
877 
878  /// \returns VGPR allocation granularity supported by the subtarget.
879  unsigned getVGPRAllocGranule() const {
880  return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
881  }
882 
883  /// \returns VGPR encoding granularity supported by the subtarget.
884  unsigned getVGPREncodingGranule() const {
885  return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
886  }
887 
888  /// \returns Total number of VGPRs supported by the subtarget.
889  unsigned getTotalNumVGPRs() const {
890  return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
891  }
892 
893  /// \returns Addressable number of VGPRs supported by the subtarget.
894  unsigned getAddressableNumVGPRs() const {
895  return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
896  }
897 
898  /// \returns Minimum number of VGPRs that meets given number of waves per
899  /// execution unit requirement supported by the subtarget.
900  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
901  return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
902  }
903 
904  /// \returns Maximum number of VGPRs that meets given number of waves per
905  /// execution unit requirement supported by the subtarget.
906  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
907  return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
908  }
909 
910  /// \returns Reserved number of VGPRs for given function \p MF.
     ///
     /// The result is 4 when debuggerReserveRegs() is set and 0 otherwise; \p MF
     /// itself is not consulted by this implementation.
911  unsigned getReservedNumVGPRs(const MachineFunction &MF) const {
912  return debuggerReserveRegs() ? 4 : 0;
913  }
914 
915  /// \returns Maximum number of VGPRs that meets number of waves per execution
916  /// unit requirement for function \p MF, or number of VGPRs explicitly
917  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
918  ///
919  /// \returns Value that meets number of waves per execution unit requirement
920  /// if explicitly requested value cannot be converted to integer, violates
921  /// subtarget's specifications, or does not meet number of waves per execution
922  /// unit requirement.
923  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
924 
925  void getPostRAMutations(
926  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
927  const override;
928 };
929 
930 } // end namespace llvm
931 
932 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasAutoWaitcntBeforeBarrier() const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
bool hasFP64Denormals() const
const AMDGPURegisterInfo * getRegisterInfo() const override=0
unsigned getAddressableNumVGPRs(const FeatureBitset &Features)
Generation getGeneration() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool getScalarizeGlobalBehavior() const
Interface definition for R600InstrInfo.
bool hasScalarCompareEq64() const
bool hasSBufferLoadStoreAtomicDwordxN() const
unsigned getVGPREncodingGranule() const
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
bool hasFlatScratchInsts() const
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:294
bool isPromoteAllocaEnabled() const
This file describes how to lower LLVM calls to machine code calls.
AMDGPUSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
const SIInstrInfo * getInstrInfo() const override
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
bool hasUnalignedScratchAccess() const
SelectionDAGTargetInfo TSInfo
F(f)
unsigned getTotalNumVGPRs(const FeatureBitset &Features)
unsigned getMinWavesPerEU(const FeatureBitset &Features)
unsigned getStackAlignment() const
bool debuggerReserveRegs() const
bool hasFlatGlobalInsts() const
unsigned getWavesPerWorkGroup(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
bool hasMovrel() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:146
bool hasVOP3PInsts() const
unsigned getVGPREncodingGranule(const FeatureBitset &Features)
unsigned getEUsPerCU(const FeatureBitset &Features)
Holds all the information related to register banks.
bool flatScratchIsPointer() const
bool hasFP16Denormals() const
bool hasInv2PiInlineImm() const
unsigned getVGPRAllocGranule(const FeatureBitset &Features)
OpenCL uses address spaces to differentiate between various memory regions on the hardware...
Definition: AMDGPU.h:214
unsigned getMinFlatWorkGroupSize() const
int getLocalMemorySize() const
unsigned getTotalNumVGPRs() const
bool debuggerInsertNops() const
const InstrItineraryData * getInstrItineraryData() const override
const InstructionSelector * getInstructionSelector() const override
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features)
unsigned getMaxWavesPerCU(const FeatureBitset &Features)
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, bool Addressable)
bool hasSDWAOutModsVOPC() const
const LegalizerInfo * getLegalizerInfo() const override
unsigned getMaxNumUserSGPRs() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxWavesPerEU(const FeatureBitset &Features)
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
const AMDGPUFrameLowering * getFrameLowering() const override=0
Itinerary data supplied by a subtarget to be used by a target.
bool hasSMovFedHazard() const
bool hasD16LoadStore() const
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features)
const RegisterBankInfo * getRegBankInfo() const override
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool debuggerEmitPrologue() const
void ParseSubtargetFeatures(StringRef CPU, StringRef FS)
unsigned getImplicitArgNumBytes(const MachineFunction &MF) const
const R600FrameLowering * getFrameLowering() const override
bool hasScalarStores() const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
bool isCompute(CallingConv::ID cc)
bool hasApertureRegs() const
bool hasCFAluBug() const
unsigned getSGPREncodingGranule() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasFminFmaxLegacy() const
unsigned getTotalNumSGPRs(const FeatureBitset &Features)
bool hasCodeObjectV3() const
bool hasFPExceptions() const
bool enableDX10Clamp() const
bool has16BitInsts() const
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
SI DAG Lowering interface definition.
bool hasVGPRIndexMode() const
const R600InstrInfo * getInstrInfo() const override
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:69
bool debuggerSupported() const
bool hasUnalignedBufferAccess() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
bool hasDPP() const
const AMDGPUInstrInfo * getInstrInfo() const override=0
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool hasVertexCache() const
unsigned getMaxPrivateElementSize() const
unsigned getEUsPerCU() const
unsigned getAddressableNumSGPRs() const
unsigned getWavefrontSize() const
const R600RegisterInfo * getRegisterInfo() const override
const AMDGPUTargetLowering * getTargetLowering() const override=0
bool has12DWordStoreHazard() const
bool loadStoreOptEnabled() const
const SIRegisterInfo * getRegisterInfo() const override
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:194
bool hasMadMixInsts() const
bool isMesaKernel(const MachineFunction &MF) const
bool hasFlatAddressSpace() const
unsigned getReservedNumVGPRs(const MachineFunction &MF) const
EnvironmentType getEnvironment() const
getEnvironment - Get the parsed environment type of this triple.
Definition: Triple.h:303
bool hasFastFMAF32() const
void setScalarizeGlobalBehavior(bool b)
unsigned getAlignmentForImplicitArgPtr() const
StringRef getEnvironmentName() const
getEnvironmentName - Get the optional environment (fourth) component of the triple, or "" if empty.
Definition: Triple.cpp:955
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:531
unsigned getSGPRAllocGranule() const
bool hasAddNoCarry() const
bool isShader(CallingConv::ID cc)
TrapHandlerAbi getTrapHandlerAbi() const
bool enableSubRegLiveness() const override
unsigned getMaxWavesPerEU() const
AMDGPUAS getAMDGPUAS() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
bool enableHugePrivateBuffer() const
short getTexVTXClauseSize() const
unsigned getMinWavesPerEU() const
R600 DAG Lowering interface definition.
unsigned getMaxFlatWorkGroupSize() const
bool enableEarlyIfConversion() const override
bool hasHalfRate64Ops() const
Information about the stack frame layout on the AMDGPU targets.
#define I(x, y, z)
Definition: MD5.cpp:58
const CallLowering * getCallLowering() const override
unsigned getVGPRAllocGranule() const
bool hasSMemRealTime() const
const SIFrameLowering * getFrameLowering() const override
bool hasReadM0SendMsgHazard() const
bool hasMin3Max3_16() const
bool enableSIScheduler() const
~AMDGPUSubtarget() override
unsigned getAddressableNumSGPRs(const FeatureBitset &Features)
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM)
const SITargetLowering * getTargetLowering() const override
unsigned getSGPREncodingGranule(const FeatureBitset &Features)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
bool isAmdCodeObjectV2(const MachineFunction &MF) const
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool isTrapHandlerEnabled() const
bool isMesaGfxShader(const MachineFunction &MF) const
bool hasSGPRInitBug() const
unsigned getMaxWavesPerCU() const
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
InstrItineraryData InstrItins
bool hasBCNT(unsigned Size) const
const R600TargetLowering * getTargetLowering() const override
unsigned getSGPRAllocGranule(const FeatureBitset &Features)
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:57
unsigned getWavefrontSizeLog2() const
bool unsafeDSOffsetFoldingEnabled() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
bool enableIEEEBit(const MachineFunction &MF) const
bool hasSDWAScalar() const
bool useFlatForGlobal() const
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
bool enableMachineScheduler() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool useVGPRIndexMode(bool UserEnable) const
unsigned getTotalNumSGPRs() const
bool isXNACKEnabled() const
bool hasFlatInstOffsets() const
bool supportsMinMaxDenormModes() const
bool hasReadM0MovRelInterpHazard() const
unsigned getAddressableNumVGPRs() const