LLVM  7.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUCallLowering.h"
20 #include "R600FrameLowering.h"
21 #include "R600ISelLowering.h"
22 #include "R600InstrInfo.h"
23 #include "SIFrameLowering.h"
24 #include "SIISelLowering.h"
25 #include "SIInstrInfo.h"
26 #include "SIMachineFunctionInfo.h"
27 #include "Utils/AMDGPUBaseInfo.h"
28 #include "llvm/ADT/Triple.h"
36 #include <cassert>
37 #include <cstdint>
38 #include <memory>
39 #include <utility>
40 
41 #define GET_SUBTARGETINFO_HEADER
42 #include "AMDGPUGenSubtargetInfo.inc"
43 
44 namespace llvm {
45 
46 class StringRef;
47 
49 public:
50  enum Generation {
51  R600 = 0,
59  };
60 
61  enum {
76  };
77 
81  };
82 
83  enum TrapID {
92  };
93 
96  };
97 
98 protected:
99  // Basic subtarget description.
102  unsigned IsaVersion;
103  unsigned WavefrontSize;
107 
108  // Possibly statically set by tablegen, but may want to be overridden.
111 
112  // Dynamically set bits that enable features.
116  bool DX10Clamp;
128 
129  // Used as options.
136  bool DumpCode;
137 
138  // Subtarget properties statically set by tablegen
139  bool FP64;
140  bool FMA;
141  bool MIMG_R128;
142  bool IsGCN;
144  bool CIInsts;
145  bool GFX9Insts;
152  bool HasMovrel;
156  bool HasSDWA;
162  bool HasDPP;
170  bool CaymanISA;
171  bool CFALUBug;
175 
176  // Dummy feature to use for assembler in tablegen.
178 
182 
183 public:
184  AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
185  const TargetMachine &TM);
186  ~AMDGPUSubtarget() override;
187 
189  StringRef GPU, StringRef FS);
190 
191  const AMDGPUInstrInfo *getInstrInfo() const override = 0;
192  const AMDGPUFrameLowering *getFrameLowering() const override = 0;
193  const AMDGPUTargetLowering *getTargetLowering() const override = 0;
194  const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
195 
196  const InstrItineraryData *getInstrItineraryData() const override {
197  return &InstrItins;
198  }
199 
200  // Nothing implemented, just prevent crashes on use.
201  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
202  return &TSInfo;
203  }
204 
206 
207  bool isAmdHsaOS() const {
208  return TargetTriple.getOS() == Triple::AMDHSA;
209  }
210 
211  bool isMesa3DOS() const {
212  return TargetTriple.getOS() == Triple::Mesa3D;
213  }
214 
/// \returns true when the target triple's parsed environment is
/// Triple::OpenCL, or when the triple's environment name string is exactly
/// "amdgizcl".
215  bool isOpenCLEnv() const {
216  return TargetTriple.getEnvironment() == Triple::OpenCL ||
217  TargetTriple.getEnvironmentName() == "amdgizcl";
218  }
219 
220  bool isAmdPalOS() const {
221  return TargetTriple.getOS() == Triple::AMDPAL;
222  }
223 
225  return Gen;
226  }
227 
228  unsigned getWavefrontSize() const {
229  return WavefrontSize;
230  }
231 
/// \returns Log2_32(WavefrontSize). Log2_32 is documented as the floor of
/// log base 2 of its argument, and -1 when the argument is zero.
232  unsigned getWavefrontSizeLog2() const {
233  return Log2_32(WavefrontSize);
234  }
235 
236  int getLocalMemorySize() const {
237  return LocalMemorySize;
238  }
239 
240  int getLDSBankCount() const {
241  return LDSBankCount;
242  }
243 
244  unsigned getMaxPrivateElementSize() const {
245  return MaxPrivateElementSize;
246  }
247 
249  return AS;
250  }
251 
252  bool has16BitInsts() const {
253  return Has16BitInsts;
254  }
255 
256  bool hasIntClamp() const {
257  return HasIntClamp;
258  }
259 
260  bool hasVOP3PInsts() const {
261  return HasVOP3PInsts;
262  }
263 
264  bool hasFP64() const {
265  return FP64;
266  }
267 
268  bool hasMIMG_R128() const {
269  return MIMG_R128;
270  }
271 
272  bool hasFastFMAF32() const {
273  return FastFMAF32;
274  }
275 
276  bool hasHalfRate64Ops() const {
277  return HalfRate64Ops;
278  }
279 
280  bool hasAddr64() const {
281  return (getGeneration() < VOLCANIC_ISLANDS);
282  }
283 
284  bool hasBFE() const {
285  return (getGeneration() >= EVERGREEN);
286  }
287 
288  bool hasBFI() const {
289  return (getGeneration() >= EVERGREEN);
290  }
291 
292  bool hasBFM() const {
293  return hasBFE();
294  }
295 
/// Generation check for BCNT at operand width \p Size.
///
/// \returns For Size == 32, whether getGeneration() >= EVERGREEN; for
/// Size == 64, whether getGeneration() >= SOUTHERN_ISLANDS; false for every
/// other size.
296  bool hasBCNT(unsigned Size) const {
297  if (Size == 32)
298  return (getGeneration() >= EVERGREEN);
299 
300  if (Size == 64)
301  return (getGeneration() >= SOUTHERN_ISLANDS);
302 
     // Only the 32- and 64-bit widths are recognised.
303  return false;
304  }
305 
306  bool hasMulU24() const {
307  return (getGeneration() >= EVERGREEN);
308  }
309 
/// \returns true when getGeneration() >= SOUTHERN_ISLANDS, or, regardless of
/// generation, when hasCaymanISA() is true.
310  bool hasMulI24() const {
311  return (getGeneration() >= SOUTHERN_ISLANDS ||
312  hasCaymanISA());
313  }
314 
315  bool hasFFBL() const {
316  return (getGeneration() >= EVERGREEN);
317  }
318 
319  bool hasFFBH() const {
320  return (getGeneration() >= EVERGREEN);
321  }
322 
323  bool hasMed3_16() const {
324  return getGeneration() >= GFX9;
325  }
326 
327  bool hasMin3Max3_16() const {
328  return getGeneration() >= GFX9;
329  }
330 
331  bool hasMadMixInsts() const {
332  return HasMadMixInsts;
333  }
334 
335  bool hasCARRY() const {
336  return (getGeneration() >= EVERGREEN);
337  }
338 
339  bool hasBORROW() const {
340  return (getGeneration() >= EVERGREEN);
341  }
342 
343  bool hasCaymanISA() const {
344  return CaymanISA;
345  }
346 
347  bool hasFMA() const {
348  return FMA;
349  }
350 
353  }
354 
355  bool enableHugePrivateBuffer() const {
357  }
358 
359  bool isPromoteAllocaEnabled() const {
360  return EnablePromoteAlloca;
361  }
362 
365  }
366 
367  bool dumpCode() const {
368  return DumpCode;
369  }
370 
371  /// Return the amount of LDS that can be used that will not restrict the
372  /// occupancy lower than WaveCount.
373  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
374  const Function &) const;
375 
376  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
377  /// the given LDS memory size is the only constraint.
378  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
379 
/// Convenience overload: reads the LDS size from \p MF's
/// SIMachineFunctionInfo and forwards it, together with the IR function that
/// \p MF wraps, to the (Bytes, Function) overload.
380  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
381  const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
382  return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
383  }
384 
/// \returns The FP64FP16Denormals flag. This is the same member that
/// hasFP64Denormals() returns, so the two queries always agree.
385  bool hasFP16Denormals() const {
386  return FP64FP16Denormals;
387  }
388 
389  bool hasFP32Denormals() const {
390  return FP32Denormals;
391  }
392 
/// \returns The FP64FP16Denormals flag. This is the same member that
/// hasFP16Denormals() returns, so the two queries always agree.
393  bool hasFP64Denormals() const {
394  return FP64FP16Denormals;
395  }
396 
399  }
400 
401  bool hasFPExceptions() const {
402  return FPExceptions;
403  }
404 
405  bool enableDX10Clamp() const {
406  return DX10Clamp;
407  }
408 
409  bool enableIEEEBit(const MachineFunction &MF) const {
411  }
412 
413  bool useFlatForGlobal() const {
414  return FlatForGlobal;
415  }
416 
417  /// \returns If MUBUF instructions always perform range checking, even for
418  /// buffer resources used for private memory access.
421  }
422 
425  }
426 
427  bool hasCodeObjectV3() const {
428  return CodeObjectV3;
429  }
430 
432  return UnalignedBufferAccess;
433  }
434 
436  return UnalignedScratchAccess;
437  }
438 
439  bool hasApertureRegs() const {
440  return HasApertureRegs;
441  }
442 
443  bool isTrapHandlerEnabled() const {
444  return TrapHandler;
445  }
446 
447  bool isXNACKEnabled() const {
448  return EnableXNACK;
449  }
450 
451  bool hasFlatAddressSpace() const {
452  return FlatAddressSpace;
453  }
454 
455  bool hasFlatInstOffsets() const {
456  return FlatInstOffsets;
457  }
458 
459  bool hasFlatGlobalInsts() const {
460  return FlatGlobalInsts;
461  }
462 
463  bool hasFlatScratchInsts() const {
464  return FlatScratchInsts;
465  }
466 
467  bool hasD16LoadStore() const {
468  return getGeneration() >= GFX9;
469  }
470 
471  /// Return if most LDS instructions have an m0 use that requires m0 to be
472  /// initialized.
     /// True exactly when getGeneration() < GFX9.
473  bool ldsRequiresM0Init() const {
474  return getGeneration() < GFX9;
475  }
476 
477  bool hasAddNoCarry() const {
478  return AddNoCarryInsts;
479  }
480 
481  bool hasUnpackedD16VMem() const {
482  return HasUnpackedD16VMem;
483  }
484 
485  bool isMesaKernel(const MachineFunction &MF) const {
487  }
488 
489  // Covers VS/PS/CS graphics shaders
490  bool isMesaGfxShader(const MachineFunction &MF) const {
492  }
493 
/// \returns true when the target OS is AMDHSA (isAmdHsaOS()), or when
/// isMesaKernel(MF) holds. \p MF is only consulted for the Mesa check.
494  bool isAmdCodeObjectV2(const MachineFunction &MF) const {
495  return isAmdHsaOS() || isMesaKernel(MF);
496  }
497 
498  bool hasMad64_32() const {
499  return getGeneration() >= SEA_ISLANDS;
500  }
501 
502  bool hasFminFmaxLegacy() const {
504  }
505 
506  bool hasSDWA() const {
507  return HasSDWA;
508  }
509 
510  bool hasSDWAOmod() const {
511  return HasSDWAOmod;
512  }
513 
514  bool hasSDWAScalar() const {
515  return HasSDWAScalar;
516  }
517 
518  bool hasSDWASdst() const {
519  return HasSDWASdst;
520  }
521 
522  bool hasSDWAMac() const {
523  return HasSDWAMac;
524  }
525 
526  bool hasSDWAOutModsVOPC() const {
527  return HasSDWAOutModsVOPC;
528  }
529 
530  /// \brief Returns the offset in bytes from the start of the input buffer
531  /// of the first explicit kernel argument.
     ///
     /// The offset is 0 when isAmdCodeObjectV2(MF) is true and 36 otherwise.
     /// NOTE(review): what occupies the leading 36 bytes in the non-code-object
     /// case is not visible in this header - confirm before changing the value.
532  unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
533  return isAmdCodeObjectV2(MF) ? 0 : 36;
534  }
535 
/// \returns 8 when the target OS is AMDHSA, 4 otherwise (presumably bytes -
/// the unit is not stated here; confirm against the callers).
536  unsigned getAlignmentForImplicitArgPtr() const {
537  return isAmdHsaOS() ? 8 : 4;
538  }
539 
/// \returns Number of implicit argument bytes for \p MF: 16 when
/// isMesaKernel(MF), else 32 when the OS is AMDHSA and the environment is
/// OpenCL (isOpenCLEnv()), else 0.
540  unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
     // The Mesa kernel check is evaluated first and wins over the HSA/OpenCL case.
541  if (isMesaKernel(MF))
542  return 16;
543  if (isAmdHsaOS() && isOpenCLEnv())
544  return 32;
545  return 0;
546  }
547 
548  // Scratch is allocated in 256 dword per wave blocks for the entire
549  // wavefront. When viewed from the perspective of an arbitrary workitem, this
550  // is 4-byte aligned.
     // Hence the constant 4 returned below.
551  unsigned getStackAlignment() const {
552  return 4;
553  }
554 
555  bool enableMachineScheduler() const override {
556  return true;
557  }
558 
559  bool enableSubRegLiveness() const override {
560  return true;
561  }
562 
563  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
565 
566  /// \returns Number of execution units per compute unit supported by the
567  /// subtarget.
568  unsigned getEUsPerCU() const {
569  return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
570  }
571 
572  /// \returns Maximum number of work groups per compute unit supported by the
573  /// subtarget and limited by given \p FlatWorkGroupSize.
574  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
575  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
576  FlatWorkGroupSize);
577  }
578 
579  /// \returns Maximum number of waves per compute unit supported by the
580  /// subtarget without any kind of limitation.
581  unsigned getMaxWavesPerCU() const {
582  return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
583  }
584 
585  /// \returns Maximum number of waves per compute unit supported by the
586  /// subtarget and limited by given \p FlatWorkGroupSize.
587  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
588  return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
589  FlatWorkGroupSize);
590  }
591 
592  /// \returns Minimum number of waves per execution unit supported by the
593  /// subtarget.
594  unsigned getMinWavesPerEU() const {
595  return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
596  }
597 
598  /// \returns Maximum number of waves per execution unit supported by the
599  /// subtarget without any kind of limitation.
600  unsigned getMaxWavesPerEU() const {
601  return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
602  }
603 
604  /// \returns Maximum number of waves per execution unit supported by the
605  /// subtarget and limited by given \p FlatWorkGroupSize.
606  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
607  return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
608  FlatWorkGroupSize);
609  }
610 
611  /// \returns Minimum flat work group size supported by the subtarget.
612  unsigned getMinFlatWorkGroupSize() const {
613  return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
614  }
615 
616  /// \returns Maximum flat work group size supported by the subtarget.
617  unsigned getMaxFlatWorkGroupSize() const {
618  return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
619  }
620 
621  /// \returns Number of waves per work group supported by the subtarget and
622  /// limited by given \p FlatWorkGroupSize.
623  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
624  return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
625  FlatWorkGroupSize);
626  }
627 
628  /// \returns Default range flat work group size for a calling convention.
629  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
630 
631  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
632  /// for function \p F, or minimum/maximum flat work group sizes explicitly
633  /// requested using "amdgpu-flat-work-group-size" attribute attached to
634  /// function \p F.
635  ///
636  /// \returns Subtarget's default values if explicitly requested values cannot
637  /// be converted to integer, or violate subtarget's specifications.
638  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
639 
640  /// \returns Subtarget's default pair of minimum/maximum number of waves per
641  /// execution unit for function \p F, or minimum/maximum number of waves per
642  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
643  /// attached to function \p F.
644  ///
645  /// \returns Subtarget's default values if explicitly requested values cannot
646  /// be converted to integer, violate subtarget's specifications, or are not
647  /// compatible with minimum/maximum number of waves limited by flat work group
648  /// size, register usage, and/or lds usage.
649  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
650 
651  /// Creates value range metadata on a workitemid.* intrinsic call or load.
652  bool makeLIDRangeMetadata(Instruction *I) const;
653 };
654 
/// AMDGPUSubtarget specialization that owns the R600-specific instruction
/// info, frame lowering and target lowering objects, and implements the pure
/// virtual getters of AMDGPUSubtarget by returning pointers to those members.
655 class R600Subtarget final : public AMDGPUSubtarget {
656 private:
657  R600InstrInfo InstrInfo;
658  R600FrameLowering FrameLowering;
659  R600TargetLowering TLInfo;
660 
661 public:
662  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
663  const TargetMachine &TM);
664 
     /// \returns Pointer to the InstrInfo member of this subtarget.
665  const R600InstrInfo *getInstrInfo() const override {
666  return &InstrInfo;
667  }
668 
     /// \returns Pointer to the FrameLowering member of this subtarget.
669  const R600FrameLowering *getFrameLowering() const override {
670  return &FrameLowering;
671  }
672 
     /// \returns Pointer to the TLInfo member of this subtarget.
673  const R600TargetLowering *getTargetLowering() const override {
674  return &TLInfo;
675  }
676 
     /// \returns Address of the register info object exposed by InstrInfo; this
     /// class stores no separate register info member.
677  const R600RegisterInfo *getRegisterInfo() const override {
678  return &InstrInfo.getRegisterInfo();
679  }
680 
     /// \returns The CFALUBug flag inherited from AMDGPUSubtarget.
681  bool hasCFAluBug() const {
682  return CFALUBug;
683  }
684 
     /// \returns The HasVertexCache flag.
685  bool hasVertexCache() const {
686  return HasVertexCache;
687  }
688 
     /// \returns The TexVTXClauseSize value, as a short.
689  short getTexVTXClauseSize() const {
690  return TexVTXClauseSize;
691  }
692 };
693 
/// AMDGPUSubtarget specialization that owns the SI-specific instruction info,
/// frame lowering and target lowering objects, holds the GlobalISel helper
/// objects, and exposes SGPR/VGPR budgeting and hazard queries.
694 class SISubtarget final : public AMDGPUSubtarget {
695 private:
696  SIInstrInfo InstrInfo;
697  SIFrameLowering FrameLowering;
698  SITargetLowering TLInfo;
699 
700  /// GlobalISel related APIs.
701  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
702  std::unique_ptr<InstructionSelector> InstSelector;
703  std::unique_ptr<LegalizerInfo> Legalizer;
704  std::unique_ptr<RegisterBankInfo> RegBankInfo;
705 
706 public:
707  SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
708  const TargetMachine &TM);
709 
     /// \returns Pointer to the InstrInfo member of this subtarget.
710  const SIInstrInfo *getInstrInfo() const override {
711  return &InstrInfo;
712  }
713 
     /// \returns Pointer to the FrameLowering member of this subtarget.
714  const SIFrameLowering *getFrameLowering() const override {
715  return &FrameLowering;
716  }
717 
     /// \returns Pointer to the TLInfo member of this subtarget.
718  const SITargetLowering *getTargetLowering() const override {
719  return &TLInfo;
720  }
721 
     // The four GlobalISel getters below return the raw pointer held by the
     // corresponding unique_ptr member. Where those members are populated is
     // not visible in this header, so treat the result as possibly null.
722  const CallLowering *getCallLowering() const override {
723  return CallLoweringInfo.get();
724  }
725 
726  const InstructionSelector *getInstructionSelector() const override {
727  return InstSelector.get();
728  }
729 
730  const LegalizerInfo *getLegalizerInfo() const override {
731  return Legalizer.get();
732  }
733 
734  const RegisterBankInfo *getRegBankInfo() const override {
735  return RegBankInfo.get();
736  }
737 
     /// \returns Address of the register info object exposed by InstrInfo; this
     /// class stores no separate register info member.
738  const SIRegisterInfo *getRegisterInfo() const override {
739  return &InstrInfo.getRegisterInfo();
740  }
741 
742  // XXX - Why is this here if it isn't in the default pass set?
743  bool enableEarlyIfConversion() const override {
744  return true;
745  }
746 
747  void overrideSchedPolicy(MachineSchedPolicy &Policy,
748  unsigned NumRegionInstrs) const override;
749 
750  bool isVGPRSpillingEnabled(const Function& F) const;
751 
     /// \returns The fixed value 16.
752  unsigned getMaxNumUserSGPRs() const {
753  return 16;
754  }
755 
756  bool hasSMemRealTime() const {
757  return HasSMemRealTime;
758  }
759 
760  bool hasMovrel() const {
761  return HasMovrel;
762  }
763 
764  bool hasVGPRIndexMode() const {
765  return HasVGPRIndexMode;
766  }
767 
     /// \returns true when movrel is unavailable (!hasMovrel()), or when
     /// \p UserEnable is set and hasVGPRIndexMode() is true.
768  bool useVGPRIndexMode(bool UserEnable) const {
769  return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
770  }
771 
     /// \returns true when getGeneration() >= VOLCANIC_ISLANDS.
772  bool hasScalarCompareEq64() const {
773  return getGeneration() >= VOLCANIC_ISLANDS;
774  }
775 
776  bool hasScalarStores() const {
777  return HasScalarStores;
778  }
779 
780  bool hasInv2PiInlineImm() const {
781  return HasInv2PiInlineImm;
782  }
783 
784  bool hasDPP() const {
785  return HasDPP;
786  }
787 
788  bool enableSIScheduler() const {
789  return EnableSIScheduler;
790  }
791 
     /// \returns true only when all three debugger options are enabled:
     /// debuggerInsertNops(), debuggerReserveRegs() and debuggerEmitPrologue().
792  bool debuggerSupported() const {
793  return debuggerInsertNops() && debuggerReserveRegs() &&
794  debuggerEmitPrologue();
795  }
796 
797  bool debuggerInsertNops() const {
798  return DebuggerInsertNops;
799  }
800 
801  bool debuggerReserveRegs() const {
802  return DebuggerReserveRegs;
803  }
804 
805  bool debuggerEmitPrologue() const {
806  return DebuggerEmitPrologue;
807  }
808 
809  bool loadStoreOptEnabled() const {
810  return EnableLoadStoreOpt;
811  }
812 
813  bool hasSGPRInitBug() const {
814  return SGPRInitBug;
815  }
816 
     // NOTE(review): the return statements of the hazard predicates below
     // (listing lines 818, 822, 830) are absent from this listing, as are
     // listing lines 825-826, which presumably held the signature and body of
     // hasReadM0MovRelInterpHazard - confirm against the original header.
817  bool has12DWordStoreHazard() const {
819  }
820 
821  bool hasSMovFedHazard() const {
823  }
824 
827  }
828 
829  bool hasReadM0SendMsgHazard() const {
831  }
832 
833  unsigned getKernArgSegmentSize(const MachineFunction &MF,
834  unsigned ExplictArgBytes) const;
835 
836  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
837  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
838 
839  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
840  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
841 
842  /// \returns true if the flat_scratch register should be initialized with the
843  /// pointer to the wave's scratch memory rather than a size and offset.
     /// This is the case exactly when getGeneration() >= GFX9.
844  bool flatScratchIsPointer() const {
845  return getGeneration() >= GFX9;
846  }
847 
     // The granule/total/addressable/min/max queries below are thin wrappers:
     // each forwards getFeatureBits() (plus its own arguments, if any) to the
     // same-named function in AMDGPU::IsaInfo.
848  /// \returns SGPR allocation granularity supported by the subtarget.
849  unsigned getSGPRAllocGranule() const {
850  return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
851  }
852 
853  /// \returns SGPR encoding granularity supported by the subtarget.
854  unsigned getSGPREncodingGranule() const {
855  return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
856  }
857 
858  /// \returns Total number of SGPRs supported by the subtarget.
859  unsigned getTotalNumSGPRs() const {
860  return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
861  }
862 
863  /// \returns Addressable number of SGPRs supported by the subtarget.
864  unsigned getAddressableNumSGPRs() const {
865  return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
866  }
867 
868  /// \returns Minimum number of SGPRs that meets the given number of waves per
869  /// execution unit requirement supported by the subtarget.
870  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
871  return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
872  }
873 
874  /// \returns Maximum number of SGPRs that meets the given number of waves per
875  /// execution unit requirement supported by the subtarget.
     /// \p Addressable is forwarded unchanged to IsaInfo::getMaxNumSGPRs; its
     /// effect is defined there, not in this header.
876  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
877  return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
878  Addressable);
879  }
880 
881  /// \returns Reserved number of SGPRs for given function \p MF.
882  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
883 
884  /// \returns Maximum number of SGPRs that meets number of waves per execution
885  /// unit requirement for function \p MF, or number of SGPRs explicitly
886  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
887  ///
888  /// \returns Value that meets number of waves per execution unit requirement
889  /// if explicitly requested value cannot be converted to integer, violates
890  /// subtarget's specifications, or does not meet number of waves per execution
891  /// unit requirement.
892  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
893 
894  /// \returns VGPR allocation granularity supported by the subtarget.
895  unsigned getVGPRAllocGranule() const {
896  return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
897  }
898 
899  /// \returns VGPR encoding granularity supported by the subtarget.
900  unsigned getVGPREncodingGranule() const {
901  return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
902  }
903 
904  /// \returns Total number of VGPRs supported by the subtarget.
905  unsigned getTotalNumVGPRs() const {
906  return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
907  }
908 
909  /// \returns Addressable number of VGPRs supported by the subtarget.
910  unsigned getAddressableNumVGPRs() const {
911  return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
912  }
913 
914  /// \returns Minimum number of VGPRs that meets given number of waves per
915  /// execution unit requirement supported by the subtarget.
916  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
917  return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
918  }
919 
920  /// \returns Maximum number of VGPRs that meets given number of waves per
921  /// execution unit requirement supported by the subtarget.
922  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
923  return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
924  }
925 
926  /// \returns Reserved number of VGPRs for given function \p MF.
     /// This implementation does not consult \p MF: the result is 4 when
     /// debuggerReserveRegs() is true and 0 otherwise.
927  unsigned getReservedNumVGPRs(const MachineFunction &MF) const {
928  return debuggerReserveRegs() ? 4 : 0;
929  }
930 
931  /// \returns Maximum number of VGPRs that meets number of waves per execution
932  /// unit requirement for function \p MF, or number of VGPRs explicitly
933  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
934  ///
935  /// \returns Value that meets number of waves per execution unit requirement
936  /// if explicitly requested value cannot be converted to integer, violates
937  /// subtarget's specifications, or does not meet number of waves per execution
938  /// unit requirement.
939  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
940 
941  void getPostRAMutations(
942  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
943  const override;
944 };
945 
946 } // end namespace llvm
947 
948 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasAutoWaitcntBeforeBarrier() const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
bool hasFP64Denormals() const
const AMDGPURegisterInfo * getRegisterInfo() const override=0
unsigned getAddressableNumVGPRs(const FeatureBitset &Features)
Generation getGeneration() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool getScalarizeGlobalBehavior() const
Interface definition for R600InstrInfo.
bool hasScalarCompareEq64() const
unsigned getVGPREncodingGranule() const
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
bool hasFlatScratchInsts() const
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:294
bool isPromoteAllocaEnabled() const
This file describes how to lower LLVM calls to machine code calls.
AMDGPUSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that requires m0 to be initialized. ...
const SIInstrInfo * getInstrInfo() const override
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
bool hasUnalignedScratchAccess() const
SelectionDAGTargetInfo TSInfo
F(f)
unsigned getTotalNumVGPRs(const FeatureBitset &Features)
unsigned getMinWavesPerEU(const FeatureBitset &Features)
unsigned getStackAlignment() const
bool debuggerReserveRegs() const
bool hasFlatGlobalInsts() const
unsigned getWavesPerWorkGroup(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
bool hasMovrel() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:149
bool hasVOP3PInsts() const
unsigned getVGPREncodingGranule(const FeatureBitset &Features)
unsigned getEUsPerCU(const FeatureBitset &Features)
Holds all the information related to register banks.
bool flatScratchIsPointer() const
bool hasFP16Denormals() const
bool hasInv2PiInlineImm() const
unsigned getVGPRAllocGranule(const FeatureBitset &Features)
OpenCL uses address spaces to differentiate between various memory regions on the hardware...
Definition: AMDGPU.h:214
unsigned getMinFlatWorkGroupSize() const
int getLocalMemorySize() const
unsigned getTotalNumVGPRs() const
bool debuggerInsertNops() const
const InstrItineraryData * getInstrItineraryData() const override
const InstructionSelector * getInstructionSelector() const override
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features)
unsigned getMaxWavesPerCU(const FeatureBitset &Features)
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, bool Addressable)
bool hasSDWAOutModsVOPC() const
const LegalizerInfo * getLegalizerInfo() const override
unsigned getMaxNumUserSGPRs() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxWavesPerEU(const FeatureBitset &Features)
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
const AMDGPUFrameLowering * getFrameLowering() const override=0
Itinerary data supplied by a subtarget to be used by a target.
bool hasSMovFedHazard() const
bool hasD16LoadStore() const
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features)
const RegisterBankInfo * getRegBankInfo() const override
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool debuggerEmitPrologue() const
void ParseSubtargetFeatures(StringRef CPU, StringRef FS)
unsigned getImplicitArgNumBytes(const MachineFunction &MF) const
const R600FrameLowering * getFrameLowering() const override
bool hasScalarStores() const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
bool isCompute(CallingConv::ID cc)
bool hasApertureRegs() const
bool hasCFAluBug() const
unsigned getSGPREncodingGranule() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasFminFmaxLegacy() const
unsigned getTotalNumSGPRs(const FeatureBitset &Features)
bool hasCodeObjectV3() const
bool hasUnpackedD16VMem() const
bool hasFPExceptions() const
bool enableDX10Clamp() const
bool has16BitInsts() const
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
SI DAG Lowering interface definition.
bool hasVGPRIndexMode() const
const R600InstrInfo * getInstrInfo() const override
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:69
bool debuggerSupported() const
bool hasUnalignedBufferAccess() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
bool hasDPP() const
const AMDGPUInstrInfo * getInstrInfo() const override=0
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool hasVertexCache() const
unsigned getMaxPrivateElementSize() const
unsigned getEUsPerCU() const
unsigned getAddressableNumSGPRs() const
unsigned getWavefrontSize() const
const R600RegisterInfo * getRegisterInfo() const override
const AMDGPUTargetLowering * getTargetLowering() const override=0
bool has12DWordStoreHazard() const
bool loadStoreOptEnabled() const
const SIRegisterInfo * getRegisterInfo() const override
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:194
bool hasMadMixInsts() const
bool isMesaKernel(const MachineFunction &MF) const
bool hasFlatAddressSpace() const
unsigned getReservedNumVGPRs(const MachineFunction &MF) const
EnvironmentType getEnvironment() const
getEnvironment - Get the parsed environment type of this triple.
Definition: Triple.h:303
bool hasFastFMAF32() const
void setScalarizeGlobalBehavior(bool b)
unsigned getAlignmentForImplicitArgPtr() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
StringRef getEnvironmentName() const
getEnvironmentName - Get the optional environment (fourth) component of the triple, or "" if empty.
Definition: Triple.cpp:957
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:531
unsigned getSGPRAllocGranule() const
bool hasAddNoCarry() const
bool isShader(CallingConv::ID cc)
TrapHandlerAbi getTrapHandlerAbi() const
bool enableSubRegLiveness() const override
unsigned getMaxWavesPerEU() const
AMDGPUAS getAMDGPUAS() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
bool enableHugePrivateBuffer() const
short getTexVTXClauseSize() const
unsigned getMinWavesPerEU() const
R600 DAG Lowering interface definition.
unsigned getMaxFlatWorkGroupSize() const
bool enableEarlyIfConversion() const override
bool hasHalfRate64Ops() const
Information about the stack frame layout on the AMDGPU targets.
#define I(x, y, z)
Definition: MD5.cpp:58
const CallLowering * getCallLowering() const override
unsigned getVGPRAllocGranule() const
bool hasSMemRealTime() const
const SIFrameLowering * getFrameLowering() const override
bool hasReadM0SendMsgHazard() const
bool hasMin3Max3_16() const
bool enableSIScheduler() const
~AMDGPUSubtarget() override
unsigned getAddressableNumSGPRs(const FeatureBitset &Features)
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM)
const SITargetLowering * getTargetLowering() const override
unsigned getSGPREncodingGranule(const FeatureBitset &Features)
bool isAmdCodeObjectV2(const MachineFunction &MF) const
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool isTrapHandlerEnabled() const
bool privateMemoryResourceIsRangeChecked() const
bool isMesaGfxShader(const MachineFunction &MF) const
bool hasSGPRInitBug() const
unsigned getMaxWavesPerCU() const
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
InstrItineraryData InstrItins
bool hasBCNT(unsigned Size) const
const R600TargetLowering * getTargetLowering() const override
unsigned getSGPRAllocGranule(const FeatureBitset &Features)
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
unsigned getWavefrontSizeLog2() const
bool unsafeDSOffsetFoldingEnabled() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
bool enableIEEEBit(const MachineFunction &MF) const
bool hasSDWAScalar() const
bool useFlatForGlobal() const
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
bool enableMachineScheduler() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool useVGPRIndexMode(bool UserEnable) const
unsigned getTotalNumSGPRs() const
bool isXNACKEnabled() const
bool hasFlatInstOffsets() const
bool supportsMinMaxDenormModes() const
bool hasReadM0MovRelInterpHazard() const
unsigned getAddressableNumVGPRs() const