LLVM  7.0.0svn
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUCallLowering.h"
20 #include "R600FrameLowering.h"
21 #include "R600ISelLowering.h"
22 #include "R600InstrInfo.h"
23 #include "SIFrameLowering.h"
24 #include "SIISelLowering.h"
25 #include "SIInstrInfo.h"
26 #include "Utils/AMDGPUBaseInfo.h"
27 #include "llvm/ADT/Triple.h"
35 #include <cassert>
36 #include <cstdint>
37 #include <memory>
38 #include <utility>
39 
40 #define GET_SUBTARGETINFO_HEADER
41 #include "AMDGPUGenSubtargetInfo.inc"
42 
43 namespace llvm {
44 
45 class StringRef;
46 
48 public:
49  enum Generation {
50  R600 = 0,
58  };
59 
60  enum {
77  };
78 
82  };
83 
84  enum TrapID {
93  };
94 
97  };
98 
99 protected:
100  // Basic subtarget description.
103  unsigned IsaVersion;
104  unsigned WavefrontSize;
108 
109  // Possibly statically set by tablegen, but may want to be overridden.
112 
113  // Dynamically set bits that enable features.
117  bool DX10Clamp;
128 
129  // Used as options.
137  bool DumpCode;
138 
139  // Subtarget properties statically set by tablegen
140  bool FP64;
141  bool FMA;
142  bool MIMG_R128;
143  bool IsGCN;
145  bool CIInsts;
146  bool GFX9Insts;
154  bool HasMovrel;
159  bool HasSDWA;
165  bool HasDPP;
175  bool CaymanISA;
176  bool CFALUBug;
180 
181  // Dummy feature to use for assembler in tablegen.
183 
187 
188 public:
189  AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
190  const TargetMachine &TM);
191  ~AMDGPUSubtarget() override;
192 
194  StringRef GPU, StringRef FS);
195 
196  const AMDGPUInstrInfo *getInstrInfo() const override = 0;
197  const AMDGPUFrameLowering *getFrameLowering() const override = 0;
198  const AMDGPUTargetLowering *getTargetLowering() const override = 0;
199  const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
200 
201  const InstrItineraryData *getInstrItineraryData() const override {
202  return &InstrItins;
203  }
204 
205  // Nothing implemented, just prevent crashes on use.
206  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
207  return &TSInfo;
208  }
209 
211 
212  bool isAmdHsaOS() const {
213  return TargetTriple.getOS() == Triple::AMDHSA;
214  }
215 
216  bool isMesa3DOS() const {
217  return TargetTriple.getOS() == Triple::Mesa3D;
218  }
219 
220  bool isAmdPalOS() const {
221  return TargetTriple.getOS() == Triple::AMDPAL;
222  }
223 
225  return Gen;
226  }
227 
228  unsigned getWavefrontSize() const {
229  return WavefrontSize;
230  }
231 
232  unsigned getWavefrontSizeLog2() const {
233  return Log2_32(WavefrontSize);
234  }
235 
236  int getLocalMemorySize() const {
237  return LocalMemorySize;
238  }
239 
240  int getLDSBankCount() const {
241  return LDSBankCount;
242  }
243 
244  unsigned getMaxPrivateElementSize() const {
245  return MaxPrivateElementSize;
246  }
247 
249  return AS;
250  }
251 
252  bool has16BitInsts() const {
253  return Has16BitInsts;
254  }
255 
256  bool hasIntClamp() const {
257  return HasIntClamp;
258  }
259 
260  bool hasVOP3PInsts() const {
261  return HasVOP3PInsts;
262  }
263 
264  bool hasFP64() const {
265  return FP64;
266  }
267 
268  bool hasMIMG_R128() const {
269  return MIMG_R128;
270  }
271 
272  bool hasFastFMAF32() const {
273  return FastFMAF32;
274  }
275 
276  bool hasHalfRate64Ops() const {
277  return HalfRate64Ops;
278  }
279 
280  bool hasAddr64() const {
281  return (getGeneration() < VOLCANIC_ISLANDS);
282  }
283 
284  bool hasBFE() const {
285  return (getGeneration() >= EVERGREEN);
286  }
287 
288  bool hasBFI() const {
289  return (getGeneration() >= EVERGREEN);
290  }
291 
292  bool hasBFM() const {
293  return hasBFE();
294  }
295 
296  bool hasBCNT(unsigned Size) const {
297  if (Size == 32)
298  return (getGeneration() >= EVERGREEN);
299 
300  if (Size == 64)
301  return (getGeneration() >= SOUTHERN_ISLANDS);
302 
303  return false;
304  }
305 
306  bool hasMulU24() const {
307  return (getGeneration() >= EVERGREEN);
308  }
309 
310  bool hasMulI24() const {
311  return (getGeneration() >= SOUTHERN_ISLANDS ||
312  hasCaymanISA());
313  }
314 
315  bool hasFFBL() const {
316  return (getGeneration() >= EVERGREEN);
317  }
318 
319  bool hasFFBH() const {
320  return (getGeneration() >= EVERGREEN);
321  }
322 
323  bool hasMed3_16() const {
324  return getGeneration() >= GFX9;
325  }
326 
327  bool hasMin3Max3_16() const {
328  return getGeneration() >= GFX9;
329  }
330 
331  bool hasMadMixInsts() const {
332  return HasMadMixInsts;
333  }
334 
335  bool hasFmaMixInsts() const {
336  return HasFmaMixInsts;
337  }
338 
339  bool hasCARRY() const {
340  return (getGeneration() >= EVERGREEN);
341  }
342 
343  bool hasBORROW() const {
344  return (getGeneration() >= EVERGREEN);
345  }
346 
347  bool hasCaymanISA() const {
348  return CaymanISA;
349  }
350 
351  bool hasFMA() const {
352  return FMA;
353  }
354 
357  }
358 
359  bool enableHugePrivateBuffer() const {
361  }
362 
363  bool isPromoteAllocaEnabled() const {
364  return EnablePromoteAlloca;
365  }
366 
369  }
370 
371  bool dumpCode() const {
372  return DumpCode;
373  }
374 
375  /// Return the amount of LDS that can be used that will not restrict the
376  /// occupancy lower than WaveCount.
377  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
378  const Function &) const;
379 
380  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
381  /// the given LDS memory size is the only constraint.
382  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
383 
384  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
385 
386  bool hasFP16Denormals() const {
387  return FP64FP16Denormals;
388  }
389 
390  bool hasFP32Denormals() const {
391  return FP32Denormals;
392  }
393 
394  bool hasFP64Denormals() const {
395  return FP64FP16Denormals;
396  }
397 
400  }
401 
402  bool hasFPExceptions() const {
403  return FPExceptions;
404  }
405 
406  bool enableDX10Clamp() const {
407  return DX10Clamp;
408  }
409 
410  bool enableIEEEBit(const MachineFunction &MF) const {
412  }
413 
414  bool useFlatForGlobal() const {
415  return FlatForGlobal;
416  }
417 
418  /// \returns If target supports ds_read/write_b128 and user enables generation
419  /// of ds_read/write_b128.
420  bool useDS128() const {
421  return CIInsts && EnableDS128;
422  }
423 
424  /// \returns If MUBUF instructions always perform range checking, even for
425  /// buffer resources used for private memory access.
428  }
429 
432  }
433 
434  bool hasCodeObjectV3() const {
435  return CodeObjectV3;
436  }
437 
439  return UnalignedBufferAccess;
440  }
441 
443  return UnalignedScratchAccess;
444  }
445 
446  bool hasApertureRegs() const {
447  return HasApertureRegs;
448  }
449 
450  bool isTrapHandlerEnabled() const {
451  return TrapHandler;
452  }
453 
454  bool isXNACKEnabled() const {
455  return EnableXNACK;
456  }
457 
458  bool hasFlatAddressSpace() const {
459  return FlatAddressSpace;
460  }
461 
462  bool hasFlatInstOffsets() const {
463  return FlatInstOffsets;
464  }
465 
466  bool hasFlatGlobalInsts() const {
467  return FlatGlobalInsts;
468  }
469 
470  bool hasFlatScratchInsts() const {
471  return FlatScratchInsts;
472  }
473 
475  return getGeneration() > GFX9;
476  }
477 
478  bool hasD16LoadStore() const {
479  return getGeneration() >= GFX9;
480  }
481 
482  /// Return if most LDS instructions have an m0 use that requires m0 to be
483  /// initialized.
484  bool ldsRequiresM0Init() const {
485  return getGeneration() < GFX9;
486  }
487 
488  bool hasAddNoCarry() const {
489  return AddNoCarryInsts;
490  }
491 
492  bool hasUnpackedD16VMem() const {
493  return HasUnpackedD16VMem;
494  }
495 
496  bool isMesaKernel(const Function &F) const {
497  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
498  }
499 
500  // Covers VS/PS/CS graphics shaders
501  bool isMesaGfxShader(const Function &F) const {
502  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
503  }
504 
505  bool isAmdCodeObjectV2(const Function &F) const {
506  return isAmdHsaOS() || isMesaKernel(F);
507  }
508 
509  bool hasMad64_32() const {
510  return getGeneration() >= SEA_ISLANDS;
511  }
512 
513  bool hasFminFmaxLegacy() const {
515  }
516 
517  bool hasSDWA() const {
518  return HasSDWA;
519  }
520 
521  bool hasSDWAOmod() const {
522  return HasSDWAOmod;
523  }
524 
525  bool hasSDWAScalar() const {
526  return HasSDWAScalar;
527  }
528 
529  bool hasSDWASdst() const {
530  return HasSDWASdst;
531  }
532 
533  bool hasSDWAMac() const {
534  return HasSDWAMac;
535  }
536 
537  bool hasSDWAOutModsVOPC() const {
538  return HasSDWAOutModsVOPC;
539  }
540 
542  return getGeneration() < SEA_ISLANDS;
543  }
544 
545  bool hasDLInsts() const {
546  return HasDLInsts;
547  }
548 
549  bool d16PreservesUnusedBits() const {
550  return D16PreservesUnusedBits;
551  }
552 
553  /// Returns the offset in bytes from the start of the input buffer
554  /// of the first explicit kernel argument.
555  unsigned getExplicitKernelArgOffset(const Function &F) const {
556  return isAmdCodeObjectV2(F) ? 0 : 36;
557  }
558 
559  unsigned getAlignmentForImplicitArgPtr() const {
560  return isAmdHsaOS() ? 8 : 4;
561  }
562 
563  /// \returns Number of bytes of arguments that are passed to a shader or
564  /// kernel in addition to the explicit ones declared for the function.
565  unsigned getImplicitArgNumBytes(const Function &F) const {
566  if (isMesaKernel(F))
567  return 16;
568  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
569  }
570 
571  // Scratch is allocated in 256 dword per wave blocks for the entire
572  // wavefront. When viewed from the perspective of an arbitrary workitem, this
573  // is 4-byte aligned.
574  //
575  // Only 4-byte alignment is really needed to access anything. Transformations
576  // on the pointer value itself may rely on the alignment / known low bits of
577  // the pointer. Set this to something above the minimum to avoid needing
578  // dynamic realignment in common cases.
579  unsigned getStackAlignment() const {
580  return 16;
581  }
582 
583  bool enableMachineScheduler() const override {
584  return true;
585  }
586 
587  bool enableSubRegLiveness() const override {
588  return true;
589  }
590 
591  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
593 
594  /// \returns Number of execution units per compute unit supported by the
595  /// subtarget.
596  unsigned getEUsPerCU() const {
597  return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
598  }
599 
600  /// \returns Maximum number of work groups per compute unit supported by the
601  /// subtarget and limited by given \p FlatWorkGroupSize.
602  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
603  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
604  FlatWorkGroupSize);
605  }
606 
607  /// \returns Maximum number of waves per compute unit supported by the
608  /// subtarget without any kind of limitation.
609  unsigned getMaxWavesPerCU() const {
610  return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
611  }
612 
613  /// \returns Maximum number of waves per compute unit supported by the
614  /// subtarget and limited by given \p FlatWorkGroupSize.
615  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
616  return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
617  FlatWorkGroupSize);
618  }
619 
620  /// \returns Minimum number of waves per execution unit supported by the
621  /// subtarget.
622  unsigned getMinWavesPerEU() const {
623  return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
624  }
625 
626  /// \returns Maximum number of waves per execution unit supported by the
627  /// subtarget without any kind of limitation.
628  unsigned getMaxWavesPerEU() const {
629  return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
630  }
631 
632  /// \returns Maximum number of waves per execution unit supported by the
633  /// subtarget and limited by given \p FlatWorkGroupSize.
634  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
635  return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
636  FlatWorkGroupSize);
637  }
638 
639  /// \returns Minimum flat work group size supported by the subtarget.
640  unsigned getMinFlatWorkGroupSize() const {
641  return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
642  }
643 
644  /// \returns Maximum flat work group size supported by the subtarget.
645  unsigned getMaxFlatWorkGroupSize() const {
646  return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
647  }
648 
649  /// \returns Number of waves per work group supported by the subtarget and
650  /// limited by given \p FlatWorkGroupSize.
651  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
652  return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
653  FlatWorkGroupSize);
654  }
655 
656  /// \returns Default range flat work group size for a calling convention.
657  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
658 
659  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
660  /// for function \p F, or minimum/maximum flat work group sizes explicitly
661  /// requested using "amdgpu-flat-work-group-size" attribute attached to
662  /// function \p F.
663  ///
664  /// \returns Subtarget's default values if explicitly requested values cannot
665  /// be converted to integer, or violate subtarget's specifications.
666  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
667 
668  /// \returns Subtarget's default pair of minimum/maximum number of waves per
669  /// execution unit for function \p F, or minimum/maximum number of waves per
670  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
671  /// attached to function \p F.
672  ///
673  /// \returns Subtarget's default values if explicitly requested values cannot
674  /// be converted to integer, violate subtarget's specifications, or are not
675  /// compatible with minimum/maximum number of waves limited by flat work group
676  /// size, register usage, and/or lds usage.
677  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
678 
679  /// Creates value range metadata on a workitemid.* intrinsic call or load.
680  bool makeLIDRangeMetadata(Instruction *I) const;
681 };
682 
683 class R600Subtarget final : public AMDGPUSubtarget {
684 private:
685  R600InstrInfo InstrInfo;
686  R600FrameLowering FrameLowering;
687  R600TargetLowering TLInfo;
688 
689 public:
690  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
691  const TargetMachine &TM);
692 
693  const R600InstrInfo *getInstrInfo() const override {
694  return &InstrInfo;
695  }
696 
697  const R600FrameLowering *getFrameLowering() const override {
698  return &FrameLowering;
699  }
700 
701  const R600TargetLowering *getTargetLowering() const override {
702  return &TLInfo;
703  }
704 
705  const R600RegisterInfo *getRegisterInfo() const override {
706  return &InstrInfo.getRegisterInfo();
707  }
708 
709  bool hasCFAluBug() const {
710  return CFALUBug;
711  }
712 
713  bool hasVertexCache() const {
714  return HasVertexCache;
715  }
716 
717  short getTexVTXClauseSize() const {
718  return TexVTXClauseSize;
719  }
720 };
721 
722 class SISubtarget final : public AMDGPUSubtarget {
723 private:
724  SIInstrInfo InstrInfo;
725  SIFrameLowering FrameLowering;
726  SITargetLowering TLInfo;
727 
728  /// GlobalISel related APIs.
729  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
730  std::unique_ptr<InstructionSelector> InstSelector;
731  std::unique_ptr<LegalizerInfo> Legalizer;
732  std::unique_ptr<RegisterBankInfo> RegBankInfo;
733 
734 public:
735  SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
736  const GCNTargetMachine &TM);
737 
738  const SIInstrInfo *getInstrInfo() const override {
739  return &InstrInfo;
740  }
741 
742  const SIFrameLowering *getFrameLowering() const override {
743  return &FrameLowering;
744  }
745 
746  const SITargetLowering *getTargetLowering() const override {
747  return &TLInfo;
748  }
749 
750  const CallLowering *getCallLowering() const override {
751  return CallLoweringInfo.get();
752  }
753 
754  const InstructionSelector *getInstructionSelector() const override {
755  return InstSelector.get();
756  }
757 
758  const LegalizerInfo *getLegalizerInfo() const override {
759  return Legalizer.get();
760  }
761 
762  const RegisterBankInfo *getRegBankInfo() const override {
763  return RegBankInfo.get();
764  }
765 
766  const SIRegisterInfo *getRegisterInfo() const override {
767  return &InstrInfo.getRegisterInfo();
768  }
769 
770  // XXX - Why is this here if it isn't in the default pass set?
771  bool enableEarlyIfConversion() const override {
772  return true;
773  }
774 
775  void overrideSchedPolicy(MachineSchedPolicy &Policy,
776  unsigned NumRegionInstrs) const override;
777 
778  bool isVGPRSpillingEnabled(const Function& F) const;
779 
780  unsigned getMaxNumUserSGPRs() const {
781  return 16;
782  }
783 
784  bool hasSMemRealTime() const {
785  return HasSMemRealTime;
786  }
787 
788  bool hasMovrel() const {
789  return HasMovrel;
790  }
791 
792  bool hasVGPRIndexMode() const {
793  return HasVGPRIndexMode;
794  }
795 
796  bool useVGPRIndexMode(bool UserEnable) const {
797  return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
798  }
799 
800  bool hasScalarCompareEq64() const {
801  return getGeneration() >= VOLCANIC_ISLANDS;
802  }
803 
804  bool hasScalarStores() const {
805  return HasScalarStores;
806  }
807 
808  bool hasScalarAtomics() const {
809  return HasScalarAtomics;
810  }
811 
812  bool hasInv2PiInlineImm() const {
813  return HasInv2PiInlineImm;
814  }
815 
816  bool hasDPP() const {
817  return HasDPP;
818  }
819 
820  bool enableSIScheduler() const {
821  return EnableSIScheduler;
822  }
823 
824  bool debuggerSupported() const {
825  return debuggerInsertNops() && debuggerEmitPrologue();
826  }
827 
828  bool debuggerInsertNops() const {
829  return DebuggerInsertNops;
830  }
831 
832  bool debuggerEmitPrologue() const {
833  return DebuggerEmitPrologue;
834  }
835 
836  bool loadStoreOptEnabled() const {
837  return EnableLoadStoreOpt;
838  }
839 
840  bool hasSGPRInitBug() const {
841  return SGPRInitBug;
842  }
843 
844  bool has12DWordStoreHazard() const {
846  }
847 
848  bool hasSMovFedHazard() const {
850  }
851 
854  }
855 
856  bool hasReadM0SendMsgHazard() const {
858  }
859 
860  unsigned getKernArgSegmentSize(const Function &F,
861  unsigned ExplictArgBytes) const;
862 
863  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
864  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
865 
866  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
867  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
868 
869  /// \returns true if the flat_scratch register should be initialized with the
870  /// pointer to the wave's scratch memory rather than a size and offset.
871  bool flatScratchIsPointer() const {
872  return getGeneration() >= GFX9;
873  }
874 
875  /// \returns true if the machine has merged shaders in which s0-s7 are
876  /// reserved by the hardware and user SGPRs start at s8
877  bool hasMergedShaders() const {
878  return getGeneration() >= GFX9;
879  }
880 
881  /// \returns SGPR allocation granularity supported by the subtarget.
882  unsigned getSGPRAllocGranule() const {
883  return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
884  }
885 
886  /// \returns SGPR encoding granularity supported by the subtarget.
887  unsigned getSGPREncodingGranule() const {
888  return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
889  }
890 
891  /// \returns Total number of SGPRs supported by the subtarget.
892  unsigned getTotalNumSGPRs() const {
893  return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
894  }
895 
896  /// \returns Addressable number of SGPRs supported by the subtarget.
897  unsigned getAddressableNumSGPRs() const {
898  return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
899  }
900 
901  /// \returns Minimum number of SGPRs that meets the given number of waves per
902  /// execution unit requirement supported by the subtarget.
903  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
904  return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
905  }
906 
907  /// \returns Maximum number of SGPRs that meets the given number of waves per
908  /// execution unit requirement supported by the subtarget.
909  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
910  return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
911  Addressable);
912  }
913 
914  /// \returns Reserved number of SGPRs for given function \p MF.
915  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
916 
917  /// \returns Maximum number of SGPRs that meets number of waves per execution
918  /// unit requirement for function \p MF, or number of SGPRs explicitly
919  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
920  ///
921  /// \returns Value that meets number of waves per execution unit requirement
922  /// if explicitly requested value cannot be converted to integer, violates
923  /// subtarget's specifications, or does not meet number of waves per execution
924  /// unit requirement.
925  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
926 
927  /// \returns VGPR allocation granularity supported by the subtarget.
928  unsigned getVGPRAllocGranule() const {
929  return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
930  }
931 
932  /// \returns VGPR encoding granularity supported by the subtarget.
933  unsigned getVGPREncodingGranule() const {
934  return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
935  }
936 
937  /// \returns Total number of VGPRs supported by the subtarget.
938  unsigned getTotalNumVGPRs() const {
939  return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
940  }
941 
942  /// \returns Addressable number of VGPRs supported by the subtarget.
943  unsigned getAddressableNumVGPRs() const {
944  return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
945  }
946 
947  /// \returns Minimum number of VGPRs that meets given number of waves per
948  /// execution unit requirement supported by the subtarget.
949  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
950  return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
951  }
952 
953  /// \returns Maximum number of VGPRs that meets given number of waves per
954  /// execution unit requirement supported by the subtarget.
955  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
956  return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
957  }
958 
959  /// \returns Maximum number of VGPRs that meets number of waves per execution
960  /// unit requirement for function \p MF, or number of VGPRs explicitly
961  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
962  ///
963  /// \returns Value that meets number of waves per execution unit requirement
964  /// if explicitly requested value cannot be converted to integer, violates
965  /// subtarget's specifications, or does not meet number of waves per execution
966  /// unit requirement.
967  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
968 
969  void getPostRAMutations(
970  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
971  const override;
972 };
973 
974 } // end namespace llvm
975 
976 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasAutoWaitcntBeforeBarrier() const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
bool hasFP64Denormals() const
const AMDGPURegisterInfo * getRegisterInfo() const override=0
unsigned getAddressableNumVGPRs(const FeatureBitset &Features)
Generation getGeneration() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool isMesaKernel(const Function &F) const
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool getScalarizeGlobalBehavior() const
Interface definition for R600InstrInfo.
bool hasScalarCompareEq64() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getVGPREncodingGranule() const
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
bool hasFlatScratchInsts() const
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:292
bool isPromoteAllocaEnabled() const
This file describes how to lower LLVM calls to machine code calls.
AMDGPUSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that requires m0 to be initialized. ...
const SIInstrInfo * getInstrInfo() const override
bool vmemWriteNeedsExpWaitcnt() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
bool hasUnalignedScratchAccess() const
SelectionDAGTargetInfo TSInfo
bool isAmdCodeObjectV2(const Function &F) const
F(f)
unsigned getTotalNumVGPRs(const FeatureBitset &Features)
unsigned getMinWavesPerEU(const FeatureBitset &Features)
unsigned getStackAlignment() const
bool hasFlatGlobalInsts() const
unsigned getWavesPerWorkGroup(const FeatureBitset &Features, unsigned FlatWorkGroupSize)
bool hasMovrel() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:149
bool hasVOP3PInsts() const
unsigned getVGPREncodingGranule(const FeatureBitset &Features)
unsigned getEUsPerCU(const FeatureBitset &Features)
Holds all the information related to register banks.
bool flatScratchIsPointer() const
bool hasFP16Denormals() const
bool hasInv2PiInlineImm() const
unsigned getVGPRAllocGranule(const FeatureBitset &Features)
OpenCL uses address spaces to differentiate between various memory regions on the hardware...
Definition: AMDGPU.h:220
unsigned getMinFlatWorkGroupSize() const
int getLocalMemorySize() const
unsigned getTotalNumVGPRs() const
bool debuggerInsertNops() const
const InstrItineraryData * getInstrItineraryData() const override
const InstructionSelector * getInstructionSelector() const override
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features)
unsigned getMaxWavesPerCU(const FeatureBitset &Features)
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, bool Addressable)
bool hasFmaMixInsts() const
bool hasSDWAOutModsVOPC() const
const LegalizerInfo * getLegalizerInfo() const override
unsigned getMaxNumUserSGPRs() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxWavesPerEU(const FeatureBitset &Features)
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
const AMDGPUFrameLowering * getFrameLowering() const override=0
Itinerary data supplied by a subtarget to be used by a target.
bool hasSMovFedHazard() const
bool hasD16LoadStore() const
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features)
const RegisterBankInfo * getRegBankInfo() const override
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool debuggerEmitPrologue() const
void ParseSubtargetFeatures(StringRef CPU, StringRef FS)
const R600FrameLowering * getFrameLowering() const override
bool hasScalarStores() const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
bool isCompute(CallingConv::ID cc)
bool hasApertureRegs() const
bool hasCFAluBug() const
unsigned getSGPREncodingGranule() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasFminFmaxLegacy() const
bool isMesaGfxShader(const Function &F) const
unsigned getTotalNumSGPRs(const FeatureBitset &Features)
bool hasCodeObjectV3() const
bool hasUnpackedD16VMem() const
bool hasFPExceptions() const
bool enableDX10Clamp() const
bool has16BitInsts() const
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
SI DAG Lowering interface definition.
bool hasVGPRIndexMode() const
const R600InstrInfo * getInstrInfo() const override
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:69
bool debuggerSupported() const
bool hasUnalignedBufferAccess() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
bool hasDPP() const
const AMDGPUInstrInfo * getInstrInfo() const override=0
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool hasVertexCache() const
unsigned getMaxPrivateElementSize() const
unsigned getEUsPerCU() const
unsigned getAddressableNumSGPRs() const
unsigned getWavefrontSize() const
const R600RegisterInfo * getRegisterInfo() const override
const AMDGPUTargetLowering * getTargetLowering() const override=0
bool has12DWordStoreHazard() const
bool loadStoreOptEnabled() const
const SIRegisterInfo * getRegisterInfo() const override
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:199
bool hasMadMixInsts() const
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFlatAddressSpace() const
bool d16PreservesUnusedBits() const
bool hasFastFMAF32() const
void setScalarizeGlobalBehavior(bool b)
unsigned getAlignmentForImplicitArgPtr() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:531
unsigned getSGPRAllocGranule() const
bool hasAddNoCarry() const
bool isShader(CallingConv::ID cc)
TrapHandlerAbi getTrapHandlerAbi() const
bool enableSubRegLiveness() const override
unsigned getMaxWavesPerEU() const
AMDGPUAS getAMDGPUAS() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Interface definition for SIInstrInfo.
bool enableHugePrivateBuffer() const
short getTexVTXClauseSize() const
unsigned getMinWavesPerEU() const
R600 DAG Lowering interface definition.
unsigned getMaxFlatWorkGroupSize() const
bool enableEarlyIfConversion() const override
bool hasHalfRate64Ops() const
Information about the stack frame layout on the AMDGPU targets.
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
#define I(x, y, z)
Definition: MD5.cpp:58
const CallLowering * getCallLowering() const override
unsigned getVGPRAllocGranule() const
bool hasSMemRealTime() const
const SIFrameLowering * getFrameLowering() const override
bool hasReadM0SendMsgHazard() const
bool hasMin3Max3_16() const
bool enableSIScheduler() const
~AMDGPUSubtarget() override
unsigned getAddressableNumSGPRs(const FeatureBitset &Features)
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM)
const SITargetLowering * getTargetLowering() const override
unsigned getSGPREncodingGranule(const FeatureBitset &Features)
bool hasMergedShaders() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool isTrapHandlerEnabled() const
bool privateMemoryResourceIsRangeChecked() const
bool hasSGPRInitBug() const
unsigned getMaxWavesPerCU() const
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
InstrItineraryData InstrItins
bool hasBCNT(unsigned Size) const
const R600TargetLowering * getTargetLowering() const override
unsigned getSGPRAllocGranule(const FeatureBitset &Features)
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
unsigned getWavefrontSizeLog2() const
bool unsafeDSOffsetFoldingEnabled() const
bool hasFlatLgkmVMemCountInOrder() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
bool enableIEEEBit(const MachineFunction &MF) const
bool hasSDWAScalar() const
bool useFlatForGlobal() const
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU)
bool enableMachineScheduler() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool hasScalarAtomics() const
bool useVGPRIndexMode(bool UserEnable) const
unsigned getTotalNumSGPRs() const
bool isXNACKEnabled() const
bool hasFlatInstOffsets() const
bool supportsMinMaxDenormModes() const
bool hasReadM0MovRelInterpHazard() const
unsigned getAddressableNumVGPRs() const