LLVM  13.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1 //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMD GCN specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16 
17 #include "AMDGPUCallLowering.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIFrameLowering.h"
20 #include "SIISelLowering.h"
21 #include "SIInstrInfo.h"
23 
24 namespace llvm {
25 
26 class MCInst;
27 class MCInstrInfo;
28 
29 } // namespace llvm
30 
31 #define GET_SUBTARGETINFO_HEADER
32 #include "AMDGPUGenSubtargetInfo.inc"
33 
34 namespace llvm {
35 
36 class GCNTargetMachine;
37 
38 class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
39  public AMDGPUSubtarget {
40 
42 
43 public:
44  // Following 2 enums are documented at:
45  // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
46  enum class TrapHandlerAbi {
47  NONE = 0x00,
48  AMDHSA = 0x01,
49  };
50 
51  enum class TrapID {
52  LLVMAMDHSATrap = 0x02,
53  LLVMAMDHSADebugTrap = 0x03,
54  };
55 
56 private:
57  /// GlobalISel related APIs.
58  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
59  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
60  std::unique_ptr<InstructionSelector> InstSelector;
61  std::unique_ptr<LegalizerInfo> Legalizer;
62  std::unique_ptr<RegisterBankInfo> RegBankInfo;
63 
64 protected:
65  // Basic subtarget description.
68  unsigned Gen;
72 
73  // Possibly statically set by tablegen, but may want to be overridden.
74  bool FastFMAF32;
78 
79  // Dynamically set bits that enable features.
86 
87  // This should not be used directly. 'TargetID' tracks the dynamic settings
88  // for XNACK.
90 
94 
95  // Used as options.
101  bool DumpCode;
102 
103  // Subtarget properties statically set by tablegen
104  bool FP64;
105  bool FMA;
106  bool MIMG_R128;
107  bool IsGCN;
108  bool CIInsts;
109  bool GFX8Insts;
110  bool GFX9Insts;
119  bool HasMovrel;
128  bool HasDPP;
129  bool HasDPP8;
135  bool HasG16;
150 
151  // This should not be used directly. 'TargetID' tracks the dynamic settings
152  // for SRAMECC.
154 
156  bool HasVscnt;
171  bool CaymanISA;
172  bool CFALUBug;
181 
193 
194  // Dummy feature to use for assembler in tablegen.
196 
198 private:
199  SIInstrInfo InstrInfo;
200  SITargetLowering TLInfo;
201  SIFrameLowering FrameLowering;
202 
203 public:
204  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
205  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
206 
207  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
208  const GCNTargetMachine &TM);
209  ~GCNSubtarget() override;
210 
212  StringRef GPU, StringRef FS);
213 
214  const SIInstrInfo *getInstrInfo() const override {
215  return &InstrInfo;
216  }
217 
218  const SIFrameLowering *getFrameLowering() const override {
219  return &FrameLowering;
220  }
221 
222  const SITargetLowering *getTargetLowering() const override {
223  return &TLInfo;
224  }
225 
226  const SIRegisterInfo *getRegisterInfo() const override {
227  return &InstrInfo.getRegisterInfo();
228  }
229 
230  const CallLowering *getCallLowering() const override {
231  return CallLoweringInfo.get();
232  }
233 
234  const InlineAsmLowering *getInlineAsmLowering() const override {
235  return InlineAsmLoweringInfo.get();
236  }
237 
239  return InstSelector.get();
240  }
241 
242  const LegalizerInfo *getLegalizerInfo() const override {
243  return Legalizer.get();
244  }
245 
246  const RegisterBankInfo *getRegBankInfo() const override {
247  return RegBankInfo.get();
248  }
249 
251  return TargetID;
252  }
253 
254  // Nothing implemented, just prevent crashes on use.
255  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
256  return &TSInfo;
257  }
258 
259  const InstrItineraryData *getInstrItineraryData() const override {
260  return &InstrItins;
261  }
262 
264 
266  return (Generation)Gen;
267  }
268 
269  /// Return the number of high bits known to be zero for a frame index.
272  }
273 
274  int getLDSBankCount() const {
275  return LDSBankCount;
276  }
277 
278  unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
  // Yields MaxPrivateElementSize when the caller asks on behalf of a buffer
  // resource (ForBufferRSrc) or when flat scratch is not enabled; in the
  // remaining case (flat scratch enabled, not for a buffer resource) the
  // result is the constant 16.
279  return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
280  }
281 
282  unsigned getConstantBusLimit(unsigned Opcode) const;
283 
284  bool hasIntClamp() const {
285  return HasIntClamp;
286  }
287 
288  bool hasFP64() const {
289  return FP64;
290  }
291 
292  bool hasMIMG_R128() const {
293  return MIMG_R128;
294  }
295 
296  bool hasHWFP64() const {
297  return FP64;
298  }
299 
300  bool hasFastFMAF32() const {
301  return FastFMAF32;
302  }
303 
304  bool hasHalfRate64Ops() const {
305  return HalfRate64Ops;
306  }
307 
308  bool hasFullRate64Ops() const {
309  return FullRate64Ops;
310  }
311 
312  bool hasAddr64() const {
314  }
315 
316  bool hasFlat() const {
318  }
319 
320  // Return true if the target only has the reverse operand versions of VALU
321  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
322  bool hasOnlyRevVALUShifts() const {
323  return getGeneration() >= VOLCANIC_ISLANDS;
324  }
325 
326  bool hasFractBug() const {
327  return getGeneration() == SOUTHERN_ISLANDS;
328  }
329 
330  bool hasBFE() const {
331  return true;
332  }
333 
334  bool hasBFI() const {
335  return true;
336  }
337 
338  bool hasBFM() const {
339  return hasBFE();
340  }
341 
342  bool hasBCNT(unsigned Size) const {
343  return true;
344  }
345 
346  bool hasFFBL() const {
347  return true;
348  }
349 
350  bool hasFFBH() const {
351  return true;
352  }
353 
354  bool hasMed3_16() const {
356  }
357 
358  bool hasMin3Max3_16() const {
360  }
361 
362  bool hasFmaMixInsts() const {
363  return HasFmaMixInsts;
364  }
365 
366  bool hasCARRY() const {
367  return true;
368  }
369 
370  bool hasFMA() const {
371  return FMA;
372  }
373 
374  bool hasSwap() const {
375  return GFX9Insts;
376  }
377 
378  bool hasScalarPackInsts() const {
379  return GFX9Insts;
380  }
381 
382  bool hasScalarMulHiInsts() const {
383  return GFX9Insts;
384  }
385 
388  }
389 
390  bool supportsGetDoorbellID() const {
391  // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
392  return getGeneration() >= GFX9;
393  }
394 
395  /// True if the offset field of DS instructions works as expected. On SI, the
396  /// offset uses a 16-bit adder and does not always wrap properly.
397  bool hasUsableDSOffset() const {
398  return getGeneration() >= SEA_ISLANDS;
399  }
400 
403  }
404 
405  /// Condition output from div_scale is usable.
407  return getGeneration() != SOUTHERN_ISLANDS;
408  }
409 
410  /// Extra wait hazard is needed in some cases before
411  /// s_cbranch_vccnz/s_cbranch_vccz.
412  bool hasReadVCCZBug() const {
413  return getGeneration() <= SEA_ISLANDS;
414  }
415 
416  /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
418  return getGeneration() >= GFX10;
419  }
420 
421  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
422  /// was written by a VALU instruction.
424  return getGeneration() == SOUTHERN_ISLANDS;
425  }
426 
427  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
428  /// SGPR was written by a VALU Instruction.
430  return getGeneration() >= VOLCANIC_ISLANDS;
431  }
432 
433  bool hasRFEHazards() const {
434  return getGeneration() >= VOLCANIC_ISLANDS;
435  }
436 
437  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
438  unsigned getSetRegWaitStates() const {
439  return getGeneration() <= SEA_ISLANDS ? 1 : 2;
440  }
441 
442  bool dumpCode() const {
443  return DumpCode;
444  }
445 
446  /// Return the amount of LDS that can be used that will not restrict the
447  /// occupancy lower than WaveCount.
448  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
449  const Function &) const;
450 
453  }
454 
455  /// \returns If target supports S_DENORM_MODE.
456  bool hasDenormModeInst() const {
458  }
459 
460  bool useFlatForGlobal() const {
461  return FlatForGlobal;
462  }
463 
464  /// \returns If target supports ds_read/write_b128 and user enables generation
465  /// of ds_read/write_b128.
466  bool useDS128() const {
467  return CIInsts && EnableDS128;
468  }
469 
470  /// \return If target supports ds_read/write_b96/128.
471  bool hasDS96AndDS128() const {
472  return CIInsts;
473  }
474 
475  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
476  bool haveRoundOpsF64() const {
477  return CIInsts;
478  }
479 
480  /// \returns If MUBUF instructions always perform range checking, even for
481  /// buffer resources used for private memory access.
484  }
485 
486  /// \returns If target requires PRT Strict NULL support (zero result registers
487  /// for sparse texture support).
488  bool usePRTStrictNull() const {
489  return EnablePRTStrictNull;
490  }
491 
494  }
495 
497  return UnalignedBufferAccess;
498  }
499 
502  }
503 
504  bool hasUnalignedDSAccess() const {
505  return UnalignedDSAccess;
506  }
507 
510  }
511 
513  return UnalignedScratchAccess;
514  }
515 
516  bool hasUnalignedAccessMode() const {
517  return UnalignedAccessMode;
518  }
519 
520  bool hasApertureRegs() const {
521  return HasApertureRegs;
522  }
523 
524  bool isTrapHandlerEnabled() const {
525  return TrapHandler;
526  }
527 
528  bool isXNACKEnabled() const {
529  return TargetID.isXnackOnOrAny();
530  }
531 
532  bool isTgSplitEnabled() const {
533  return EnableTgSplit;
534  }
535 
536  bool isCuModeEnabled() const {
537  return EnableCuMode;
538  }
539 
540  bool hasFlatAddressSpace() const {
541  return FlatAddressSpace;
542  }
543 
544  bool hasFlatScrRegister() const {
545  return hasFlatAddressSpace();
546  }
547 
548  bool hasFlatInstOffsets() const {
549  return FlatInstOffsets;
550  }
551 
552  bool hasFlatGlobalInsts() const {
553  return FlatGlobalInsts;
554  }
555 
556  bool hasFlatScratchInsts() const {
557  return FlatScratchInsts;
558  }
559 
560  // Check if target supports ST addressing mode with FLAT scratch instructions.
561  // The ST addressing mode means no registers are used, either VGPR or SGPR,
562  // but only immediate offset is swizzled and added to the FLAT scratch base.
563  bool hasFlatScratchSTMode() const {
564  return hasFlatScratchInsts() && hasGFX10_3Insts();
565  }
566 
568  return ScalarFlatScratchInsts;
569  }
570 
571  bool hasGlobalAddTidInsts() const {
572  return GFX10_BEncoding;
573  }
574 
575  bool hasAtomicCSub() const {
576  return GFX10_BEncoding;
577  }
578 
580  return getGeneration() >= GFX9;
581  }
582 
583  bool hasFlatSegmentOffsetBug() const {
585  }
586 
588  return getGeneration() > GFX9;
589  }
590 
591  bool hasD16LoadStore() const {
592  return getGeneration() >= GFX9;
593  }
594 
595  bool d16PreservesUnusedBits() const {
597  }
598 
599  bool hasD16Images() const {
600  return getGeneration() >= VOLCANIC_ISLANDS;
601  }
602 
603  /// Return if most LDS instructions have an m0 use that requires m0 to be
604  /// initialized.
605  bool ldsRequiresM0Init() const {
606  return getGeneration() < GFX9;
607  }
608 
609  // True if the hardware rewinds and replays GWS operations if a wave is
610  // preempted.
611  //
612  // If this is false, a GWS operation requires testing if a nack set the
613  // MEM_VIOL bit, and repeating if so.
614  bool hasGWSAutoReplay() const {
615  return getGeneration() >= GFX9;
616  }
617 
618  /// \returns if target has ds_gws_sema_release_all instruction.
619  bool hasGWSSemaReleaseAll() const {
620  return CIInsts;
621  }
622 
623  /// \returns true if the target has integer add/sub instructions that do not
624  /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
625  /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
626  /// for saturation.
627  bool hasAddNoCarry() const {
628  return AddNoCarryInsts;
629  }
630 
631  bool hasUnpackedD16VMem() const {
632  return HasUnpackedD16VMem;
633  }
634 
635  // Covers VS/PS/CS graphics shaders
636  bool isMesaGfxShader(const Function &F) const {
637  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
638  }
639 
640  bool hasMad64_32() const {
641  return getGeneration() >= SEA_ISLANDS;
642  }
643 
644  bool hasSDWAOmod() const {
645  return HasSDWAOmod;
646  }
647 
648  bool hasSDWAScalar() const {
649  return HasSDWAScalar;
650  }
651 
652  bool hasSDWASdst() const {
653  return HasSDWASdst;
654  }
655 
656  bool hasSDWAMac() const {
657  return HasSDWAMac;
658  }
659 
660  bool hasSDWAOutModsVOPC() const {
661  return HasSDWAOutModsVOPC;
662  }
663 
664  bool hasDLInsts() const {
665  return HasDLInsts;
666  }
667 
668  bool hasDot1Insts() const {
669  return HasDot1Insts;
670  }
671 
672  bool hasDot2Insts() const {
673  return HasDot2Insts;
674  }
675 
676  bool hasDot3Insts() const {
677  return HasDot3Insts;
678  }
679 
680  bool hasDot4Insts() const {
681  return HasDot4Insts;
682  }
683 
684  bool hasDot5Insts() const {
685  return HasDot5Insts;
686  }
687 
688  bool hasDot6Insts() const {
689  return HasDot6Insts;
690  }
691 
692  bool hasDot7Insts() const {
693  return HasDot7Insts;
694  }
695 
696  bool hasMAIInsts() const {
697  return HasMAIInsts;
698  }
699 
700  bool hasPkFmacF16Inst() const {
701  return HasPkFmacF16Inst;
702  }
703 
704  bool hasAtomicFaddInsts() const {
705  return HasAtomicFaddInsts;
706  }
707 
708  bool hasNoSdstCMPX() const {
709  return HasNoSdstCMPX;
710  }
711 
712  bool hasVscnt() const {
713  return HasVscnt;
714  }
715 
716  bool hasGetWaveIdInst() const {
717  return HasGetWaveIdInst;
718  }
719 
720  bool hasSMemTimeInst() const {
721  return HasSMemTimeInst;
722  }
723 
724  bool hasShaderCyclesRegister() const {
726  }
727 
728  bool hasRegisterBanking() const {
729  return HasRegisterBanking;
730  }
731 
732  bool hasVOP3Literal() const {
733  return HasVOP3Literal;
734  }
735 
736  bool hasNoDataDepHazard() const {
737  return HasNoDataDepHazard;
738  }
739 
741  return getGeneration() < SEA_ISLANDS;
742  }
743 
744  // Scratch is allocated in 256 dword per wave blocks for the entire
745  // wavefront. When viewed from the perspective of an arbitrary workitem, this
746  // is 4-byte aligned.
747  //
748  // Only 4-byte alignment is really needed to access anything. Transformations
749  // on the pointer value itself may rely on the alignment / known low bits of
750  // the pointer. Set this to something above the minimum to avoid needing
751  // dynamic realignment in common cases.
752  Align getStackAlignment() const { return Align(16); }
753 
754  bool enableMachineScheduler() const override {
755  return true;
756  }
757 
758  bool useAA() const override;
759 
760  bool enableSubRegLiveness() const override {
761  return true;
762  }
763 
766 
767  // static wrappers
768  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
769 
770  // XXX - Why is this here if it isn't in the default pass set?
771  bool enableEarlyIfConversion() const override {
772  return true;
773  }
774 
775  bool enableFlatScratch() const;
776 
778  unsigned NumRegionInstrs) const override;
779 
780  unsigned getMaxNumUserSGPRs() const {
781  return 16;
782  }
783 
784  bool hasSMemRealTime() const {
785  return HasSMemRealTime;
786  }
787 
788  bool hasMovrel() const {
789  return HasMovrel;
790  }
791 
792  bool hasVGPRIndexMode() const {
793  return HasVGPRIndexMode;
794  }
795 
796  bool useVGPRIndexMode() const;
797 
798  bool hasScalarCompareEq64() const {
799  return getGeneration() >= VOLCANIC_ISLANDS;
800  }
801 
802  bool hasScalarStores() const {
803  return HasScalarStores;
804  }
805 
806  bool hasScalarAtomics() const {
807  return HasScalarAtomics;
808  }
809 
810  bool hasLDSFPAtomics() const {
811  return GFX8Insts;
812  }
813 
814  /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
815  bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
816 
817  bool hasDPP() const {
818  return HasDPP;
819  }
820 
821  bool hasDPPBroadcasts() const {
  // True only when the subtarget has DPP and its generation is below GFX10.
822  return HasDPP && getGeneration() < GFX10;
823  }
824 
825  bool hasDPPWavefrontShifts() const {
  // True only when the subtarget has DPP and its generation is below GFX10.
826  return HasDPP && getGeneration() < GFX10;
827  }
828 
829  bool hasDPP8() const {
830  return HasDPP8;
831  }
832 
833  bool has64BitDPP() const {
834  return Has64BitDPP;
835  }
836 
837  bool hasPackedFP32Ops() const {
838  return HasPackedFP32Ops;
839  }
840 
841  bool hasExtendedImageInsts() const {
842  return HasExtendedImageInsts;
843  }
844 
845  bool hasR128A16() const {
846  return HasR128A16;
847  }
848 
849  bool hasGFX10A16() const {
850  return HasGFX10A16;
851  }
852 
853  bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
854 
855  bool hasG16() const { return HasG16; }
856 
857  bool hasOffset3fBug() const {
858  return HasOffset3fBug;
859  }
860 
861  bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
862 
864 
865  bool hasNSAEncoding() const { return HasNSAEncoding; }
866 
867  bool hasGFX10_BEncoding() const {
868  return GFX10_BEncoding;
869  }
870 
871  bool hasGFX10_3Insts() const {
872  return GFX10_3Insts;
873  }
874 
875  bool hasMadF16() const;
876 
877  bool enableSIScheduler() const {
878  return EnableSIScheduler;
879  }
880 
881  bool loadStoreOptEnabled() const {
882  return EnableLoadStoreOpt;
883  }
884 
885  bool hasSGPRInitBug() const {
886  return SGPRInitBug;
887  }
888 
889  bool hasMFMAInlineLiteralBug() const {
891  }
892 
893  bool has12DWordStoreHazard() const {
895  }
896 
897  /// \returns true if the subtarget supports DWORDX3 load/store instructions.
898  bool hasDwordx3LoadStores() const {
899  return CIInsts;
900  }
901 
904  }
905 
906  bool hasReadM0SendMsgHazard() const {
909  }
910 
911  bool hasVcmpxPermlaneHazard() const {
912  return HasVcmpxPermlaneHazard;
913  }
914 
917  }
918 
921  }
922 
923  bool hasLDSMisalignedBug() const {
  // The LDSMisalignedBug feature bit is reported only while CU mode is not
  // enabled; with EnableCuMode set this always returns false.
924  return LDSMisalignedBug && !EnableCuMode;
925  }
926 
927  bool hasInstFwdPrefetchBug() const {
928  return HasInstFwdPrefetchBug;
929  }
930 
931  bool hasVcmpxExecWARHazard() const {
932  return HasVcmpxExecWARHazard;
933  }
934 
937  }
938 
939  bool hasNSAtoVMEMBug() const {
940  return HasNSAtoVMEMBug;
941  }
942 
943  bool hasHardClauses() const { return getGeneration() >= GFX10; }
944 
945  bool hasGFX90AInsts() const { return GFX90AInsts; }
946 
947  /// Return if operations acting on VGPR tuples require even alignment.
948  bool needsAlignedVGPRs() const { return GFX90AInsts; }
949 
950  bool hasPackedTID() const { return HasPackedTID; }
951 
952  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
953  /// SGPRs
954  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
955 
956  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
957  /// VGPRs
958  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
959 
960  /// Return occupancy for the given function. Uses the LDS size and the number
961  /// of registers if provided.
962  /// Note, occupancy can be affected by the scratch allocation as well, but
963  /// we do not have enough information to compute it.
964  unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
965  unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
966 
967  /// \returns true if the flat_scratch register should be initialized with the
968  /// pointer to the wave's scratch memory rather than a size and offset.
969  bool flatScratchIsPointer() const {
971  }
972 
973  /// \returns true if the machine has merged shaders in which s0-s7 are
974  /// reserved by the hardware and user SGPRs start at s8
975  bool hasMergedShaders() const {
976  return getGeneration() >= GFX9;
977  }
978 
979  /// \returns SGPR allocation granularity supported by the subtarget.
980  unsigned getSGPRAllocGranule() const {
982  }
983 
984  /// \returns SGPR encoding granularity supported by the subtarget.
985  unsigned getSGPREncodingGranule() const {
987  }
988 
989  /// \returns Total number of SGPRs supported by the subtarget.
990  unsigned getTotalNumSGPRs() const {
992  }
993 
994  /// \returns Addressable number of SGPRs supported by the subtarget.
995  unsigned getAddressableNumSGPRs() const {
997  }
998 
999  /// \returns Minimum number of SGPRs that meets the given number of waves per
1000  /// execution unit requirement supported by the subtarget.
1001  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1002  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1003  }
1004 
1005  /// \returns Maximum number of SGPRs that meets the given number of waves per
1006  /// execution unit requirement supported by the subtarget.
1007  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1008  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1009  }
1010 
1011  /// \returns Reserved number of SGPRs for given function \p MF.
1012  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1013 
1014  /// \returns Maximum number of SGPRs that meets number of waves per execution
1015  /// unit requirement for function \p MF, or number of SGPRs explicitly
1016  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1017  ///
1018  /// \returns Value that meets number of waves per execution unit requirement
1019  /// if explicitly requested value cannot be converted to integer, violates
1020  /// subtarget's specifications, or does not meet number of waves per execution
1021  /// unit requirement.
1022  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1023 
1024  /// \returns VGPR allocation granularity supported by the subtarget.
1025  unsigned getVGPRAllocGranule() const {
1027  }
1028 
1029  /// \returns VGPR encoding granularity supported by the subtarget.
1030  unsigned getVGPREncodingGranule() const {
1032  }
1033 
1034  /// \returns Total number of VGPRs supported by the subtarget.
1035  unsigned getTotalNumVGPRs() const {
1036  return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1037  }
1038 
1039  /// \returns Addressable number of VGPRs supported by the subtarget.
1040  unsigned getAddressableNumVGPRs() const {
1042  }
1043 
1044  /// \returns Minimum number of VGPRs that meets given number of waves per
1045  /// execution unit requirement supported by the subtarget.
1046  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1047  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1048  }
1049 
1050  /// \returns Maximum number of VGPRs that meets given number of waves per
1051  /// execution unit requirement supported by the subtarget.
1052  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1053  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1054  }
1055 
1056  /// \returns Maximum number of VGPRs that meets number of waves per execution
1057  /// unit requirement for function \p MF, or number of VGPRs explicitly
1058  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1059  ///
1060  /// \returns Value that meets number of waves per execution unit requirement
1061  /// if explicitly requested value cannot be converted to integer, violates
1062  /// subtarget's specifications, or does not meet number of waves per execution
1063  /// unit requirement.
1064  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1065 
1066  void getPostRAMutations(
1067  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1068  const override;
1069 
1070  bool isWave32() const {
1071  return getWavefrontSize() == 32;
1072  }
1073 
1074  bool isWave64() const {
1075  return getWavefrontSize() == 64;
1076  }
1077 
1079  return getRegisterInfo()->getBoolRC();
1080  }
1081 
1082  /// \returns Maximum number of work groups per compute unit supported by the
1083  /// subtarget and limited by given \p FlatWorkGroupSize.
1084  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1085  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1086  }
1087 
1088  /// \returns Minimum flat work group size supported by the subtarget.
1089  unsigned getMinFlatWorkGroupSize() const override {
1091  }
1092 
1093  /// \returns Maximum flat work group size supported by the subtarget.
1094  unsigned getMaxFlatWorkGroupSize() const override {
1096  }
1097 
1098  /// \returns Number of waves per execution unit required to support the given
1099  /// \p FlatWorkGroupSize.
1100  unsigned
1101  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1102  return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1103  }
1104 
1105  /// \returns Minimum number of waves per execution unit supported by the
1106  /// subtarget.
1107  unsigned getMinWavesPerEU() const override {
1108  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1109  }
1110 
1111  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1112  SDep &Dep) const override;
1113 };
1114 
1115 } // end namespace llvm
1116 
1117 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::GCNSubtarget::hasScalarMulHiInsts
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:382
llvm::GCNSubtarget::HasDot3Insts
bool HasDot3Insts
Definition: GCNSubtarget.h:141
llvm::GCNSubtarget::Gen
unsigned Gen
Definition: GCNSubtarget.h:68
llvm::GCNSubtarget::hasGFX10A16
bool hasGFX10A16() const
Definition: GCNSubtarget.h:849
llvm::GCNSubtarget::hasBFM
bool hasBFM() const
Definition: GCNSubtarget.h:338
llvm::GCNSubtarget::hasDot2Insts
bool hasDot2Insts() const
Definition: GCNSubtarget.h:672
llvm
Definition: AllocatorList.h:23
llvm::AMDGPU::IsaInfo::getSGPRAllocGranule
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:557
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::GCNSubtarget::GFX8Insts
bool GFX8Insts
Definition: GCNSubtarget.h:109
llvm::GCNSubtarget::hasGWSAutoReplay
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:614
llvm::GCNSubtarget::hasFlatLgkmVMemCountInOrder
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:587
llvm::GCNSubtarget::HasSDWAScalar
bool HasSDWAScalar
Definition: GCNSubtarget.h:124
llvm::GCNSubtarget::TrapHandlerAbi
TrapHandlerAbi
Definition: GCNSubtarget.h:46
llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:65
llvm::GCNSubtarget::HasGetWaveIdInst
bool HasGetWaveIdInst
Definition: GCNSubtarget.h:157
llvm::GCNSubtarget::getRegBankInfo
const RegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:246
llvm::GCNSubtarget::hasRegisterBanking
bool hasRegisterBanking() const
Definition: GCNSubtarget.h:728
llvm::GCNSubtarget::hasSDWAMac
bool hasSDWAMac() const
Definition: GCNSubtarget.h:656
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumSGPRs
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
Definition: AMDGPUMetadata.h:253
llvm::GCNSubtarget::hasVGPRIndexMode
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:792
llvm::InlineAsmLowering
Definition: InlineAsmLowering.h:28
llvm::GCNSubtarget::hasSDWASdst
bool hasSDWASdst() const
Definition: GCNSubtarget.h:652
llvm::GCNSubtarget::getFrameLowering
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:218
llvm::GCNSubtarget::initializeSubtargetDependencies
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
Definition: AMDGPUSubtarget.cpp:81
llvm::GCNSubtarget::hasD16Images
bool hasD16Images() const
Definition: GCNSubtarget.h:599
llvm::GCNSubtarget::EnablePRTStrictNull
bool EnablePRTStrictNull
Definition: GCNSubtarget.h:100
llvm::Function
Definition: Function.h:61
llvm::GCNSubtarget::HasDot2Insts
bool HasDot2Insts
Definition: GCNSubtarget.h:140
llvm::GCNSubtarget::HasPackedFP32Ops
bool HasPackedFP32Ops
Definition: GCNSubtarget.h:131
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isSramEccOnOrAny
bool isSramEccOnOrAny() const
Definition: AMDGPUBaseInfo.h:130
llvm::GCNSubtarget::FlatGlobalInsts
bool FlatGlobalInsts
Definition: GCNSubtarget.h:165
llvm::GCNSubtarget::FlatAddressSpace
bool FlatAddressSpace
Definition: GCNSubtarget.h:163
llvm::GCNSubtarget::HasDLInsts
bool HasDLInsts
Definition: GCNSubtarget.h:138
llvm::GCNSubtarget::hasMovrel
bool hasMovrel() const
Definition: GCNSubtarget.h:788
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:37
llvm::GCNSubtarget::hasVMEMReadSGPRVALUDefHazard
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU Instruction.
Definition: GCNSubtarget.h:429
llvm::GCNSubtarget::TrapHandlerAbi::NONE
@ NONE
llvm::GCNSubtarget::hasPermLaneX16
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:815
llvm::GCNSubtarget::hasShaderCyclesRegister
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:724
llvm::GCNSubtarget::needsAlignedVGPRs
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
Definition: GCNSubtarget.h:948
llvm::GCNSubtarget::hasFlatScratchInsts
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:556
llvm::GCNSubtarget::UnalignedDSAccess
bool UnalignedDSAccess
Definition: GCNSubtarget.h:178
llvm::GCNSubtarget::hasFP64
bool hasFP64() const
Definition: GCNSubtarget.h:288
llvm::GCNSubtarget::InstrItins
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:69
llvm::GCNSubtarget::HasImageStoreD16Bug
bool HasImageStoreD16Bug
Definition: GCNSubtarget.h:191
llvm::GCNSubtarget::hasAutoWaitcntBeforeBarrier
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:492
llvm::GCNSubtarget::supportsMinMaxDenormModes
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:451
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:583
llvm::GCNSubtarget::HasDot4Insts
bool HasDot4Insts
Definition: GCNSubtarget.h:142
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::GCNSubtarget::hasDS96AndDS128
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:471
llvm::GCNSubtarget::HasVcmpxPermlaneHazard
bool HasVcmpxPermlaneHazard
Definition: GCNSubtarget.h:182
llvm::GCNSubtarget::getSetRegWaitStates
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:438
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::GCNSubtarget::HasExtendedImageInsts
bool HasExtendedImageInsts
Definition: GCNSubtarget.h:132
llvm::GCNSubtarget::HasVertexCache
bool HasVertexCache
Definition: GCNSubtarget.h:175
llvm::GCNSubtarget::hasFlatGlobalInsts
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:552
llvm::GCNSubtarget::hasCARRY
bool hasCARRY() const
Definition: GCNSubtarget.h:366
llvm::GCNSubtarget::useDS128
bool useDS128() const
Definition: GCNSubtarget.h:466
llvm::GCNSubtarget::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1084
llvm::GCNSubtarget::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1094
llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:524
llvm::GCNSubtarget::hasDot3Insts
bool hasDot3Insts() const
Definition: GCNSubtarget.h:676
llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:686
llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:664
llvm::GCNSubtarget::hasFractBug
bool hasFractBug() const
Definition: GCNSubtarget.h:326
llvm::GCNSubtarget::hasDwordx3LoadStores
bool hasDwordx3LoadStores() const
Definition: GCNSubtarget.h:898
llvm::GCNSubtarget::hasNSAEncoding
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:865
llvm::GCNSubtarget::TrapID::LLVMAMDHSADebugTrap
@ LLVMAMDHSADebugTrap
llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: AMDGPUSubtarget.h:223
llvm::GCNSubtarget::HasSDWAOutModsVOPC
bool HasSDWAOutModsVOPC
Definition: GCNSubtarget.h:127
llvm::AMDGPU::IsaInfo::getMinWavesPerEU
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:524
llvm::GCNSubtarget::getStackAlignment
Align getStackAlignment() const
Definition: GCNSubtarget.h:752
llvm::GCNSubtarget::hasUnalignedDSAccessEnabled
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:508
llvm::GCNSubtarget::EnableSIScheduler
bool EnableSIScheduler
Definition: GCNSubtarget.h:98
llvm::GCNSubtarget::partialVCCWritesUpdateVCCZ
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:417
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
llvm::GCNSubtarget::HasFlatSegmentOffsetBug
bool HasFlatSegmentOffsetBug
Definition: GCNSubtarget.h:190
llvm::GCNSubtarget::loadStoreOptEnabled
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:881
llvm::GCNSubtarget::TargetID
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:67
llvm::GCNSubtarget::UnalignedBufferAccess
bool UnalignedBufferAccess
Definition: GCNSubtarget.h:177
llvm::GCNSubtarget::TexVTXClauseSize
short TexVTXClauseSize
Definition: GCNSubtarget.h:176
llvm::GCNSubtarget::hasGetWaveIdInst
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:716
llvm::GCNSubtarget::HasNSAtoVMEMBug
bool HasNSAtoVMEMBug
Definition: GCNSubtarget.h:188
llvm::GCNSubtarget::HasScalarAtomics
bool HasScalarAtomics
Definition: GCNSubtarget.h:122
llvm::GCNSubtarget::ScalarFlatScratchInsts
bool ScalarFlatScratchInsts
Definition: GCNSubtarget.h:167
llvm::AMDGPU::IsaInfo::getMaxNumVGPRs
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:711
llvm::GCNSubtarget::hasPackedFP32Ops
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:837
llvm::GCNSubtarget::HalfRate64Ops
bool HalfRate64Ops
Definition: GCNSubtarget.h:76
llvm::GCNSubtarget::hasScalarCompareEq64
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:798
llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:512
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:226
llvm::GCNSubtarget::HasOffset3fBug
bool HasOffset3fBug
Definition: GCNSubtarget.h:189
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::GCNSubtarget::UnalignedAccessMode
bool UnalignedAccessMode
Definition: GCNSubtarget.h:83
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:214
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:401
llvm::GCNSubtarget::hasUnalignedBufferAccessEnabled
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:500
llvm::GCNSubtarget::hasHalfRate64Ops
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:304
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:619
llvm::GCNSubtarget::HasSDWASdst
bool HasSDWASdst
Definition: GCNSubtarget.h:125
llvm::AMDGPU::IsaInfo::getSGPREncodingGranule
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:566
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:106
llvm::GCNSubtarget::HasNoDataDepHazard
bool HasNoDataDepHazard
Definition: GCNSubtarget.h:162
llvm::GCNSubtarget::HasVGPRIndexMode
bool HasVGPRIndexMode
Definition: GCNSubtarget.h:120
llvm::GCNSubtarget::overrideSchedPolicy
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
Definition: AMDGPUSubtarget.cpp:631
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:85
llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:270
llvm::GCNSubtarget::ParseSubtargetFeatures
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
llvm::GCNSubtarget::hasFlatScrRegister
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:544
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1313
llvm::GCNSubtarget::hasFastFMAF32
bool hasFastFMAF32() const
Definition: GCNSubtarget.h:300
llvm::GCNSubtarget::hasA16
bool hasA16() const
Definition: GCNSubtarget.h:853
llvm::GCNSubtarget::GFX10_3Insts
bool GFX10_3Insts
Definition: GCNSubtarget.h:113
llvm::GCNSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:754
llvm::GCNSubtarget::hasBFI
bool hasBFI() const
Definition: GCNSubtarget.h:334
llvm::GCNSubtarget::getMaxNumSGPRs
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
Definition: GCNSubtarget.h:1007
llvm::GCNSubtarget::useVGPRIndexMode
bool useVGPRIndexMode() const
Definition: AMDGPUSubtarget.cpp:652
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:312
llvm::X86AS::FS
@ FS
Definition: X86.h:183
llvm::GCNSubtarget::EnableLoadStoreOpt
bool EnableLoadStoreOpt
Definition: GCNSubtarget.h:96
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:460
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:362
llvm::GCNSubtarget::HasSMemRealTime
bool HasSMemRealTime
Definition: GCNSubtarget.h:116
llvm::Legalizer
Definition: Legalizer.h:31
llvm::GCNSubtarget::hasUnalignedAccessMode
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:516
llvm::GCNSubtarget::hasScalarFlatScratchInsts
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:567
llvm::GCNSubtarget::getTargetLowering
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:222
llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition: AMDGPUSubtarget.h:114
llvm::GCNSubtarget::hasSDWAOmod
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:644
llvm::AMDGPU::IsaInfo::getMaxNumSGPRs
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
Definition: AMDGPUBaseInfo.cpp:606
llvm::GCNSubtarget::hasSMRDReadVALUDefHazard
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:423
llvm::GCNSubtarget::getOccupancyWithNumSGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
Definition: AMDGPUSubtarget.cpp:658
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::GCNSubtarget::getInlineAsmLowering
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:234
b
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int b
Definition: README.txt:418
llvm::GCNSubtarget::hasR128A16
bool hasR128A16() const
Definition: GCNSubtarget.h:845
llvm::GCNSubtarget::hasVscnt
bool hasVscnt() const
Definition: GCNSubtarget.h:712
AMDGPUSubtarget.h
llvm::GCNSubtarget::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1089
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isXnackOnOrAny
bool isXnackOnOrAny() const
Definition: AMDGPUBaseInfo.h:101
llvm::SelectionDAGTargetInfo
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
Definition: SelectionDAGTargetInfo.h:31
llvm::GCNSubtarget::getMaxPrivateElementSize
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:278
llvm::GCNSubtarget::hasVOP3Literal
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:732
llvm::GCNSubtarget::SupportsXNACK
bool SupportsXNACK
Definition: GCNSubtarget.h:85
llvm::GCNSubtarget::CFALUBug
bool CFALUBug
Definition: GCNSubtarget.h:172
llvm::GCNSubtarget::hasFFBH
bool hasFFBH() const
Definition: GCNSubtarget.h:350
llvm::GCNSubtarget::hasSMemTimeInst
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:720
llvm::GCNSubtarget::hasReadVCCZBug
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:412
llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition: GCNSubtarget.h:1070
llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:885
llvm::GCNSubtarget::AutoWaitcntBeforeBarrier
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:81
llvm::GCNSubtarget::EnableTgSplit
bool EnableTgSplit
Definition: GCNSubtarget.h:91
llvm::GCNSubtarget::hasSMemRealTime
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:784
llvm::GCNSubtarget::hasMed3_16
bool hasMed3_16() const
Definition: GCNSubtarget.h:354
llvm::GCNSubtarget::getInstructionSelector
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:238
llvm::GCNSubtarget::isMesaGfxShader
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:636
llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:543
llvm::GCNSubtarget::hasGFX10_BEncoding
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:867
llvm::GCNSubtarget::TrapHandler
bool TrapHandler
Definition: GCNSubtarget.h:93
llvm::GCNSubtarget::hasPkFmacF16Inst
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:700
llvm::GCNSubtarget::TrapID
TrapID
Definition: GCNSubtarget.h:51
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:29
llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:532
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::SIFrameLowering
Definition: SIFrameLowering.h:21
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
SIISelLowering.h
llvm::GCNSubtarget::getMaxNumVGPRs
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1052
llvm::GCNSubtarget::hasDot6Insts
bool hasDot6Insts() const
Definition: GCNSubtarget.h:688
llvm::GCNSubtarget::LDSBankCount
int LDSBankCount
Definition: GCNSubtarget.h:70
llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition: GCNSubtarget.h:442
llvm::GCNSubtarget::hasScalarAtomics
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:806
llvm::GCNSubtarget::hasUnalignedDSAccess
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:504
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:765
llvm::GCNSubtarget::MIMG_R128
bool MIMG_R128
Definition: GCNSubtarget.h:106
llvm::GCNSubtarget::hasExtendedImageInsts
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:841
llvm::GCNSubtarget::hasDot1Insts
bool hasDot1Insts() const
Definition: GCNSubtarget.h:668
llvm::GCNSubtarget::hasOffset3fBug
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:857
llvm::GCNSubtarget::hasVcmpxExecWARHazard
bool hasVcmpxExecWARHazard() const
Definition: GCNSubtarget.h:931
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:180
llvm::GCNSubtarget::getVGPREncodingGranule
unsigned getVGPREncodingGranule() const
Definition: GCNSubtarget.h:1030
llvm::GCNSubtarget::isWave64
bool isWave64() const
Definition: GCNSubtarget.h:1074
llvm::GCNSubtarget::HasLdsBranchVmemWARHazard
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:187
llvm::GCNSubtarget::EnableUnsafeDSOffsetFolding
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:97
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::GCNSubtarget::MaxWaveScratchSize
static const unsigned MaxWaveScratchSize
Definition: GCNSubtarget.h:205
llvm::GCNSubtarget::FMA
bool FMA
Definition: GCNSubtarget.h:105
llvm::GCNSubtarget::FullRate64Ops
bool FullRate64Ops
Definition: GCNSubtarget.h:77
llvm::GCNSubtarget::getLegalizerInfo
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:242
llvm::GCNSubtarget::AddNoCarryInsts
bool AddNoCarryInsts
Definition: GCNSubtarget.h:168
llvm::GCNSubtarget::usePRTStrictNull
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:488
llvm::GCNSubtarget::hasInstFwdPrefetchBug
bool hasInstFwdPrefetchBug() const
Definition: GCNSubtarget.h:927
llvm::GCNSubtarget::FastDenormalF32
bool FastDenormalF32
Definition: GCNSubtarget.h:75
llvm::GCNSubtarget::HasRegisterBanking
bool HasRegisterBanking
Definition: GCNSubtarget.h:160
llvm::GCNSubtarget::HasPackedTID
bool HasPackedTID
Definition: GCNSubtarget.h:179
llvm::GCNSubtarget::GFX10_BEncoding
bool GFX10_BEncoding
Definition: GCNSubtarget.h:137
llvm::RegisterBankInfo
Holds all the information related to register banks.
Definition: RegisterBankInfo.h:39
llvm::InstructionSelector
Provides the logic to select generic machine instructions.
Definition: InstructionSelector.h:418
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
llvm::AMDGPU::IsaInfo::getMinNumSGPRs
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:589
llvm::GCNSubtarget::HasUnpackedD16VMem
bool HasUnpackedD16VMem
Definition: GCNSubtarget.h:169
llvm::GCNSubtarget::IsGCN
bool IsGCN
Definition: GCNSubtarget.h:107
llvm::GCNSubtarget::getSGPRAllocGranule
unsigned getSGPRAllocGranule() const
Definition: GCNSubtarget.h:980
llvm::GCNSubtarget::hasImageGather4D16Bug
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:863
llvm::GCNSubtarget::hasPackedTID
bool hasPackedTID() const
Definition: GCNSubtarget.h:950
llvm::GCNSubtarget::getTrapHandlerAbi
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:386
llvm::GCNSubtarget::HasFmaMixInsts
bool HasFmaMixInsts
Definition: GCNSubtarget.h:118
llvm::GCNSubtarget::has64BitDPP
bool has64BitDPP() const
Definition: GCNSubtarget.h:833
llvm::GCNSubtarget::GFX90AInsts
bool GFX90AInsts
Definition: GCNSubtarget.h:111
llvm::GCNSubtarget::HasIntClamp
bool HasIntClamp
Definition: GCNSubtarget.h:117
llvm::GCNSubtarget::hasVMEMtoScalarWriteHazard
bool hasVMEMtoScalarWriteHazard() const
Definition: GCNSubtarget.h:915
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:627
llvm::GCNSubtarget::HasShaderCyclesRegister
bool HasShaderCyclesRegister
Definition: GCNSubtarget.h:159
llvm::GCNSubtarget::hasUnalignedBufferAccess
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:496
llvm::GCNSubtarget::ScalarizeGlobal
bool ScalarizeGlobal
Definition: GCNSubtarget.h:180
llvm::GCNSubtarget::LDSMisalignedBug
bool LDSMisalignedBug
Definition: GCNSubtarget.h:173
llvm::GCNSubtarget::getTotalNumVGPRs
unsigned getTotalNumVGPRs() const
Definition: GCNSubtarget.h:1035
llvm::GCNSubtarget::GCNSubtarget
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
Definition: AMDGPUSubtarget.cpp:197
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:482
llvm::GCNSubtarget::HasVMEMtoScalarWriteHazard
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:183
llvm::GCNSubtarget::hasLDSMisalignedBug
bool hasLDSMisalignedBug() const
Definition: GCNSubtarget.h:923
llvm::GCNSubtarget::hasMFMAInlineLiteralBug
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:889
llvm::GCNSubtarget::HasImageGather4D16Bug
bool HasImageGather4D16Bug
Definition: GCNSubtarget.h:192
llvm::GCNSubtarget::FeatureDisable
bool FeatureDisable
Definition: GCNSubtarget.h:195
llvm::GCNSubtarget::EnableXNACK
bool EnableXNACK
Definition: GCNSubtarget.h:89
llvm::GCNSubtarget::hasFlat
bool hasFlat() const
Definition: GCNSubtarget.h:316
llvm::GCNSubtarget::hasImageStoreD16Bug
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:861
llvm::GCNSubtarget::hasGlobalAddTidInsts
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:571
llvm::GCNSubtarget::hasSDWAScalar
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:648
llvm::SIRegisterInfo::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: SIRegisterInfo.h:278
llvm::GCNSubtarget::SupportsSRAMECC
bool SupportsSRAMECC
Definition: GCNSubtarget.h:149
llvm::GCNSubtarget::EnableCuMode
bool EnableCuMode
Definition: GCNSubtarget.h:92
llvm::GCNSubtarget::HasDot6Insts
bool HasDot6Insts
Definition: GCNSubtarget.h:144
llvm::GCNSubtarget::hasLDSFPAtomics
bool hasLDSFPAtomics() const
Definition: GCNSubtarget.h:810
llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1046
llvm::GCNSubtarget::hasReadM0MovRelInterpHazard
bool hasReadM0MovRelInterpHazard() const
Definition: GCNSubtarget.h:902
llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition: GCNSubtarget.h:995
llvm::GCNSubtarget::hasUnalignedScratchAccess
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:512
llvm::GCNSubtarget::hasDot7Insts
bool hasDot7Insts() const
Definition: GCNSubtarget.h:692
llvm::GCNSubtarget::TSInfo
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:197
llvm::GCNSubtarget::hasRFEHazards
bool hasRFEHazards() const
Definition: GCNSubtarget.h:433
llvm::GCNSubtarget::HasVcmpxExecWARHazard
bool HasVcmpxExecWARHazard
Definition: GCNSubtarget.h:186
llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:945
llvm::GCNSubtarget::getOccupancyWithNumVGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
Definition: AMDGPUSubtarget.cpp:684
llvm::AMDGPU::IsaInfo::getTotalNumSGPRs
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:570
llvm::GCNSubtarget::hasNoDataDepHazard
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:736
llvm::GCNSubtarget::enableEarlyIfConversion
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:771
llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:577
llvm::GCNSubtarget::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1101
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:274
llvm::GCNSubtarget::MaxPrivateElementSize
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:71
llvm::GCNSubtarget::hasBFE
bool hasBFE() const
Definition: GCNSubtarget.h:330
SIInstrInfo.h
llvm::GCNSubtarget::hasFMA
bool hasFMA() const
Definition: GCNSubtarget.h:370
llvm::GCNSubtarget::hasLdsBranchVmemWARHazard
bool hasLdsBranchVmemWARHazard() const
Definition: GCNSubtarget.h:935
llvm::GCNSubtarget::HasPkFmacF16Inst
bool HasPkFmacF16Inst
Definition: GCNSubtarget.h:147
llvm::MachineFunction
Definition: MachineFunction.h:227
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:95
llvm::GCNSubtarget::hasIntClamp
bool hasIntClamp() const
Definition: GCNSubtarget.h:284
llvm::GCNSubtarget::hasMultiDwordFlatScratchAddressing
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:579
llvm::GCNSubtarget::hasDenormModeInst
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:456
llvm::GCNSubtarget::hasAtomicFaddInsts
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:704
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:548
llvm::GCNSubtarget::hasDot5Insts
bool hasDot5Insts() const
Definition: GCNSubtarget.h:684
llvm::GCNSubtarget::hasScalarStores
bool hasScalarStores() const
Definition: GCNSubtarget.h:802
llvm::GCNSubtarget::hasGFX10_3Insts
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:871
llvm::AMDGPU::IsaInfo::getVGPREncodingGranule
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:674
llvm::GCNSubtarget::CaymanISA
bool CaymanISA
Definition: GCNSubtarget.h:171
llvm::GCNSubtarget::HasScalarStores
bool HasScalarStores
Definition: GCNSubtarget.h:121
llvm::GCNSubtarget::flatScratchIsPointer
bool flatScratchIsPointer() const
Definition: GCNSubtarget.h:969
llvm::GCNSubtarget::supportsGetDoorbellID
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:390
llvm::GCNSubtarget::hasDPPBroadcasts
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:821
llvm::GCNSubtarget::hasMadF16
bool hasMadF16() const
Definition: AMDGPUSubtarget.cpp:648
SIFrameLowering.h
llvm::GCNSubtarget::GFX7GFX8GFX9Insts
bool GFX7GFX8GFX9Insts
Definition: GCNSubtarget.h:114
llvm::GCNSubtarget::HasSDWAOmod
bool HasSDWAOmod
Definition: GCNSubtarget.h:123
llvm::GCNSubtarget::hasDPPWavefrontShifts
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:825
llvm::GCNSubtarget::SGPRInitBug
bool SGPRInitBug
Definition: GCNSubtarget.h:115
llvm::GCNSubtarget::HasVOP3Literal
bool HasVOP3Literal
Definition: GCNSubtarget.h:161
llvm::GCNSubtarget::hasDPP8
bool hasDPP8() const
Definition: GCNSubtarget.h:829
llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:547
llvm::GCNSubtarget::hasScalarPackInsts
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:378
llvm::GCNSubtarget::hasD16LoadStore
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:591
llvm::GCNSubtarget::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: GCNSubtarget.h:1078
llvm::GCNSubtarget::adjustSchedDependency
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
Definition: AMDGPUSubtarget.cpp:806
llvm::GCNSubtarget::hasDot4Insts
bool hasDot4Insts() const
Definition: GCNSubtarget.h:680
llvm::GCNSubtarget::hasNoSdstCMPX
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:708
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1001
llvm::GCNSubtarget::enableFlatScratch
bool enableFlatScratch() const
Definition: AMDGPUSubtarget.cpp:324
llvm::GCNSubtarget::getPostRAMutations
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation >> &Mutations) const override
Definition: AMDGPUSubtarget.cpp:968
llvm::GCNSubtarget::hasFlatScratchSTMode
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:563
llvm::GCNSubtarget::hasFlatAddressSpace
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:540
llvm::GCNSubtarget::FlatForGlobal
bool FlatForGlobal
Definition: GCNSubtarget.h:80
llvm::GCNSubtarget::HasNSAEncoding
bool HasNSAEncoding
Definition: GCNSubtarget.h:136
llvm::GCNSubtarget::getReservedNumSGPRs
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
Definition: AMDGPUSubtarget.cpp:693
llvm::GCNSubtarget::GFX10Insts
bool GFX10Insts
Definition: GCNSubtarget.h:112
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::GCNSubtarget::getVGPRAllocGranule
unsigned getVGPRAllocGranule() const
Definition: GCNSubtarget.h:1025
llvm::GCNSubtarget::hasNSAtoVMEMBug
bool hasNSAtoVMEMBug() const
Definition: GCNSubtarget.h:939
llvm::GCNSubtarget::HasMAIInsts
bool HasMAIInsts
Definition: GCNSubtarget.h:146
llvm::GCNSubtarget::enableSubRegLiveness
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:760
llvm::GCNSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
AMDGPUGenSubtargetInfo
llvm::GCNSubtarget::setScalarizeGlobalBehavior
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:764
llvm::GCNSubtarget::GFX9Insts
bool GFX9Insts
Definition: GCNSubtarget.h:110
llvm::GCNSubtarget::HasDot1Insts
bool HasDot1Insts
Definition: GCNSubtarget.h:139
llvm::GCNSubtarget::HasDPP
bool HasDPP
Definition: GCNSubtarget.h:128
llvm::GCNSubtarget::hasFullRate64Ops
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:308
llvm::GCNSubtarget::HasNoSdstCMPX
bool HasNoSdstCMPX
Definition: GCNSubtarget.h:155
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::GCNSubtarget::hasUnpackedD16VMem
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:631
llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:250
llvm::GCNSubtarget::EnableSRAMECC
bool EnableSRAMECC
Definition: GCNSubtarget.h:153
llvm::GCNSubtarget::getMinWavesPerEU
unsigned getMinWavesPerEU() const override
Definition: GCNSubtarget.h:1107
llvm::GCNSubtarget::HasDot5Insts
bool HasDot5Insts
Definition: GCNSubtarget.h:143
llvm::GCNSubtarget::HasAtomicFaddInsts
bool HasAtomicFaddInsts
Definition: GCNSubtarget.h:148
llvm::GCNSubtarget::useAA
bool useAA() const override
Definition: AMDGPUSubtarget.cpp:656
llvm::GCNSubtarget::hasDPP
bool hasDPP() const
Definition: GCNSubtarget.h:817
llvm::GCNSubtarget::HasInstFwdPrefetchBug
bool HasInstFwdPrefetchBug
Definition: GCNSubtarget.h:185
llvm::SITargetLowering
Definition: SIISelLowering.h:30
llvm::GCNSubtarget::HasSMEMtoVectorWriteHazard
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:184
llvm::GCNSubtarget::hasG16
bool hasG16() const
Definition: GCNSubtarget.h:855
llvm::GCNSubtarget::hasHardClauses
bool hasHardClauses() const
Definition: GCNSubtarget.h:943
llvm::GCNSubtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:342
llvm::GCNSubtarget::haveRoundOpsF64
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:476
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:605
llvm::SIInstrInfo::getRegisterInfo
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:171
llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:780
llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:694
llvm::GCNSubtarget::HasDPP8
bool HasDPP8
Definition: GCNSubtarget.h:129
llvm::GCNSubtarget::EnableDS128
bool EnableDS128
Definition: GCNSubtarget.h:99
llvm::GCNSubtarget::HasMFMAInlineLiteralBug
bool HasMFMAInlineLiteralBug
Definition: GCNSubtarget.h:174
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:226
llvm::GCNSubtarget::hasOnlyRevVALUShifts
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:322
llvm::GCNSubtarget::UnalignedScratchAccess
bool UnalignedScratchAccess
Definition: GCNSubtarget.h:82
llvm::GCNSubtarget::TrapID::LLVMAMDHSATrap
@ LLVMAMDHSATrap
llvm::GCNSubtarget::DumpCode
bool DumpCode
Definition: GCNSubtarget.h:101
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:265
llvm::GCNSubtarget::TrapHandlerAbi::AMDHSA
@ AMDHSA
llvm::GCNSubtarget::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs() const
Definition: GCNSubtarget.h:1040
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::GCNSubtarget::hasSwap
bool hasSwap() const
Definition: GCNSubtarget.h:374
llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:528
llvm::GCNSubtarget::FlatScratchInsts
bool FlatScratchInsts
Definition: GCNSubtarget.h:166
AMDGPUCallLowering.h
llvm::GCNSubtarget::HasMovrel
bool HasMovrel
Definition: GCNSubtarget.h:119
llvm::GCNSubtarget::HasDot7Insts
bool HasDot7Insts
Definition: GCNSubtarget.h:145
SelectionDAGTargetInfo.h
llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:536
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:328
llvm::GCNSubtarget::hasMergedShaders
bool hasMergedShaders() const
Definition: GCNSubtarget.h:975
llvm::GCNSubtarget::FP64
bool FP64
Definition: GCNSubtarget.h:104
llvm::GCNSubtarget::R600ALUInst
bool R600ALUInst
Definition: GCNSubtarget.h:170
llvm::GCNSubtarget::hasFFBL
bool hasFFBL() const
Definition: GCNSubtarget.h:346
llvm::GCNSubtarget::getSelectionDAGInfo
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:255
llvm::GCNSubtarget::hasApertureRegs
bool hasApertureRegs() const
Definition: GCNSubtarget.h:520
llvm::GCNSubtarget::~GCNSubtarget
~GCNSubtarget() override
llvm::GCNSubtarget::getSGPREncodingGranule
unsigned getSGPREncodingGranule() const
Definition: GCNSubtarget.h:985
llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:537
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:595
llvm::GCNSubtarget::enableSIScheduler
bool enableSIScheduler() const
Definition: GCNSubtarget.h:877
llvm::AMDGPU::IsaInfo::getMinNumVGPRs
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:700
llvm::GCNSubtarget::getInstrItineraryData
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:259
llvm::GCNSubtarget::CIInsts
bool CIInsts
Definition: GCNSubtarget.h:108
llvm::GCNSubtarget::HasVscnt
bool HasVscnt
Definition: GCNSubtarget.h:156
llvm::GCNSubtarget::FastFMAF32
bool FastFMAF32
Definition: GCNSubtarget.h:74
llvm::GCNSubtarget::vmemWriteNeedsExpWaitcnt
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:740
llvm::GCNSubtarget::hasReadM0SendMsgHazard
bool hasReadM0SendMsgHazard() const
Definition: GCNSubtarget.h:906
llvm::GCNSubtarget::Has64BitDPP
bool Has64BitDPP
Definition: GCNSubtarget.h:130
llvm::GCNSubtarget::hasMIMG_R128
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:292
llvm::LegalizerInfo
Definition: LegalizerInfo.h:1041
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumVGPRs
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
Definition: AMDGPUMetadata.h:255
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::GCNSubtarget::hasSMEMtoVectorWriteHazard
bool hasSMEMtoVectorWriteHazard() const
Definition: GCNSubtarget.h:919
llvm::GCNSubtarget::HasApertureRegs
bool HasApertureRegs
Definition: GCNSubtarget.h:84
llvm::GCNSubtarget::hasMin3Max3_16
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:358
llvm::GCNSubtarget::HasSMemTimeInst
bool HasSMemTimeInst
Definition: GCNSubtarget.h:158
llvm::AMDGPUSubtarget::Generation
Generation
Definition: AMDGPUSubtarget.h:31
llvm::MachineSchedPolicy
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Definition: MachineScheduler.h:173
llvm::GCNSubtarget::TargetTriple
Triple TargetTriple
Definition: GCNSubtarget.h:66
llvm::GCNSubtarget::HasSDWAMac
bool HasSDWAMac
Definition: GCNSubtarget.h:126
llvm::GCNSubtarget::hasHWFP64
bool hasHWFP64() const
Definition: GCNSubtarget.h:296
llvm::GCNSubtarget::getTotalNumSGPRs
unsigned getTotalNumSGPRs() const
Definition: GCNSubtarget.h:990
llvm::GCNSubtarget::has12DWordStoreHazard
bool has12DWordStoreHazard() const
Definition: GCNSubtarget.h:893
llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition: AMDGPUSubtarget.h:184
llvm::GCNSubtarget::hasVcmpxPermlaneHazard
bool hasVcmpxPermlaneHazard() const
Definition: GCNSubtarget.h:911
llvm::GCNSubtarget::hasSDWAOutModsVOPC
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:660
llvm::CallLowering
Definition: CallLowering.h:43
llvm::GCNSubtarget::computeOccupancy
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
Definition: AMDGPUSubtarget.cpp:710
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:397
llvm::GCNSubtarget::hasMad64_32
bool hasMad64_32() const
Definition: GCNSubtarget.h:640
llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:659
llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:109
llvm::GCNSubtarget::FlatInstOffsets
bool FlatInstOffsets
Definition: GCNSubtarget.h:164
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:696
llvm::GCNSubtarget::HasG16
bool HasG16
Definition: GCNSubtarget.h:135
llvm::GCNSubtarget::hasUsableDivScaleConditionOutput
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:406
llvm::GCNSubtarget::hasAtomicCSub
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:575
llvm::GCNSubtarget::HasGFX10A16
bool HasGFX10A16
Definition: GCNSubtarget.h:134
llvm::GCNSubtarget::HasR128A16
bool HasR128A16
Definition: GCNSubtarget.h:133
llvm::GCNSubtarget::getCallLowering
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:230