LLVM  13.0.0git
GCNSubtarget.h
Go to the documentation of this file.
1 //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMD GCN specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16 
17 #include "AMDGPUCallLowering.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIFrameLowering.h"
20 #include "SIISelLowering.h"
21 #include "SIInstrInfo.h"
23 
24 namespace llvm {
25 
26 class MCInst;
27 class MCInstrInfo;
28 
29 } // namespace llvm
30 
31 #define GET_SUBTARGETINFO_HEADER
32 #include "AMDGPUGenSubtargetInfo.inc"
33 
34 namespace llvm {
35 
36 class GCNTargetMachine;
37 
38 class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
39  public AMDGPUSubtarget {
40 
42 
43 public:
44  // Following 2 enums are documented at:
45  // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
46  enum class TrapHandlerAbi {
47  NONE = 0x00,
48  AMDHSA = 0x01,
49  };
50 
51  enum class TrapID {
52  LLVMAMDHSATrap = 0x02,
53  LLVMAMDHSADebugTrap = 0x03,
54  };
55 
56 private:
57  /// GlobalISel related APIs.
58  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
59  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
60  std::unique_ptr<InstructionSelector> InstSelector;
61  std::unique_ptr<LegalizerInfo> Legalizer;
62  std::unique_ptr<RegisterBankInfo> RegBankInfo;
63 
64 protected:
65  // Basic subtarget description.
68  unsigned Gen;
72 
73  // Possibly statically set by tablegen, but may want to be overridden.
74  bool FastFMAF32;
78 
79  // Dynamically set bits that enable features.
86 
87  // This should not be used directly. 'TargetID' tracks the dynamic settings
88  // for XNACK.
90 
94 
95  // Used as options.
101  bool DumpCode;
102 
103  // Subtarget properties statically set by tablegen
104  bool FP64;
105  bool FMA;
106  bool MIMG_R128;
107  bool IsGCN;
108  bool CIInsts;
109  bool GFX8Insts;
110  bool GFX9Insts;
121  bool HasMovrel;
130  bool HasDPP;
131  bool HasDPP8;
137  bool HasG16;
139  unsigned NSAMaxSize;
154 
155  // This should not be used directly. 'TargetID' tracks the dynamic settings
156  // for SRAMECC.
158 
160  bool HasVscnt;
176  bool CaymanISA;
177  bool CFALUBug;
186 
199 
200  // Dummy feature to use for assembler in tablegen.
202 
204 private:
205  SIInstrInfo InstrInfo;
206  SITargetLowering TLInfo;
207  SIFrameLowering FrameLowering;
208 
209 public:
210  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
211  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
212 
213  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
214  const GCNTargetMachine &TM);
215  ~GCNSubtarget() override;
216 
218  StringRef GPU, StringRef FS);
219 
220  const SIInstrInfo *getInstrInfo() const override {
221  return &InstrInfo;
222  }
223 
224  const SIFrameLowering *getFrameLowering() const override {
225  return &FrameLowering;
226  }
227 
228  const SITargetLowering *getTargetLowering() const override {
229  return &TLInfo;
230  }
231 
232  const SIRegisterInfo *getRegisterInfo() const override {
233  return &InstrInfo.getRegisterInfo();
234  }
235 
236  const CallLowering *getCallLowering() const override {
237  return CallLoweringInfo.get();
238  }
239 
240  const InlineAsmLowering *getInlineAsmLowering() const override {
241  return InlineAsmLoweringInfo.get();
242  }
243 
245  return InstSelector.get();
246  }
247 
248  const LegalizerInfo *getLegalizerInfo() const override {
249  return Legalizer.get();
250  }
251 
252  const RegisterBankInfo *getRegBankInfo() const override {
253  return RegBankInfo.get();
254  }
255 
257  return TargetID;
258  }
259 
260  // Nothing implemented, just prevent crashes on use.
261  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
262  return &TSInfo;
263  }
264 
265  const InstrItineraryData *getInstrItineraryData() const override {
266  return &InstrItins;
267  }
268 
270 
272  return (Generation)Gen;
273  }
274 
275  /// Return the number of high bits known to be zero for a frame index.
278  }
279 
280  int getLDSBankCount() const {
281  return LDSBankCount;
282  }
283 
284  unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
285  return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
286  }
287 
288  unsigned getConstantBusLimit(unsigned Opcode) const;
289 
290  /// Returns true if an instruction that produces a 16-bit result in a 32-bit
291  /// register implicitly zeroes the high 16 bits of that register, rather than
292  /// preserving their original value.
293  bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
294 
295  bool hasIntClamp() const {
296  return HasIntClamp;
297  }
298 
299  bool hasFP64() const {
300  return FP64;
301  }
302 
303  bool hasMIMG_R128() const {
304  return MIMG_R128;
305  }
306 
307  bool hasHWFP64() const {
308  return FP64;
309  }
310 
311  bool hasFastFMAF32() const {
312  return FastFMAF32;
313  }
314 
315  bool hasHalfRate64Ops() const {
316  return HalfRate64Ops;
317  }
318 
319  bool hasFullRate64Ops() const {
320  return FullRate64Ops;
321  }
322 
323  bool hasAddr64() const {
325  }
326 
327  bool hasFlat() const {
329  }
330 
331  // Return true if the target only has the reverse operand versions of VALU
332  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
333  bool hasOnlyRevVALUShifts() const {
334  return getGeneration() >= VOLCANIC_ISLANDS;
335  }
336 
337  bool hasFractBug() const {
338  return getGeneration() == SOUTHERN_ISLANDS;
339  }
340 
341  bool hasBFE() const {
342  return true;
343  }
344 
345  bool hasBFI() const {
346  return true;
347  }
348 
349  bool hasBFM() const {
350  return hasBFE();
351  }
352 
353  bool hasBCNT(unsigned Size) const {
354  return true;
355  }
356 
357  bool hasFFBL() const {
358  return true;
359  }
360 
361  bool hasFFBH() const {
362  return true;
363  }
364 
365  bool hasMed3_16() const {
367  }
368 
369  bool hasMin3Max3_16() const {
371  }
372 
373  bool hasFmaMixInsts() const {
374  return HasFmaMixInsts;
375  }
376 
377  bool hasCARRY() const {
378  return true;
379  }
380 
381  bool hasFMA() const {
382  return FMA;
383  }
384 
385  bool hasSwap() const {
386  return GFX9Insts;
387  }
388 
389  bool hasScalarPackInsts() const {
390  return GFX9Insts;
391  }
392 
393  bool hasScalarMulHiInsts() const {
394  return GFX9Insts;
395  }
396 
399  }
400 
401  bool supportsGetDoorbellID() const {
402  // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
403  return getGeneration() >= GFX9;
404  }
405 
406  /// True if the offset field of DS instructions works as expected. On SI, the
407  /// offset uses a 16-bit adder and does not always wrap properly.
408  bool hasUsableDSOffset() const {
409  return getGeneration() >= SEA_ISLANDS;
410  }
411 
414  }
415 
416  /// Condition output from div_scale is usable.
418  return getGeneration() != SOUTHERN_ISLANDS;
419  }
420 
421  /// Extra wait hazard is needed in some cases before
422  /// s_cbranch_vccnz/s_cbranch_vccz.
423  bool hasReadVCCZBug() const {
424  return getGeneration() <= SEA_ISLANDS;
425  }
426 
427  /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
429  return getGeneration() >= GFX10;
430  }
431 
432  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
433  /// was written by a VALU instruction.
435  return getGeneration() == SOUTHERN_ISLANDS;
436  }
437 
438  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
439  /// SGPR was written by a VALU Instruction.
441  return getGeneration() >= VOLCANIC_ISLANDS;
442  }
443 
444  bool hasRFEHazards() const {
445  return getGeneration() >= VOLCANIC_ISLANDS;
446  }
447 
448  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
449  unsigned getSetRegWaitStates() const {
450  return getGeneration() <= SEA_ISLANDS ? 1 : 2;
451  }
452 
453  bool dumpCode() const {
454  return DumpCode;
455  }
456 
457  /// Return the amount of LDS that can be used that will not restrict the
458  /// occupancy lower than WaveCount.
459  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
460  const Function &) const;
461 
464  }
465 
466  /// \returns If target supports S_DENORM_MODE.
467  bool hasDenormModeInst() const {
469  }
470 
471  bool useFlatForGlobal() const {
472  return FlatForGlobal;
473  }
474 
475  /// \returns If target supports ds_read/write_b128 and user enables generation
476  /// of ds_read/write_b128.
477  bool useDS128() const {
478  return CIInsts && EnableDS128;
479  }
480 
481  /// \return If target supports ds_read/write_b96/128.
482  bool hasDS96AndDS128() const {
483  return CIInsts;
484  }
485 
486  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
487  bool haveRoundOpsF64() const {
488  return CIInsts;
489  }
490 
491  /// \returns If MUBUF instructions always perform range checking, even for
492  /// buffer resources used for private memory access.
495  }
496 
497  /// \returns If target requires PRT Strict NULL support (zero result registers
498  /// for sparse texture support).
499  bool usePRTStrictNull() const {
500  return EnablePRTStrictNull;
501  }
502 
505  }
506 
508  return UnalignedBufferAccess;
509  }
510 
513  }
514 
515  bool hasUnalignedDSAccess() const {
516  return UnalignedDSAccess;
517  }
518 
521  }
522 
524  return UnalignedScratchAccess;
525  }
526 
527  bool hasUnalignedAccessMode() const {
528  return UnalignedAccessMode;
529  }
530 
531  bool hasApertureRegs() const {
532  return HasApertureRegs;
533  }
534 
535  bool isTrapHandlerEnabled() const {
536  return TrapHandler;
537  }
538 
539  bool isXNACKEnabled() const {
540  return TargetID.isXnackOnOrAny();
541  }
542 
543  bool isTgSplitEnabled() const {
544  return EnableTgSplit;
545  }
546 
547  bool isCuModeEnabled() const {
548  return EnableCuMode;
549  }
550 
551  bool hasFlatAddressSpace() const {
552  return FlatAddressSpace;
553  }
554 
555  bool hasFlatScrRegister() const {
556  return hasFlatAddressSpace();
557  }
558 
559  bool hasFlatInstOffsets() const {
560  return FlatInstOffsets;
561  }
562 
563  bool hasFlatGlobalInsts() const {
564  return FlatGlobalInsts;
565  }
566 
567  bool hasFlatScratchInsts() const {
568  return FlatScratchInsts;
569  }
570 
571  // Check if target supports ST addressing mode with FLAT scratch instructions.
572  // The ST addressing mode means no registers are used, either VGPR or SGPR,
573  // but only immediate offset is swizzled and added to the FLAT scratch base.
574  bool hasFlatScratchSTMode() const {
575  return hasFlatScratchInsts() && hasGFX10_3Insts();
576  }
577 
579  return ScalarFlatScratchInsts;
580  }
581 
582  bool hasGlobalAddTidInsts() const {
583  return GFX10_BEncoding;
584  }
585 
586  bool hasAtomicCSub() const {
587  return GFX10_BEncoding;
588  }
589 
591  return getGeneration() >= GFX9;
592  }
593 
594  bool hasFlatSegmentOffsetBug() const {
596  }
597 
599  return getGeneration() > GFX9;
600  }
601 
602  bool hasD16LoadStore() const {
603  return getGeneration() >= GFX9;
604  }
605 
606  bool d16PreservesUnusedBits() const {
608  }
609 
610  bool hasD16Images() const {
611  return getGeneration() >= VOLCANIC_ISLANDS;
612  }
613 
614  /// Return if most LDS instructions have an m0 use that requires m0 to be
615  /// initialized.
616  bool ldsRequiresM0Init() const {
617  return getGeneration() < GFX9;
618  }
619 
620  // True if the hardware rewinds and replays GWS operations if a wave is
621  // preempted.
622  //
623  // If this is false, a GWS operation requires testing if a nack set the
624  // MEM_VIOL bit, and repeating if so.
625  bool hasGWSAutoReplay() const {
626  return getGeneration() >= GFX9;
627  }
628 
629  /// \returns if target has ds_gws_sema_release_all instruction.
630  bool hasGWSSemaReleaseAll() const {
631  return CIInsts;
632  }
633 
634  /// \returns true if the target has integer add/sub instructions that do not
635  /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
636  /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
637  /// for saturation.
638  bool hasAddNoCarry() const {
639  return AddNoCarryInsts;
640  }
641 
642  bool hasUnpackedD16VMem() const {
643  return HasUnpackedD16VMem;
644  }
645 
646  // Covers VS/PS/CS graphics shaders
647  bool isMesaGfxShader(const Function &F) const {
648  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
649  }
650 
651  bool hasMad64_32() const {
652  return getGeneration() >= SEA_ISLANDS;
653  }
654 
655  bool hasSDWAOmod() const {
656  return HasSDWAOmod;
657  }
658 
659  bool hasSDWAScalar() const {
660  return HasSDWAScalar;
661  }
662 
663  bool hasSDWASdst() const {
664  return HasSDWASdst;
665  }
666 
667  bool hasSDWAMac() const {
668  return HasSDWAMac;
669  }
670 
671  bool hasSDWAOutModsVOPC() const {
672  return HasSDWAOutModsVOPC;
673  }
674 
675  bool hasDLInsts() const {
676  return HasDLInsts;
677  }
678 
679  bool hasDot1Insts() const {
680  return HasDot1Insts;
681  }
682 
683  bool hasDot2Insts() const {
684  return HasDot2Insts;
685  }
686 
687  bool hasDot3Insts() const {
688  return HasDot3Insts;
689  }
690 
691  bool hasDot4Insts() const {
692  return HasDot4Insts;
693  }
694 
695  bool hasDot5Insts() const {
696  return HasDot5Insts;
697  }
698 
699  bool hasDot6Insts() const {
700  return HasDot6Insts;
701  }
702 
703  bool hasDot7Insts() const {
704  return HasDot7Insts;
705  }
706 
707  bool hasMAIInsts() const {
708  return HasMAIInsts;
709  }
710 
711  bool hasPkFmacF16Inst() const {
712  return HasPkFmacF16Inst;
713  }
714 
715  bool hasAtomicFaddInsts() const {
716  return HasAtomicFaddInsts;
717  }
718 
719  bool hasNoSdstCMPX() const {
720  return HasNoSdstCMPX;
721  }
722 
723  bool hasVscnt() const {
724  return HasVscnt;
725  }
726 
727  bool hasGetWaveIdInst() const {
728  return HasGetWaveIdInst;
729  }
730 
731  bool hasSMemTimeInst() const {
732  return HasSMemTimeInst;
733  }
734 
735  bool hasShaderCyclesRegister() const {
737  }
738 
739  bool hasRegisterBanking() const {
740  return HasRegisterBanking;
741  }
742 
743  bool hasVOP3Literal() const {
744  return HasVOP3Literal;
745  }
746 
747  bool hasNoDataDepHazard() const {
748  return HasNoDataDepHazard;
749  }
750 
752  return getGeneration() < SEA_ISLANDS;
753  }
754 
755  // Scratch is allocated in 256 dword per wave blocks for the entire
756  // wavefront. When viewed from the perspective of an arbitrary workitem, this
757  // is 4-byte aligned.
758  //
759  // Only 4-byte alignment is really needed to access anything. Transformations
760  // on the pointer value itself may rely on the alignment / known low bits of
761  // the pointer. Set this to something above the minimum to avoid needing
762  // dynamic realignment in common cases.
763  Align getStackAlignment() const { return Align(16); }
764 
765  bool enableMachineScheduler() const override {
766  return true;
767  }
768 
769  bool useAA() const override;
770 
771  bool enableSubRegLiveness() const override {
772  return true;
773  }
774 
777 
778  // static wrappers
779  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
780 
781  // XXX - Why is this here if it isn't in the default pass set?
782  bool enableEarlyIfConversion() const override {
783  return true;
784  }
785 
786  bool enableFlatScratch() const;
787 
789  unsigned NumRegionInstrs) const override;
790 
791  unsigned getMaxNumUserSGPRs() const {
792  return 16;
793  }
794 
795  bool hasSMemRealTime() const {
796  return HasSMemRealTime;
797  }
798 
799  bool hasMovrel() const {
800  return HasMovrel;
801  }
802 
803  bool hasVGPRIndexMode() const {
804  return HasVGPRIndexMode;
805  }
806 
807  bool useVGPRIndexMode() const;
808 
809  bool hasScalarCompareEq64() const {
810  return getGeneration() >= VOLCANIC_ISLANDS;
811  }
812 
813  bool hasScalarStores() const {
814  return HasScalarStores;
815  }
816 
817  bool hasScalarAtomics() const {
818  return HasScalarAtomics;
819  }
820 
821  bool hasLDSFPAtomics() const {
822  return GFX8Insts;
823  }
824 
825  /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
826  bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
827 
828  bool hasDPP() const {
829  return HasDPP;
830  }
831 
832  bool hasDPPBroadcasts() const {
833  return HasDPP && getGeneration() < GFX10;
834  }
835 
836  bool hasDPPWavefrontShifts() const {
837  return HasDPP && getGeneration() < GFX10;
838  }
839 
840  bool hasDPP8() const {
841  return HasDPP8;
842  }
843 
844  bool has64BitDPP() const {
845  return Has64BitDPP;
846  }
847 
848  bool hasPackedFP32Ops() const {
849  return HasPackedFP32Ops;
850  }
851 
852  bool hasFmaakFmamkF32Insts() const {
853  return getGeneration() >= GFX10;
854  }
855 
856  bool hasExtendedImageInsts() const {
857  return HasExtendedImageInsts;
858  }
859 
860  bool hasR128A16() const {
861  return HasR128A16;
862  }
863 
864  bool hasGFX10A16() const {
865  return HasGFX10A16;
866  }
867 
868  bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
869 
870  bool hasG16() const { return HasG16; }
871 
872  bool hasOffset3fBug() const {
873  return HasOffset3fBug;
874  }
875 
876  bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
877 
879 
880  bool hasNSAEncoding() const { return HasNSAEncoding; }
881 
882  unsigned getNSAMaxSize() const { return NSAMaxSize; }
883 
884  bool hasGFX10_AEncoding() const {
885  return GFX10_AEncoding;
886  }
887 
888  bool hasGFX10_BEncoding() const {
889  return GFX10_BEncoding;
890  }
891 
892  bool hasGFX10_3Insts() const {
893  return GFX10_3Insts;
894  }
895 
896  bool hasMadF16() const;
897 
898  bool enableSIScheduler() const {
899  return EnableSIScheduler;
900  }
901 
902  bool loadStoreOptEnabled() const {
903  return EnableLoadStoreOpt;
904  }
905 
906  bool hasSGPRInitBug() const {
907  return SGPRInitBug;
908  }
909 
911 
914  }
915 
916  bool hasMFMAInlineLiteralBug() const {
918  }
919 
920  bool has12DWordStoreHazard() const {
922  }
923 
924  // \returns true if the subtarget supports DWORDX3 load/store instructions.
925  bool hasDwordx3LoadStores() const {
926  return CIInsts;
927  }
928 
931  }
932 
933  bool hasReadM0SendMsgHazard() const {
936  }
937 
938  bool hasVcmpxPermlaneHazard() const {
939  return HasVcmpxPermlaneHazard;
940  }
941 
944  }
945 
948  }
949 
950  bool hasLDSMisalignedBug() const {
951  return LDSMisalignedBug && !EnableCuMode;
952  }
953 
954  bool hasInstFwdPrefetchBug() const {
955  return HasInstFwdPrefetchBug;
956  }
957 
958  bool hasVcmpxExecWARHazard() const {
959  return HasVcmpxExecWARHazard;
960  }
961 
964  }
965 
966  bool hasNSAtoVMEMBug() const {
967  return HasNSAtoVMEMBug;
968  }
969 
970  bool hasNSAClauseBug() const { return HasNSAClauseBug; }
971 
972  bool hasHardClauses() const { return getGeneration() >= GFX10; }
973 
974  bool hasGFX90AInsts() const { return GFX90AInsts; }
975 
976  /// Return if operations acting on VGPR tuples require even alignment.
977  bool needsAlignedVGPRs() const { return GFX90AInsts; }
978 
979  bool hasPackedTID() const { return HasPackedTID; }
980 
981  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
982  /// SGPRs
983  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
984 
985  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
986  /// VGPRs
987  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
988 
989  /// Return occupancy for the given function. Used LDS and a number of
990  /// registers if provided.
991  /// Note, occupancy can be affected by the scratch allocation as well, but
992  /// we do not have enough information to compute it.
993  unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
994  unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
995 
996  /// \returns true if the flat_scratch register should be initialized with the
997  /// pointer to the wave's scratch memory rather than a size and offset.
998  bool flatScratchIsPointer() const {
1000  }
1001 
1002  /// \returns true if the flat_scratch register is initialized by the HW.
1003  /// In this case it is readonly.
1005 
1006  /// \returns true if the machine has merged shaders in which s0-s7 are
1007  /// reserved by the hardware and user SGPRs start at s8
1008  bool hasMergedShaders() const {
1009  return getGeneration() >= GFX9;
1010  }
1011 
1012  /// \returns SGPR allocation granularity supported by the subtarget.
1013  unsigned getSGPRAllocGranule() const {
1015  }
1016 
1017  /// \returns SGPR encoding granularity supported by the subtarget.
1018  unsigned getSGPREncodingGranule() const {
1020  }
1021 
1022  /// \returns Total number of SGPRs supported by the subtarget.
1023  unsigned getTotalNumSGPRs() const {
1024  return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1025  }
1026 
1027  /// \returns Addressable number of SGPRs supported by the subtarget.
1028  unsigned getAddressableNumSGPRs() const {
1030  }
1031 
1032  /// \returns Minimum number of SGPRs that meets the given number of waves per
1033  /// execution unit requirement supported by the subtarget.
1034  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1035  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1036  }
1037 
1038  /// \returns Maximum number of SGPRs that meets the given number of waves per
1039  /// execution unit requirement supported by the subtarget.
1040  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1041  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1042  }
1043 
1044  /// \returns Reserved number of SGPRs. This is common
1045  /// utility function called by MachineFunction and
1046  /// Function variants of getReservedNumSGPRs.
1047  unsigned getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const;
1048  /// \returns Reserved number of SGPRs for given machine function \p MF.
1049  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1050 
1051  /// \returns Reserved number of SGPRs for given function \p F.
1052  unsigned getReservedNumSGPRs(const Function &F) const;
1053 
1054  /// \returns max num SGPRs. This is the common utility
1055  /// function called by MachineFunction and Function
1056  /// variants of getMaxNumSGPRs.
1057  unsigned getBaseMaxNumSGPRs(const Function &F,
1058  std::pair<unsigned, unsigned> WavesPerEU,
1059  unsigned PreloadedSGPRs,
1060  unsigned ReservedNumSGPRs) const;
1061 
1062  /// \returns Maximum number of SGPRs that meets number of waves per execution
1063  /// unit requirement for function \p MF, or number of SGPRs explicitly
1064  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1065  ///
1066  /// \returns Value that meets number of waves per execution unit requirement
1067  /// if explicitly requested value cannot be converted to integer, violates
1068  /// subtarget's specifications, or does not meet number of waves per execution
1069  /// unit requirement.
1070  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1071 
1072  /// \returns Maximum number of SGPRs that meets number of waves per execution
1073  /// unit requirement for function \p F, or number of SGPRs explicitly
1074  /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1075  ///
1076  /// \returns Value that meets number of waves per execution unit requirement
1077  /// if explicitly requested value cannot be converted to integer, violates
1078  /// subtarget's specifications, or does not meet number of waves per execution
1079  /// unit requirement.
1080  unsigned getMaxNumSGPRs(const Function &F) const;
1081 
1082  /// \returns VGPR allocation granularity supported by the subtarget.
1083  unsigned getVGPRAllocGranule() const {
1085  }
1086 
1087  /// \returns VGPR encoding granularity supported by the subtarget.
1088  unsigned getVGPREncodingGranule() const {
1090  }
1091 
1092  /// \returns Total number of VGPRs supported by the subtarget.
1093  unsigned getTotalNumVGPRs() const {
1094  return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1095  }
1096 
1097  /// \returns Addressable number of VGPRs supported by the subtarget.
1098  unsigned getAddressableNumVGPRs() const {
1100  }
1101 
1102  /// \returns Minimum number of VGPRs that meets given number of waves per
1103  /// execution unit requirement supported by the subtarget.
1104  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1105  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1106  }
1107 
1108  /// \returns Maximum number of VGPRs that meets given number of waves per
1109  /// execution unit requirement supported by the subtarget.
1110  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1111  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1112  }
1113 
1114  /// \returns max num VGPRs. This is the common utility function
1115  /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1116  unsigned getBaseMaxNumVGPRs(const Function &F,
1117  std::pair<unsigned, unsigned> WavesPerEU) const;
1118  /// \returns Maximum number of VGPRs that meets number of waves per execution
1119  /// unit requirement for function \p F, or number of VGPRs explicitly
1120  /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1121  ///
1122  /// \returns Value that meets number of waves per execution unit requirement
1123  /// if explicitly requested value cannot be converted to integer, violates
1124  /// subtarget's specifications, or does not meet number of waves per execution
1125  /// unit requirement.
1126  unsigned getMaxNumVGPRs(const Function &F) const;
1127 
1128  /// \returns Maximum number of VGPRs that meets number of waves per execution
1129  /// unit requirement for function \p MF, or number of VGPRs explicitly
1130  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1131  ///
1132  /// \returns Value that meets number of waves per execution unit requirement
1133  /// if explicitly requested value cannot be converted to integer, violates
1134  /// subtarget's specifications, or does not meet number of waves per execution
1135  /// unit requirement.
1136  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1137 
1138  void getPostRAMutations(
1139  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1140  const override;
1141 
1142  bool isWave32() const {
1143  return getWavefrontSize() == 32;
1144  }
1145 
1146  bool isWave64() const {
1147  return getWavefrontSize() == 64;
1148  }
1149 
1151  return getRegisterInfo()->getBoolRC();
1152  }
1153 
1154  /// \returns Maximum number of work groups per compute unit supported by the
1155  /// subtarget and limited by given \p FlatWorkGroupSize.
1156  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1157  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1158  }
1159 
1160  /// \returns Minimum flat work group size supported by the subtarget.
1161  unsigned getMinFlatWorkGroupSize() const override {
1163  }
1164 
1165  /// \returns Maximum flat work group size supported by the subtarget.
1166  unsigned getMaxFlatWorkGroupSize() const override {
1168  }
1169 
1170  /// \returns Number of waves per execution unit required to support the given
1171  /// \p FlatWorkGroupSize.
1172  unsigned
1173  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1174  return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1175  }
1176 
1177  /// \returns Minimum number of waves per execution unit supported by the
1178  /// subtarget.
1179  unsigned getMinWavesPerEU() const override {
1180  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1181  }
1182 
1183  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1184  SDep &Dep) const override;
1185 };
1186 
1187 } // end namespace llvm
1188 
1189 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::GCNSubtarget::hasScalarMulHiInsts
bool hasScalarMulHiInsts() const
Definition: GCNSubtarget.h:393
llvm::GCNSubtarget::HasDot3Insts
bool HasDot3Insts
Definition: GCNSubtarget.h:145
llvm::GCNSubtarget::Gen
unsigned Gen
Definition: GCNSubtarget.h:68
llvm::GCNSubtarget::hasGFX10A16
bool hasGFX10A16() const
Definition: GCNSubtarget.h:864
llvm::GCNSubtarget::hasBFM
bool hasBFM() const
Definition: GCNSubtarget.h:349
llvm::GCNSubtarget::hasDot2Insts
bool hasDot2Insts() const
Definition: GCNSubtarget.h:683
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::AMDGPU::IsaInfo::getSGPRAllocGranule
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:591
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::GCNSubtarget::GFX8Insts
bool GFX8Insts
Definition: GCNSubtarget.h:109
llvm::GCNSubtarget::hasGWSAutoReplay
bool hasGWSAutoReplay() const
Definition: GCNSubtarget.h:625
llvm::GCNSubtarget::hasFlatLgkmVMemCountInOrder
bool hasFlatLgkmVMemCountInOrder() const
Definition: GCNSubtarget.h:598
llvm::GCNSubtarget::HasSDWAScalar
bool HasSDWAScalar
Definition: GCNSubtarget.h:126
llvm::GCNSubtarget::TrapHandlerAbi
TrapHandlerAbi
Definition: GCNSubtarget.h:46
llvm::GCNSubtarget::HasGetWaveIdInst
bool HasGetWaveIdInst
Definition: GCNSubtarget.h:161
llvm::GCNSubtarget::getRegBankInfo
const RegisterBankInfo * getRegBankInfo() const override
Definition: GCNSubtarget.h:252
llvm::GCNSubtarget::hasRegisterBanking
bool hasRegisterBanking() const
Definition: GCNSubtarget.h:739
llvm::GCNSubtarget::hasSDWAMac
bool hasSDWAMac() const
Definition: GCNSubtarget.h:667
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumSGPRs
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
Definition: AMDGPUMetadata.h:253
llvm::GCNSubtarget::hasVGPRIndexMode
bool hasVGPRIndexMode() const
Definition: GCNSubtarget.h:803
llvm::InlineAsmLowering
Definition: InlineAsmLowering.h:28
llvm::GCNSubtarget::hasSDWASdst
bool hasSDWASdst() const
Definition: GCNSubtarget.h:663
llvm::GCNSubtarget::getFrameLowering
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:224
llvm::GCNSubtarget::initializeSubtargetDependencies
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
Definition: AMDGPUSubtarget.cpp:81
llvm::GCNSubtarget::hasD16Images
bool hasD16Images() const
Definition: GCNSubtarget.h:610
llvm::GCNSubtarget::EnablePRTStrictNull
bool EnablePRTStrictNull
Definition: GCNSubtarget.h:100
llvm::Function
Definition: Function.h:61
llvm::GCNSubtarget::HasDot2Insts
bool HasDot2Insts
Definition: GCNSubtarget.h:144
llvm::GCNSubtarget::HasPackedFP32Ops
bool HasPackedFP32Ops
Definition: GCNSubtarget.h:133
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isSramEccOnOrAny
bool isSramEccOnOrAny() const
Definition: AMDGPUBaseInfo.h:130
llvm::GCNSubtarget::FlatGlobalInsts
bool FlatGlobalInsts
Definition: GCNSubtarget.h:169
llvm::GCNSubtarget::FlatAddressSpace
bool FlatAddressSpace
Definition: GCNSubtarget.h:167
llvm::GCNSubtarget::HasDLInsts
bool HasDLInsts
Definition: GCNSubtarget.h:142
llvm::GCNSubtarget::hasNSAClauseBug
bool hasNSAClauseBug() const
Definition: GCNSubtarget.h:970
llvm::GCNSubtarget::getNSAMaxSize
unsigned getNSAMaxSize() const
Definition: GCNSubtarget.h:882
llvm::GCNSubtarget::hasMovrel
bool hasMovrel() const
Definition: GCNSubtarget.h:799
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:37
llvm::GCNSubtarget::hasVMEMReadSGPRVALUDefHazard
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition: GCNSubtarget.h:440
llvm::GCNSubtarget::TrapHandlerAbi::NONE
@ NONE
llvm::GCNSubtarget::hasPermLaneX16
bool hasPermLaneX16() const
Definition: GCNSubtarget.h:826
llvm::GCNSubtarget::hasShaderCyclesRegister
bool hasShaderCyclesRegister() const
Definition: GCNSubtarget.h:735
llvm::GCNSubtarget::needsAlignedVGPRs
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
Definition: GCNSubtarget.h:977
llvm::GCNSubtarget::hasFlatScratchInsts
bool hasFlatScratchInsts() const
Definition: GCNSubtarget.h:567
llvm::GCNSubtarget::UnalignedDSAccess
bool UnalignedDSAccess
Definition: GCNSubtarget.h:183
llvm::GCNSubtarget::hasFP64
bool hasFP64() const
Definition: GCNSubtarget.h:299
llvm::GCNSubtarget::InstrItins
InstrItineraryData InstrItins
Definition: GCNSubtarget.h:69
llvm::GCNSubtarget::HasImageStoreD16Bug
bool HasImageStoreD16Bug
Definition: GCNSubtarget.h:197
llvm::GCNSubtarget::hasAutoWaitcntBeforeBarrier
bool hasAutoWaitcntBeforeBarrier() const
Definition: GCNSubtarget.h:503
llvm::GCNSubtarget::supportsMinMaxDenormModes
bool supportsMinMaxDenormModes() const
Definition: GCNSubtarget.h:462
llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition: GCNSubtarget.h:594
llvm::GCNSubtarget::HasDot4Insts
bool HasDot4Insts
Definition: GCNSubtarget.h:146
llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition: AMDGPUSubtarget.h:38
llvm::GCNSubtarget::hasDS96AndDS128
bool hasDS96AndDS128() const
Definition: GCNSubtarget.h:482
llvm::GCNSubtarget::HasVcmpxPermlaneHazard
bool HasVcmpxPermlaneHazard
Definition: GCNSubtarget.h:187
llvm::GCNSubtarget::getSetRegWaitStates
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition: GCNSubtarget.h:449
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::GCNSubtarget::HasExtendedImageInsts
bool HasExtendedImageInsts
Definition: GCNSubtarget.h:134
llvm::GCNSubtarget::HasVertexCache
bool HasVertexCache
Definition: GCNSubtarget.h:180
llvm::GCNSubtarget::hasFlatGlobalInsts
bool hasFlatGlobalInsts() const
Definition: GCNSubtarget.h:563
llvm::GCNSubtarget::hasCARRY
bool hasCARRY() const
Definition: GCNSubtarget.h:377
llvm::GCNSubtarget::useDS128
bool useDS128() const
Definition: GCNSubtarget.h:477
llvm::GCNSubtarget::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1156
llvm::GCNSubtarget::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1166
llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition: GCNSubtarget.h:535
llvm::GCNSubtarget::hasDot3Insts
bool hasDot3Insts() const
Definition: GCNSubtarget.h:687
llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:720
llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition: GCNSubtarget.h:675
llvm::GCNSubtarget::hasFractBug
bool hasFractBug() const
Definition: GCNSubtarget.h:337
llvm::GCNSubtarget::hasDwordx3LoadStores
bool hasDwordx3LoadStores() const
Definition: GCNSubtarget.h:925
llvm::GCNSubtarget::hasNSAEncoding
bool hasNSAEncoding() const
Definition: GCNSubtarget.h:880
llvm::GCNSubtarget::TrapID::LLVMAMDHSADebugTrap
@ LLVMAMDHSADebugTrap
llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: AMDGPUSubtarget.h:228
llvm::GCNSubtarget::HasSDWAOutModsVOPC
bool HasSDWAOutModsVOPC
Definition: GCNSubtarget.h:129
llvm::AMDGPU::IsaInfo::getMinWavesPerEU
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:558
llvm::GCNSubtarget::getStackAlignment
Align getStackAlignment() const
Definition: GCNSubtarget.h:763
llvm::GCNSubtarget::hasUnalignedDSAccessEnabled
bool hasUnalignedDSAccessEnabled() const
Definition: GCNSubtarget.h:519
llvm::GCNSubtarget::EnableSIScheduler
bool EnableSIScheduler
Definition: GCNSubtarget.h:98
llvm::GCNSubtarget::partialVCCWritesUpdateVCCZ
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition: GCNSubtarget.h:428
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
llvm::GCNSubtarget::getBaseReservedNumSGPRs
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const
Definition: AMDGPUSubtarget.cpp:804
llvm::GCNSubtarget::HasFlatSegmentOffsetBug
bool HasFlatSegmentOffsetBug
Definition: GCNSubtarget.h:196
llvm::GCNSubtarget::loadStoreOptEnabled
bool loadStoreOptEnabled() const
Definition: GCNSubtarget.h:902
llvm::GCNSubtarget::TargetID
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition: GCNSubtarget.h:67
llvm::GCNSubtarget::UnalignedBufferAccess
bool UnalignedBufferAccess
Definition: GCNSubtarget.h:182
llvm::GCNSubtarget::TexVTXClauseSize
short TexVTXClauseSize
Definition: GCNSubtarget.h:181
llvm::GCNSubtarget::hasGetWaveIdInst
bool hasGetWaveIdInst() const
Definition: GCNSubtarget.h:727
llvm::GCNSubtarget::HasNSAtoVMEMBug
bool HasNSAtoVMEMBug
Definition: GCNSubtarget.h:193
llvm::GCNSubtarget::HasScalarAtomics
bool HasScalarAtomics
Definition: GCNSubtarget.h:124
llvm::GCNSubtarget::ScalarFlatScratchInsts
bool ScalarFlatScratchInsts
Definition: GCNSubtarget.h:171
llvm::AMDGPU::IsaInfo::getMaxNumVGPRs
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:745
llvm::GCNSubtarget::hasPackedFP32Ops
bool hasPackedFP32Ops() const
Definition: GCNSubtarget.h:848
llvm::GCNSubtarget::HalfRate64Ops
bool HalfRate64Ops
Definition: GCNSubtarget.h:76
llvm::GCNSubtarget::NegativeScratchOffsetBug
bool NegativeScratchOffsetBug
Definition: GCNSubtarget.h:116
llvm::GCNSubtarget::HasNSAClauseBug
bool HasNSAClauseBug
Definition: GCNSubtarget.h:194
llvm::GCNSubtarget::hasScalarCompareEq64
bool hasScalarCompareEq64() const
Definition: GCNSubtarget.h:809
llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:546
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:232
llvm::GCNSubtarget::HasOffset3fBug
bool HasOffset3fBug
Definition: GCNSubtarget.h:195
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::GCNSubtarget::UnalignedAccessMode
bool UnalignedAccessMode
Definition: GCNSubtarget.h:83
llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:220
llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition: GCNSubtarget.h:412
llvm::GCNSubtarget::hasUnalignedBufferAccessEnabled
bool hasUnalignedBufferAccessEnabled() const
Definition: GCNSubtarget.h:511
llvm::GCNSubtarget::hasHalfRate64Ops
bool hasHalfRate64Ops() const
Definition: GCNSubtarget.h:315
llvm::GCNSubtarget::hasGFX10_AEncoding
bool hasGFX10_AEncoding() const
Definition: GCNSubtarget.h:884
llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition: GCNSubtarget.h:630
llvm::GCNSubtarget::HasSDWASdst
bool HasSDWASdst
Definition: GCNSubtarget.h:127
llvm::AMDGPU::IsaInfo::getSGPREncodingGranule
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:600
llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition: AMDGPUSubtarget.h:107
llvm::GCNSubtarget::HasNoDataDepHazard
bool HasNoDataDepHazard
Definition: GCNSubtarget.h:166
llvm::GCNSubtarget::HasVGPRIndexMode
bool HasVGPRIndexMode
Definition: GCNSubtarget.h:122
llvm::GCNSubtarget::overrideSchedPolicy
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
Definition: AMDGPUSubtarget.cpp:741
llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition: AMDGPUBaseInfo.h:85
llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition: GCNSubtarget.h:276
llvm::GCNSubtarget::ParseSubtargetFeatures
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
llvm::GCNSubtarget::hasFlatScrRegister
bool hasFlatScrRegister() const
Definition: GCNSubtarget.h:555
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1358
llvm::GCNSubtarget::hasFastFMAF32
bool hasFastFMAF32() const
Definition: GCNSubtarget.h:311
llvm::GCNSubtarget::hasA16
bool hasA16() const
Definition: GCNSubtarget.h:868
llvm::GCNSubtarget::GFX10_3Insts
bool GFX10_3Insts
Definition: GCNSubtarget.h:113
llvm::GCNSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Definition: GCNSubtarget.h:765
llvm::GCNSubtarget::hasBFI
bool hasBFI() const
Definition: GCNSubtarget.h:345
llvm::GCNSubtarget::getMaxNumSGPRs
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
Definition: GCNSubtarget.h:1040
llvm::GCNSubtarget::useVGPRIndexMode
bool useVGPRIndexMode() const
Definition: AMDGPUSubtarget.cpp:762
llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition: GCNSubtarget.h:323
llvm::GCNSubtarget::EnableLoadStoreOpt
bool EnableLoadStoreOpt
Definition: GCNSubtarget.h:96
llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition: GCNSubtarget.h:471
llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition: GCNSubtarget.h:373
llvm::GCNSubtarget::HasSMemRealTime
bool HasSMemRealTime
Definition: GCNSubtarget.h:118
llvm::Legalizer
Definition: Legalizer.h:31
llvm::GCNSubtarget::hasUnalignedAccessMode
bool hasUnalignedAccessMode() const
Definition: GCNSubtarget.h:527
llvm::GCNSubtarget::hasScalarFlatScratchInsts
bool hasScalarFlatScratchInsts() const
Definition: GCNSubtarget.h:578
llvm::GCNSubtarget::getTargetLowering
const SITargetLowering * getTargetLowering() const override
Definition: GCNSubtarget.h:228
llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition: AMDGPUSubtarget.h:115
llvm::X86AS::FS
@ FS
Definition: X86.h:188
llvm::GCNSubtarget::hasSDWAOmod
bool hasSDWAOmod() const
Definition: GCNSubtarget.h:655
llvm::AMDGPU::IsaInfo::getMaxNumSGPRs
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
Definition: AMDGPUBaseInfo.cpp:640
llvm::GCNSubtarget::hasSMRDReadVALUDefHazard
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition: GCNSubtarget.h:434
llvm::GCNSubtarget::getOccupancyWithNumSGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
Definition: AMDGPUSubtarget.cpp:768
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::GCNSubtarget::getInlineAsmLowering
const InlineAsmLowering * getInlineAsmLowering() const override
Definition: GCNSubtarget.h:240
llvm::GCNSubtarget::HasArchitectedFlatScratch
bool HasArchitectedFlatScratch
Definition: GCNSubtarget.h:172
b
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int b
Definition: README.txt:418
llvm::GCNSubtarget::hasR128A16
bool hasR128A16() const
Definition: GCNSubtarget.h:860
llvm::GCNSubtarget::getBaseMaxNumSGPRs
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
Definition: AMDGPUSubtarget.cpp:855
llvm::GCNSubtarget::hasVscnt
bool hasVscnt() const
Definition: GCNSubtarget.h:723
AMDGPUSubtarget.h
llvm::GCNSubtarget::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize() const override
Definition: GCNSubtarget.h:1161
llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isXnackOnOrAny
bool isXnackOnOrAny() const
Definition: AMDGPUBaseInfo.h:101
llvm::SelectionDAGTargetInfo
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
Definition: SelectionDAGTargetInfo.h:31
llvm::GCNSubtarget::getMaxPrivateElementSize
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition: GCNSubtarget.h:284
llvm::GCNSubtarget::hasVOP3Literal
bool hasVOP3Literal() const
Definition: GCNSubtarget.h:743
llvm::GCNSubtarget::SupportsXNACK
bool SupportsXNACK
Definition: GCNSubtarget.h:85
llvm::GCNSubtarget::CFALUBug
bool CFALUBug
Definition: GCNSubtarget.h:177
llvm::GCNSubtarget::hasFFBH
bool hasFFBH() const
Definition: GCNSubtarget.h:361
llvm::GCNSubtarget::hasSMemTimeInst
bool hasSMemTimeInst() const
Definition: GCNSubtarget.h:731
llvm::GCNSubtarget::hasReadVCCZBug
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition: GCNSubtarget.h:423
llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition: GCNSubtarget.h:1142
llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition: GCNSubtarget.h:906
llvm::GCNSubtarget::AutoWaitcntBeforeBarrier
bool AutoWaitcntBeforeBarrier
Definition: GCNSubtarget.h:81
llvm::GCNSubtarget::EnableTgSplit
bool EnableTgSplit
Definition: GCNSubtarget.h:91
llvm::GCNSubtarget::hasSMemRealTime
bool hasSMemRealTime() const
Definition: GCNSubtarget.h:795
llvm::GCNSubtarget::hasMed3_16
bool hasMed3_16() const
Definition: GCNSubtarget.h:365
llvm::GCNSubtarget::getInstructionSelector
InstructionSelector * getInstructionSelector() const override
Definition: GCNSubtarget.h:244
llvm::GCNSubtarget::isMesaGfxShader
bool isMesaGfxShader(const Function &F) const
Definition: GCNSubtarget.h:647
llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:577
llvm::GCNSubtarget::hasGFX10_BEncoding
bool hasGFX10_BEncoding() const
Definition: GCNSubtarget.h:888
llvm::GCNSubtarget::TrapHandler
bool TrapHandler
Definition: GCNSubtarget.h:93
llvm::GCNSubtarget::hasPkFmacF16Inst
bool hasPkFmacF16Inst() const
Definition: GCNSubtarget.h:711
llvm::GCNSubtarget::TrapID
TrapID
Definition: GCNSubtarget.h:51
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:29
llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition: GCNSubtarget.h:543
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::SIFrameLowering
Definition: SIFrameLowering.h:21
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
SIISelLowering.h
llvm::GCNSubtarget::getMaxNumVGPRs
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1110
llvm::GCNSubtarget::hasDot6Insts
bool hasDot6Insts() const
Definition: GCNSubtarget.h:699
llvm::GCNSubtarget::LDSBankCount
int LDSBankCount
Definition: GCNSubtarget.h:70
llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition: GCNSubtarget.h:453
llvm::GCNSubtarget::hasScalarAtomics
bool hasScalarAtomics() const
Definition: GCNSubtarget.h:817
llvm::GCNSubtarget::hasUnalignedDSAccess
bool hasUnalignedDSAccess() const
Definition: GCNSubtarget.h:515
llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition: GCNSubtarget.h:776
llvm::GCNSubtarget::MIMG_R128
bool MIMG_R128
Definition: GCNSubtarget.h:106
llvm::GCNSubtarget::hasExtendedImageInsts
bool hasExtendedImageInsts() const
Definition: GCNSubtarget.h:856
llvm::GCNSubtarget::hasDot1Insts
bool hasDot1Insts() const
Definition: GCNSubtarget.h:679
llvm::GCNSubtarget::hasOffset3fBug
bool hasOffset3fBug() const
Definition: GCNSubtarget.h:872
llvm::GCNSubtarget::hasVcmpxExecWARHazard
bool hasVcmpxExecWARHazard() const
Definition: GCNSubtarget.h:958
llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:185
llvm::GCNSubtarget::getVGPREncodingGranule
unsigned getVGPREncodingGranule() const
Definition: GCNSubtarget.h:1088
llvm::GCNSubtarget::isWave64
bool isWave64() const
Definition: GCNSubtarget.h:1146
llvm::GCNSubtarget::HasLdsBranchVmemWARHazard
bool HasLdsBranchVmemWARHazard
Definition: GCNSubtarget.h:192
llvm::GCNSubtarget::EnableUnsafeDSOffsetFolding
bool EnableUnsafeDSOffsetFolding
Definition: GCNSubtarget.h:97
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::GCNSubtarget::MaxWaveScratchSize
static const unsigned MaxWaveScratchSize
Definition: GCNSubtarget.h:211
llvm::GCNSubtarget::FMA
bool FMA
Definition: GCNSubtarget.h:105
llvm::GCNSubtarget::FullRate64Ops
bool FullRate64Ops
Definition: GCNSubtarget.h:77
llvm::GCNSubtarget::getLegalizerInfo
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:248
llvm::GCNSubtarget::AddNoCarryInsts
bool AddNoCarryInsts
Definition: GCNSubtarget.h:173
llvm::GCNSubtarget::hasNegativeUnalignedScratchOffsetBug
bool hasNegativeUnalignedScratchOffsetBug() const
Definition: GCNSubtarget.h:912
llvm::GCNSubtarget::usePRTStrictNull
bool usePRTStrictNull() const
Definition: GCNSubtarget.h:499
llvm::GCNSubtarget::hasInstFwdPrefetchBug
bool hasInstFwdPrefetchBug() const
Definition: GCNSubtarget.h:954
llvm::GCNSubtarget::FastDenormalF32
bool FastDenormalF32
Definition: GCNSubtarget.h:75
llvm::GCNSubtarget::HasRegisterBanking
bool HasRegisterBanking
Definition: GCNSubtarget.h:164
llvm::GCNSubtarget::HasPackedTID
bool HasPackedTID
Definition: GCNSubtarget.h:184
llvm::GCNSubtarget::GFX10_BEncoding
bool GFX10_BEncoding
Definition: GCNSubtarget.h:141
llvm::RegisterBankInfo
Holds all the information related to register banks.
Definition: RegisterBankInfo.h:39
llvm::InstructionSelector
Provides the logic to select generic machine instructions.
Definition: InstructionSelector.h:423
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
llvm::AMDGPU::IsaInfo::getMinNumSGPRs
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:623
llvm::GCNSubtarget::HasUnpackedD16VMem
bool HasUnpackedD16VMem
Definition: GCNSubtarget.h:174
llvm::GCNSubtarget::IsGCN
bool IsGCN
Definition: GCNSubtarget.h:107
llvm::GCNSubtarget::getSGPRAllocGranule
unsigned getSGPRAllocGranule() const
Definition: GCNSubtarget.h:1013
llvm::GCNSubtarget::hasImageGather4D16Bug
bool hasImageGather4D16Bug() const
Definition: GCNSubtarget.h:878
llvm::GCNSubtarget::hasPackedTID
bool hasPackedTID() const
Definition: GCNSubtarget.h:979
llvm::GCNSubtarget::getTrapHandlerAbi
TrapHandlerAbi getTrapHandlerAbi() const
Definition: GCNSubtarget.h:397
llvm::GCNSubtarget::HasFmaMixInsts
bool HasFmaMixInsts
Definition: GCNSubtarget.h:120
llvm::GCNSubtarget::has64BitDPP
bool has64BitDPP() const
Definition: GCNSubtarget.h:844
llvm::GCNSubtarget::GFX90AInsts
bool GFX90AInsts
Definition: GCNSubtarget.h:111
llvm::GCNSubtarget::HasIntClamp
bool HasIntClamp
Definition: GCNSubtarget.h:119
llvm::GCNSubtarget::hasNegativeScratchOffsetBug
bool hasNegativeScratchOffsetBug() const
Definition: GCNSubtarget.h:910
llvm::GCNSubtarget::hasVMEMtoScalarWriteHazard
bool hasVMEMtoScalarWriteHazard() const
Definition: GCNSubtarget.h:942
llvm::GCNSubtarget::NSAMaxSize
unsigned NSAMaxSize
Definition: GCNSubtarget.h:139
llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition: GCNSubtarget.h:638
llvm::GCNSubtarget::HasShaderCyclesRegister
bool HasShaderCyclesRegister
Definition: GCNSubtarget.h:163
llvm::GCNSubtarget::hasUnalignedBufferAccess
bool hasUnalignedBufferAccess() const
Definition: GCNSubtarget.h:507
llvm::GCNSubtarget::ScalarizeGlobal
bool ScalarizeGlobal
Definition: GCNSubtarget.h:185
llvm::GCNSubtarget::LDSMisalignedBug
bool LDSMisalignedBug
Definition: GCNSubtarget.h:178
llvm::GCNSubtarget::getTotalNumVGPRs
unsigned getTotalNumVGPRs() const
Definition: GCNSubtarget.h:1093
llvm::GCNSubtarget::GCNSubtarget
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
Definition: AMDGPUSubtarget.cpp:199
llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition: GCNSubtarget.h:493
llvm::GCNSubtarget::HasVMEMtoScalarWriteHazard
bool HasVMEMtoScalarWriteHazard
Definition: GCNSubtarget.h:188
llvm::GCNSubtarget::hasLDSMisalignedBug
bool hasLDSMisalignedBug() const
Definition: GCNSubtarget.h:950
llvm::GCNSubtarget::hasMFMAInlineLiteralBug
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:916
llvm::GCNSubtarget::HasImageGather4D16Bug
bool HasImageGather4D16Bug
Definition: GCNSubtarget.h:198
llvm::GCNSubtarget::FeatureDisable
bool FeatureDisable
Definition: GCNSubtarget.h:201
llvm::GCNSubtarget::EnableXNACK
bool EnableXNACK
Definition: GCNSubtarget.h:89
llvm::GCNSubtarget::hasFlat
bool hasFlat() const
Definition: GCNSubtarget.h:327
llvm::GCNSubtarget::hasImageStoreD16Bug
bool hasImageStoreD16Bug() const
Definition: GCNSubtarget.h:876
llvm::GCNSubtarget::hasGlobalAddTidInsts
bool hasGlobalAddTidInsts() const
Definition: GCNSubtarget.h:582
llvm::GCNSubtarget::hasSDWAScalar
bool hasSDWAScalar() const
Definition: GCNSubtarget.h:659
llvm::SIRegisterInfo::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: SIRegisterInfo.h:278
llvm::GCNSubtarget::hasFmaakFmamkF32Insts
bool hasFmaakFmamkF32Insts() const
Definition: GCNSubtarget.h:852
llvm::GCNSubtarget::SupportsSRAMECC
bool SupportsSRAMECC
Definition: GCNSubtarget.h:153
llvm::GCNSubtarget::EnableCuMode
bool EnableCuMode
Definition: GCNSubtarget.h:92
llvm::GCNSubtarget::HasDot6Insts
bool HasDot6Insts
Definition: GCNSubtarget.h:148
llvm::GCNSubtarget::hasLDSFPAtomics
bool hasLDSFPAtomics() const
Definition: GCNSubtarget.h:821
llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1104
llvm::GCNSubtarget::hasReadM0MovRelInterpHazard
bool hasReadM0MovRelInterpHazard() const
Definition: GCNSubtarget.h:929
llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition: GCNSubtarget.h:1028
llvm::GCNSubtarget::hasUnalignedScratchAccess
bool hasUnalignedScratchAccess() const
Definition: GCNSubtarget.h:523
llvm::GCNSubtarget::hasDot7Insts
bool hasDot7Insts() const
Definition: GCNSubtarget.h:703
llvm::GCNSubtarget::TSInfo
SelectionDAGTargetInfo TSInfo
Definition: GCNSubtarget.h:203
llvm::GCNSubtarget::hasRFEHazards
bool hasRFEHazards() const
Definition: GCNSubtarget.h:444
llvm::GCNSubtarget::HasVcmpxExecWARHazard
bool HasVcmpxExecWARHazard
Definition: GCNSubtarget.h:191
llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:974
llvm::GCNSubtarget::getOccupancyWithNumVGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
Definition: AMDGPUSubtarget.cpp:794
llvm::AMDGPU::IsaInfo::getTotalNumSGPRs
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:604
llvm::GCNSubtarget::hasNoDataDepHazard
bool hasNoDataDepHazard() const
Definition: GCNSubtarget.h:747
llvm::GCNSubtarget::enableEarlyIfConversion
bool enableEarlyIfConversion() const override
Definition: GCNSubtarget.h:782
llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:611
llvm::GCNSubtarget::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
Definition: GCNSubtarget.h:1173
llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition: GCNSubtarget.h:280
llvm::GCNSubtarget::MaxPrivateElementSize
unsigned MaxPrivateElementSize
Definition: GCNSubtarget.h:71
llvm::GCNSubtarget::hasBFE
bool hasBFE() const
Definition: GCNSubtarget.h:341
SIInstrInfo.h
llvm::GCNSubtarget::hasFMA
bool hasFMA() const
Definition: GCNSubtarget.h:381
llvm::GCNSubtarget::hasLdsBranchVmemWARHazard
bool hasLdsBranchVmemWARHazard() const
Definition: GCNSubtarget.h:962
llvm::GCNSubtarget::HasPkFmacF16Inst
bool HasPkFmacF16Inst
Definition: GCNSubtarget.h:151
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:95
llvm::GCNSubtarget::hasIntClamp
bool hasIntClamp() const
Definition: GCNSubtarget.h:295
llvm::GCNSubtarget::hasMultiDwordFlatScratchAddressing
bool hasMultiDwordFlatScratchAddressing() const
Definition: GCNSubtarget.h:590
llvm::GCNSubtarget::hasDenormModeInst
bool hasDenormModeInst() const
Definition: GCNSubtarget.h:467
llvm::GCNSubtarget::hasAtomicFaddInsts
bool hasAtomicFaddInsts() const
Definition: GCNSubtarget.h:715
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition: GCNSubtarget.h:559
llvm::GCNSubtarget::hasDot5Insts
bool hasDot5Insts() const
Definition: GCNSubtarget.h:695
llvm::GCNSubtarget::hasScalarStores
bool hasScalarStores() const
Definition: GCNSubtarget.h:813
llvm::GCNSubtarget::hasGFX10_3Insts
bool hasGFX10_3Insts() const
Definition: GCNSubtarget.h:892
llvm::AMDGPU::IsaInfo::getVGPREncodingGranule
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:708
llvm::GCNSubtarget::CaymanISA
bool CaymanISA
Definition: GCNSubtarget.h:176
llvm::GCNSubtarget::HasScalarStores
bool HasScalarStores
Definition: GCNSubtarget.h:123
llvm::GCNSubtarget::flatScratchIsPointer
bool flatScratchIsPointer() const
Definition: GCNSubtarget.h:998
llvm::GCNSubtarget::supportsGetDoorbellID
bool supportsGetDoorbellID() const
Definition: GCNSubtarget.h:401
llvm::GCNSubtarget::hasDPPBroadcasts
bool hasDPPBroadcasts() const
Definition: GCNSubtarget.h:832
llvm::GCNSubtarget::hasMadF16
bool hasMadF16() const
Definition: AMDGPUSubtarget.cpp:758
SIFrameLowering.h
llvm::GCNSubtarget::GFX7GFX8GFX9Insts
bool GFX7GFX8GFX9Insts
Definition: GCNSubtarget.h:114
llvm::GCNSubtarget::HasSDWAOmod
bool HasSDWAOmod
Definition: GCNSubtarget.h:125
llvm::GCNSubtarget::GFX10_AEncoding
bool GFX10_AEncoding
Definition: GCNSubtarget.h:140
llvm::GCNSubtarget::hasDPPWavefrontShifts
bool hasDPPWavefrontShifts() const
Definition: GCNSubtarget.h:836
llvm::GCNSubtarget::NegativeUnalignedScratchOffsetBug
bool NegativeUnalignedScratchOffsetBug
Definition: GCNSubtarget.h:117
llvm::GCNSubtarget::SGPRInitBug
bool SGPRInitBug
Definition: GCNSubtarget.h:115
llvm::GCNSubtarget::HasVOP3Literal
bool HasVOP3Literal
Definition: GCNSubtarget.h:165
llvm::GCNSubtarget::hasDPP8
bool hasDPP8() const
Definition: GCNSubtarget.h:840
llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:581
llvm::GCNSubtarget::hasScalarPackInsts
bool hasScalarPackInsts() const
Definition: GCNSubtarget.h:389
llvm::GCNSubtarget::hasD16LoadStore
bool hasD16LoadStore() const
Definition: GCNSubtarget.h:602
llvm::GCNSubtarget::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition: GCNSubtarget.h:1150
llvm::GCNSubtarget::adjustSchedDependency
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep) const override
Definition: AMDGPUSubtarget.cpp:972
llvm::GCNSubtarget::hasDot4Insts
bool hasDot4Insts() const
Definition: GCNSubtarget.h:691
llvm::GCNSubtarget::hasNoSdstCMPX
bool hasNoSdstCMPX() const
Definition: GCNSubtarget.h:719
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1034
llvm::GCNSubtarget::enableFlatScratch
bool enableFlatScratch() const
Definition: AMDGPUSubtarget.cpp:334
llvm::GCNSubtarget::getPostRAMutations
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation >> &Mutations) const override
Definition: AMDGPUSubtarget.cpp:1134
llvm::GCNSubtarget::hasFlatScratchSTMode
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:574
llvm::GCNSubtarget::hasFlatAddressSpace
bool hasFlatAddressSpace() const
Definition: GCNSubtarget.h:551
llvm::GCNSubtarget::FlatForGlobal
bool FlatForGlobal
Definition: GCNSubtarget.h:80
llvm::GCNSubtarget::HasNSAEncoding
bool HasNSAEncoding
Definition: GCNSubtarget.h:138
llvm::GCNSubtarget::getReservedNumSGPRs
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
Definition: AMDGPUSubtarget.cpp:820
llvm::GCNSubtarget::GFX10Insts
bool GFX10Insts
Definition: GCNSubtarget.h:112
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::GCNSubtarget::getVGPRAllocGranule
unsigned getVGPRAllocGranule() const
Definition: GCNSubtarget.h:1083
llvm::GCNSubtarget::hasNSAtoVMEMBug
bool hasNSAtoVMEMBug() const
Definition: GCNSubtarget.h:966
llvm::GCNSubtarget::HasMAIInsts
bool HasMAIInsts
Definition: GCNSubtarget.h:150
llvm::GCNSubtarget::enableSubRegLiveness
bool enableSubRegLiveness() const override
Definition: GCNSubtarget.h:771
llvm::GCNSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
AMDGPUGenSubtargetInfo
llvm::GCNSubtarget::setScalarizeGlobalBehavior
void setScalarizeGlobalBehavior(bool b)
Definition: GCNSubtarget.h:775
llvm::GCNSubtarget::GFX9Insts
bool GFX9Insts
Definition: GCNSubtarget.h:110
llvm::GCNSubtarget::HasDot1Insts
bool HasDot1Insts
Definition: GCNSubtarget.h:143
llvm::GCNSubtarget::HasDPP
bool HasDPP
Definition: GCNSubtarget.h:130
llvm::GCNSubtarget::hasFullRate64Ops
bool hasFullRate64Ops() const
Definition: GCNSubtarget.h:319
llvm::GCNSubtarget::HasNoSdstCMPX
bool HasNoSdstCMPX
Definition: GCNSubtarget.h:159
llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition: AMDGPUSubtarget.h:39
llvm::GCNSubtarget::hasUnpackedD16VMem
bool hasUnpackedD16VMem() const
Definition: GCNSubtarget.h:642
llvm::GCNSubtarget::flatScratchIsArchitected
bool flatScratchIsArchitected() const
Definition: GCNSubtarget.h:1004
llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition: GCNSubtarget.h:256
llvm::GCNSubtarget::EnableSRAMECC
bool EnableSRAMECC
Definition: GCNSubtarget.h:157
llvm::GCNSubtarget::getMinWavesPerEU
unsigned getMinWavesPerEU() const override
Definition: GCNSubtarget.h:1179
llvm::GCNSubtarget::HasDot5Insts
bool HasDot5Insts
Definition: GCNSubtarget.h:147
llvm::GCNSubtarget::HasAtomicFaddInsts
bool HasAtomicFaddInsts
Definition: GCNSubtarget.h:152
llvm::GCNSubtarget::useAA
bool useAA() const override
Definition: AMDGPUSubtarget.cpp:766
llvm::GCNSubtarget::hasDPP
bool hasDPP() const
Definition: GCNSubtarget.h:828
llvm::GCNSubtarget::HasInstFwdPrefetchBug
bool HasInstFwdPrefetchBug
Definition: GCNSubtarget.h:190
llvm::SITargetLowering
Definition: SIISelLowering.h:30
llvm::GCNSubtarget::HasSMEMtoVectorWriteHazard
bool HasSMEMtoVectorWriteHazard
Definition: GCNSubtarget.h:189
llvm::GCNSubtarget::hasG16
bool hasG16() const
Definition: GCNSubtarget.h:870
llvm::GCNSubtarget::hasHardClauses
bool hasHardClauses() const
Definition: GCNSubtarget.h:972
llvm::GCNSubtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: GCNSubtarget.h:353
llvm::GCNSubtarget::haveRoundOpsF64
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition: GCNSubtarget.h:487
llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition: GCNSubtarget.h:616
llvm::SIInstrInfo::getRegisterInfo
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:173
llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition: GCNSubtarget.h:791
llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:728
llvm::GCNSubtarget::HasDPP8
bool HasDPP8
Definition: GCNSubtarget.h:131
llvm::GCNSubtarget::zeroesHigh16BitsOfDest
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
Definition: AMDGPUSubtarget.cpp:360
llvm::GCNSubtarget::EnableDS128
bool EnableDS128
Definition: GCNSubtarget.h:99
llvm::GCNSubtarget::HasMFMAInlineLiteralBug
bool HasMFMAInlineLiteralBug
Definition: GCNSubtarget.h:179
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:225
llvm::GCNSubtarget::hasOnlyRevVALUShifts
bool hasOnlyRevVALUShifts() const
Definition: GCNSubtarget.h:333
llvm::GCNSubtarget::UnalignedScratchAccess
bool UnalignedScratchAccess
Definition: GCNSubtarget.h:82
llvm::GCNSubtarget::TrapID::LLVMAMDHSATrap
@ LLVMAMDHSATrap
llvm::GCNSubtarget::DumpCode
bool DumpCode
Definition: GCNSubtarget.h:101
llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition: GCNSubtarget.h:271
llvm::GCNSubtarget::TrapHandlerAbi::AMDHSA
@ AMDHSA
llvm::GCNSubtarget::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs() const
Definition: GCNSubtarget.h:1098
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::GCNSubtarget::hasSwap
bool hasSwap() const
Definition: GCNSubtarget.h:385
llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition: GCNSubtarget.h:539
llvm::GCNSubtarget::FlatScratchInsts
bool FlatScratchInsts
Definition: GCNSubtarget.h:170
AMDGPUCallLowering.h
llvm::GCNSubtarget::HasMovrel
bool HasMovrel
Definition: GCNSubtarget.h:121
llvm::GCNSubtarget::HasDot7Insts
bool HasDot7Insts
Definition: GCNSubtarget.h:149
SelectionDAGTargetInfo.h
llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition: GCNSubtarget.h:547
llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition: AMDGPUSubtarget.cpp:339
llvm::GCNSubtarget::hasMergedShaders
bool hasMergedShaders() const
Definition: GCNSubtarget.h:1008
llvm::GCNSubtarget::FP64
bool FP64
Definition: GCNSubtarget.h:104
llvm::GCNSubtarget::R600ALUInst
bool R600ALUInst
Definition: GCNSubtarget.h:175
llvm::GCNSubtarget::hasFFBL
bool hasFFBL() const
Definition: GCNSubtarget.h:357
llvm::GCNSubtarget::getSelectionDAGInfo
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: GCNSubtarget.h:261
llvm::GCNSubtarget::hasApertureRegs
bool hasApertureRegs() const
Definition: GCNSubtarget.h:531
llvm::GCNSubtarget::~GCNSubtarget
~GCNSubtarget() override
llvm::GCNSubtarget::getSGPREncodingGranule
unsigned getSGPREncodingGranule() const
Definition: GCNSubtarget.h:1018
llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition: AMDGPUBaseInfo.cpp:571
llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition: GCNSubtarget.h:606
llvm::GCNSubtarget::enableSIScheduler
bool enableSIScheduler() const
Definition: GCNSubtarget.h:898
llvm::AMDGPU::IsaInfo::getMinNumVGPRs
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition: AMDGPUBaseInfo.cpp:734
llvm::GCNSubtarget::getInstrItineraryData
const InstrItineraryData * getInstrItineraryData() const override
Definition: GCNSubtarget.h:265
llvm::GCNSubtarget::CIInsts
bool CIInsts
Definition: GCNSubtarget.h:108
llvm::GCNSubtarget::HasVscnt
bool HasVscnt
Definition: GCNSubtarget.h:160
llvm::GCNSubtarget::FastFMAF32
bool FastFMAF32
Definition: GCNSubtarget.h:74
llvm::GCNSubtarget::vmemWriteNeedsExpWaitcnt
bool vmemWriteNeedsExpWaitcnt() const
Definition: GCNSubtarget.h:751
llvm::GCNSubtarget::hasReadM0SendMsgHazard
bool hasReadM0SendMsgHazard() const
Definition: GCNSubtarget.h:933
llvm::GCNSubtarget::Has64BitDPP
bool Has64BitDPP
Definition: GCNSubtarget.h:132
llvm::GCNSubtarget::hasMIMG_R128
bool hasMIMG_R128() const
Definition: GCNSubtarget.h:303
llvm::LegalizerInfo
Definition: LegalizerInfo.h:1083
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::AMDGPU::HSAMD::Kernel::CodeProps::Key::NumVGPRs
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
Definition: AMDGPUMetadata.h:255
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::GCNSubtarget::hasSMEMtoVectorWriteHazard
bool hasSMEMtoVectorWriteHazard() const
Definition: GCNSubtarget.h:946
llvm::GCNSubtarget::HasApertureRegs
bool HasApertureRegs
Definition: GCNSubtarget.h:84
llvm::GCNSubtarget::hasMin3Max3_16
bool hasMin3Max3_16() const
Definition: GCNSubtarget.h:369
llvm::GCNSubtarget::HasSMemTimeInst
bool HasSMemTimeInst
Definition: GCNSubtarget.h:162
llvm::AMDGPUSubtarget::Generation
Generation
Definition: AMDGPUSubtarget.h:31
llvm::MachineSchedPolicy
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Definition: MachineScheduler.h:174
llvm::GCNSubtarget::TargetTriple
Triple TargetTriple
Definition: GCNSubtarget.h:66
llvm::GCNSubtarget::HasSDWAMac
bool HasSDWAMac
Definition: GCNSubtarget.h:128
llvm::GCNSubtarget::hasHWFP64
bool hasHWFP64() const
Definition: GCNSubtarget.h:307
llvm::GCNSubtarget::getTotalNumSGPRs
unsigned getTotalNumSGPRs() const
Definition: GCNSubtarget.h:1023
llvm::GCNSubtarget::has12DWordStoreHazard
bool has12DWordStoreHazard() const
Definition: GCNSubtarget.h:920
llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition: AMDGPUSubtarget.h:189
llvm::GCNSubtarget::hasVcmpxPermlaneHazard
bool hasVcmpxPermlaneHazard() const
Definition: GCNSubtarget.h:938
llvm::GCNSubtarget::hasSDWAOutModsVOPC
bool hasSDWAOutModsVOPC() const
Definition: GCNSubtarget.h:671
llvm::CallLowering
Definition: CallLowering.h:43
llvm::GCNSubtarget::computeOccupancy
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
Definition: AMDGPUSubtarget.cpp:842
llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition: GCNSubtarget.h:408
llvm::GCNSubtarget::hasMad64_32
bool hasMad64_32() const
Definition: GCNSubtarget.h:651
llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
Definition: AMDGPUBaseInfo.cpp:693
llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:109
llvm::GCNSubtarget::getBaseMaxNumVGPRs
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
Definition: AMDGPUSubtarget.cpp:932
llvm::GCNSubtarget::FlatInstOffsets
bool FlatInstOffsets
Definition: GCNSubtarget.h:168
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition: GCNSubtarget.h:707
llvm::GCNSubtarget::HasG16
bool HasG16
Definition: GCNSubtarget.h:137
llvm::GCNSubtarget::hasUsableDivScaleConditionOutput
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition: GCNSubtarget.h:417
llvm::GCNSubtarget::hasAtomicCSub
bool hasAtomicCSub() const
Definition: GCNSubtarget.h:586
llvm::GCNSubtarget::HasGFX10A16
bool HasGFX10A16
Definition: GCNSubtarget.h:136
llvm::GCNSubtarget::HasR128A16
bool HasR128A16
Definition: GCNSubtarget.h:135
llvm::GCNSubtarget::getCallLowering
const CallLowering * getCallLowering() const override
Definition: GCNSubtarget.h:236