LLVM 20.0.0git
Macros | Functions | Variables
AMDGPUTargetMachine.cpp File Reference

This file contains both AMDGPU target machine and the CodeGen pass builder. More...

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUExportClustering.h"
#include "AMDGPUIGroupLP.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSplitModule.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "GCNDPPCombine.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
#include "R600TargetMachine.h"
#include "SIFixSGPRCopies.h"
#include "SIFoldOperands.h"
#include "SILoadStoreOptimizer.h"
#include "SILowerSGPRSpills.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "SIOptimizeVGPRLiveRange.h"
#include "SIPeepholeSDWA.h"
#include "SIPreAllocateWWMRegs.h"
#include "SIShrinkInstructions.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/DeadMachineInstructionElim.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineCSE.h"
#include "llvm/CodeGen/MachineLICM.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <optional>
#include "llvm/Passes/TargetPassRegistry.inc"

Go to the source code of this file.

Macros

#define GET_PASS_REGISTRY   "AMDGPUPassRegistry.def"
 

Functions

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget ()
 
static std::unique_ptr< TargetLoweringObjectFile > createTLOF (const Triple &TT)
 
static ScheduleDAGInstrs * createSIMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createGCNMaxILPMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createGCNMaxMemoryClauseMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createMinRegScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createIterativeILPMachineScheduler (MachineSchedContext *C)
 
static StringRef computeDataLayout (const Triple &TT)
 
static LLVM_READNONE StringRef getGPUOrDefault (const Triple &TT, StringRef GPU)
 
static Reloc::Model getEffectiveRelocModel (std::optional< Reloc::Model > RM)
 
static bool mustPreserveGV (const GlobalValue &GV)
 Predicate for Internalize pass.
 
static Expected< ScanOptions > parseAMDGPUAtomicOptimizerStrategy (StringRef Params)
 
Expected< AMDGPUAttributorOptions > parseAMDGPUAttributorPassOptions (StringRef Params)
 

Variables

static cl::opt< bool > EnableEarlyIfConversion ("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
 
static cl::opt< bool > OptExecMaskPreRA ("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
 
static cl::opt< bool > LowerCtorDtor ("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableLoadStoreVectorizer ("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > ScalarizeGlobal ("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > InternalizeSymbols ("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EarlyInlineAll ("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > RemoveIncompatibleFunctions ("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true))
 
static cl::opt< bool > EnableSDWAPeephole ("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
 
static cl::opt< bool > EnableDPPCombine ("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
 
static cl::opt< bool > EnableAMDGPUAliasAnalysis ("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
 
static cl::opt< bool > EnableLibCallSimplify ("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableLowerKernelArguments ("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableRegReassign ("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > OptVGPRLiveRange ("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
 
static cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy ("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values(clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")))
 
static cl::opt< bool > EnableSIModeRegisterPass ("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableInsertDelayAlu ("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableVOPD ("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableDCEInRA ("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
 
static cl::opt< bool > EnableSetWavePriority ("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EnableScalarIRPasses ("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableSwLowerLDS ("amdgpu-enable-sw-lower-lds", cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR."), cl::init(true), cl::Hidden)
 
static cl::opt< bool, true > EnableLowerModuleLDS ("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnablePreRAOptimizations ("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnablePromoteKernelArguments ("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
 
static cl::opt< bool > EnableImageIntrinsicOptimizer ("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableLoopPrefetch ("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false))
 
static cl::opt< std::string > AMDGPUSchedStrategy ("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init(""))
 
static cl::opt< bool > EnableRewritePartialRegUses ("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableHipStdPar ("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EnableAMDGPUAttributor ("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > NewRegBankSelect ("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > HasClosedWorldAssumption ("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden)
 
static MachineSchedRegistry SISchedRegistry ("si", "Run SI's custom scheduler", createSIMachineScheduler)
 
static MachineSchedRegistry GCNMaxOccupancySchedRegistry ("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
 
static MachineSchedRegistry GCNMaxILPSchedRegistry ("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler)
 
static MachineSchedRegistry GCNMaxMemoryClauseSchedRegistry ("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler)
 
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry ("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
 
static MachineSchedRegistry GCNMinRegSchedRegistry ("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
 
static MachineSchedRegistry GCNILPSchedRegistry ("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
 
static const char RegAllocOptNotSupportedMessage []
 

Detailed Description

This file contains both AMDGPU target machine and the CodeGen pass builder.

The AMDGPU target machine contains all of the hardware specific information needed to emit code for SI+ GPUs in the legacy pass manager pipeline. The CodeGen pass builder handles the pass pipeline for new pass manager.

Definition in file AMDGPUTargetMachine.cpp.

Macro Definition Documentation

◆ GET_PASS_REGISTRY

#define GET_PASS_REGISTRY   "AMDGPUPassRegistry.def"

Function Documentation

◆ computeDataLayout()

static StringRef computeDataLayout ( const Triple & TT)
static

Definition at line 646 of file AMDGPUTargetMachine.cpp.

References llvm::Triple::r600.

◆ createGCNMaxILPMachineScheduler()

static ScheduleDAGInstrs * createGCNMaxILPMachineScheduler ( MachineSchedContext * C)
static

◆ createGCNMaxMemoryClauseMachineScheduler()

static ScheduleDAGInstrs * createGCNMaxMemoryClauseMachineScheduler ( MachineSchedContext * C)
static

◆ createGCNMaxOccupancyMachineScheduler()

static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler ( MachineSchedContext * C)
static

◆ createIterativeGCNMaxOccupancyMachineScheduler()

static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler ( MachineSchedContext * C)
static

◆ createIterativeILPMachineScheduler()

static ScheduleDAGInstrs * createIterativeILPMachineScheduler ( MachineSchedContext * C)
static

◆ createMinRegScheduler()

static ScheduleDAGInstrs * createMinRegScheduler ( MachineSchedContext * C)
static

◆ createSIMachineScheduler()

static ScheduleDAGInstrs * createSIMachineScheduler ( MachineSchedContext * C)
static

Definition at line 549 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C.

◆ createTLOF()

static std::unique_ptr< TargetLoweringObjectFile > createTLOF ( const Triple & TT)
static

Definition at line 545 of file AMDGPUTargetMachine.cpp.

◆ getEffectiveRelocModel()

static Reloc::Model getEffectiveRelocModel ( std::optional< Reloc::Model > RM)
static

Definition at line 678 of file AMDGPUTargetMachine.cpp.

References llvm::Reloc::PIC_.

◆ getGPUOrDefault()

static LLVM_READNONE StringRef getGPUOrDefault ( const Triple & TT,
StringRef  GPU 
)
static

◆ LLVMInitializeAMDGPUTarget()

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget ( )

Definition at line 462 of file AMDGPUTargetMachine.cpp.

References llvm::PassRegistry::getPassRegistry(), llvm::getTheGCNTarget(), llvm::getTheR600Target(), llvm::initializeAMDGPUAAWrapperPassPass(), llvm::initializeAMDGPUAlwaysInlinePass(), llvm::initializeAMDGPUAnnotateKernelFeaturesPass(), llvm::initializeAMDGPUAnnotateUniformValuesLegacyPass(), llvm::initializeAMDGPUArgumentUsageInfoPass(), llvm::initializeAMDGPUAtomicOptimizerPass(), llvm::initializeAMDGPUAttributorLegacyPass(), llvm::initializeAMDGPUCodeGenPreparePass(), llvm::initializeAMDGPUCtorDtorLoweringLegacyPass(), llvm::initializeAMDGPUDAGToDAGISelLegacyPass(), llvm::initializeAMDGPUExternalAAWrapperPass(), llvm::initializeAMDGPUGlobalISelDivergenceLoweringPass(), llvm::initializeAMDGPUImageIntrinsicOptimizerPass(), llvm::initializeAMDGPUInsertDelayAluPass(), llvm::initializeAMDGPULateCodeGenPrepareLegacyPass(), llvm::initializeAMDGPULowerBufferFatPointersPass(), llvm::initializeAMDGPULowerKernelArgumentsPass(), llvm::initializeAMDGPULowerKernelAttributesPass(), llvm::initializeAMDGPULowerModuleLDSLegacyPass(), llvm::initializeAMDGPUMarkLastScratchLoadPass(), llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(), llvm::initializeAMDGPUPostLegalizerCombinerPass(), llvm::initializeAMDGPUPreLegalizerCombinerPass(), llvm::initializeAMDGPUPrintfRuntimeBindingPass(), llvm::initializeAMDGPUPromoteAllocaPass(), llvm::initializeAMDGPUPromoteAllocaToVectorPass(), llvm::initializeAMDGPUPromoteKernelArgumentsPass(), llvm::initializeAMDGPURegBankCombinerPass(), llvm::initializeAMDGPURegBankLegalizePass(), llvm::initializeAMDGPURegBankSelectPass(), llvm::initializeAMDGPURemoveIncompatibleFunctionsPass(), llvm::initializeAMDGPUReserveWWMRegsPass(), llvm::initializeAMDGPUResourceUsageAnalysisPass(), llvm::initializeAMDGPURewriteOutArgumentsPass(), llvm::initializeAMDGPURewriteUndefForPHILegacyPass(), llvm::initializeAMDGPUSwLowerLDSLegacyPass(), llvm::initializeAMDGPUUnifyDivergentExitNodesPass(), llvm::initializeAMDGPUUnifyMetadataPass(), 
llvm::initializeGCNCreateVOPDPass(), llvm::initializeGCNDPPCombineLegacyPass(), llvm::initializeGCNNSAReassignPass(), llvm::initializeGCNPreRALongBranchRegPass(), llvm::initializeGCNPreRAOptimizationsPass(), llvm::initializeGCNRegPressurePrinterPass(), llvm::initializeGCNRewritePartialRegUsesPass(), llvm::initializeGlobalISel(), llvm::initializeR600ClauseMergePassPass(), llvm::initializeR600ControlFlowFinalizerPass(), llvm::initializeR600ExpandSpecialInstrsPassPass(), llvm::initializeR600PacketizerPass(), llvm::initializeR600VectorRegMergerPass(), llvm::initializeSIAnnotateControlFlowLegacyPass(), llvm::initializeSIFixSGPRCopiesLegacyPass(), llvm::initializeSIFixVGPRCopiesPass(), llvm::initializeSIFoldOperandsLegacyPass(), llvm::initializeSIFormMemoryClausesPass(), llvm::initializeSIInsertHardClausesPass(), llvm::initializeSIInsertWaitcntsPass(), llvm::initializeSILateBranchLoweringPass(), llvm::initializeSILoadStoreOptimizerLegacyPass(), llvm::initializeSILowerControlFlowPass(), llvm::initializeSILowerI1CopiesLegacyPass(), llvm::initializeSILowerSGPRSpillsLegacyPass(), llvm::initializeSILowerWWMCopiesPass(), llvm::initializeSIMemoryLegalizerPass(), llvm::initializeSIModeRegisterPass(), llvm::initializeSIOptimizeExecMaskingPass(), llvm::initializeSIOptimizeExecMaskingPreRAPass(), llvm::initializeSIOptimizeVGPRLiveRangeLegacyPass(), llvm::initializeSIPeepholeSDWALegacyPass(), llvm::initializeSIPostRABundlerPass(), llvm::initializeSIPreAllocateWWMRegsLegacyPass(), llvm::initializeSIPreEmitPeepholePass(), llvm::initializeSIShrinkInstructionsLegacyPass(), llvm::initializeSIWholeQuadModePass(), X, and Y.

◆ mustPreserveGV()

static bool mustPreserveGV ( const GlobalValue & GV)
static

◆ parseAMDGPUAtomicOptimizerStrategy()

static Expected< ScanOptions > parseAMDGPUAtomicOptimizerStrategy ( StringRef  Params)
static

◆ parseAMDGPUAttributorPassOptions()

Expected< AMDGPUAttributorOptions > parseAMDGPUAttributorPassOptions ( StringRef  Params)

Variable Documentation

◆ AMDGPUAtomicOptimizerStrategy

cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))) ( "amdgpu-atomic-optimizer-strategy"  ,
cl::desc("Select DPP or Iterative strategy for scan")  ,
cl::init(ScanOptions::Iterative)  ,
cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))   
)
static

◆ AMDGPUSchedStrategy

cl::opt< std::string > AMDGPUSchedStrategy("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init("")) ( "amdgpu-sched-strategy"  ,
cl::desc("Select custom AMDGPU scheduling strategy.")  ,
cl::Hidden  ,
cl::init("")   
)
static

◆ EarlyInlineAll

cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden) ( "amdgpu-early-inline-all"  ,
cl::desc("Inline all functions early")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ EnableAMDGPUAliasAnalysis

cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true)) ( "enable-amdgpu-aa"  ,
cl::Hidden  ,
cl::desc("Enable AMDGPU Alias Analysis")  ,
cl::init(true)   
)
static

◆ EnableAMDGPUAttributor

cl::opt< bool > EnableAMDGPUAttributor("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden) ( "amdgpu-attributor-enable"  ,
cl::desc("Enable AMDGPUAttributorPass")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableDCEInRA

cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc")) ( "amdgpu-dce-in-ra"  ,
cl::init(true)  ,
cl::Hidden  ,
cl::desc("Enable machine DCE inside regalloc")   
)
static

◆ EnableDPPCombine

cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true)) ( "amdgpu-dpp-combine"  ,
cl::desc("Enable DPP combiner")  ,
cl::init(true)   
)
static

◆ EnableEarlyIfConversion

cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false)) ( "amdgpu-early-ifcvt"  ,
cl::Hidden  ,
cl::desc("Run early if-conversion")  ,
cl::init(false)   
)
static

◆ EnableHipStdPar

cl::opt< bool > EnableHipStdPar("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden) ( "amdgpu-enable-hipstdpar"  ,
cl::desc("Enable HIP Standard Parallelism Offload support")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ EnableImageIntrinsicOptimizer

cl::opt< bool > EnableImageIntrinsicOptimizer("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-image-intrinsic-optimizer"  ,
cl::desc("Enable image intrinsic optimizer pass")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableInsertDelayAlu

cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden) ( "amdgpu-enable-delay-alu"  ,
cl::desc("Enable s_delay_alu insertion")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableLibCallSimplify

cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden) ( "amdgpu-simplify-libcall"  ,
cl::desc("Enable amdgpu library simplifications")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableLoadStoreVectorizer

cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden) ( "amdgpu-load-store-vectorizer"  ,
cl::desc("Enable load store vectorizer")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableLoopPrefetch

cl::opt< bool > EnableLoopPrefetch("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false)) ( "amdgpu-loop-prefetch"  ,
cl::desc("Enable loop data prefetch on AMDGPU")  ,
cl::Hidden  ,
cl::init(false)   
)
static

◆ EnableLowerKernelArguments

cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden) ( "amdgpu-ir-lower-kernel-arguments"  ,
cl::desc("Lower kernel argument loads in IR pass")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableLowerModuleLDS

cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden) ( "amdgpu-enable-lower-module-lds"  ,
cl::desc("Enable lower module lds pass")  ,
cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS)  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnablePreRAOptimizations

cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-pre-ra-optimizations"  ,
cl::desc("Enable Pre-RA optimizations pass")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnablePromoteKernelArguments

cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true)) ( "amdgpu-enable-promote-kernel-arguments"  ,
cl::desc("Enable promotion of flat kernel pointer arguments to global")  ,
cl::Hidden  ,
cl::init(true)   
)
static

◆ EnableRegReassign

cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden) ( "amdgpu-reassign-regs"  ,
cl::desc("Enable register reassign optimizations on gfx10+")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableRewritePartialRegUses

cl::opt< bool > EnableRewritePartialRegUses("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-rewrite-partial-reg-uses"  ,
cl::desc("Enable rewrite partial reg uses pass")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableScalarIRPasses

cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden) ( "amdgpu-scalar-ir-passes"  ,
cl::desc("Enable scalar IR passes")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableSDWAPeephole

cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true)) ( "amdgpu-sdwa-peephole"  ,
cl::desc("Enable SDWA peepholer")  ,
cl::init(true)   
)
static

◆ EnableSetWavePriority

cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden) ( "amdgpu-set-wave-priority"  ,
cl::desc("Adjust wave priority")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ EnableSIModeRegisterPass

cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden) ( "amdgpu-mode-register"  ,
cl::desc("Enable mode register pass")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableSwLowerLDS

cl::opt< bool > EnableSwLowerLDS("amdgpu-enable-sw-lower-lds", cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR."), cl::init(true), cl::Hidden) ( "amdgpu-enable-sw-lower-lds"  ,
cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR.")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableVOPD

cl::opt< bool > EnableVOPD("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden) ( "amdgpu-enable-vopd"  ,
cl::desc("Enable VOPD, dual issue of VALU in wave32")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ GCNILPSchedRegistry

MachineSchedRegistry GCNILPSchedRegistry("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler) ( "gcn-iterative-ilp"  ,
"Run GCN iterative scheduler for ILP scheduling (experimental)"  ,
createIterativeILPMachineScheduler   
)
static

◆ GCNMaxILPSchedRegistry

MachineSchedRegistry GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler) ( "gcn-max-ilp"  ,
"Run GCN scheduler to maximize ilp"  ,
createGCNMaxILPMachineScheduler   
)
static

◆ GCNMaxMemoryClauseSchedRegistry

MachineSchedRegistry GCNMaxMemoryClauseSchedRegistry("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler) ( "gcn-max-memory-clause"  ,
"Run GCN scheduler to maximize memory clause"  ,
createGCNMaxMemoryClauseMachineScheduler   
)
static

◆ GCNMaxOccupancySchedRegistry

MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler) ( "gcn-max-occupancy"  ,
"Run GCN scheduler to maximize occupancy"  ,
createGCNMaxOccupancyMachineScheduler   
)
static

◆ GCNMinRegSchedRegistry

MachineSchedRegistry GCNMinRegSchedRegistry("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler) ( "gcn-iterative-minreg"  ,
"Run GCN iterative scheduler for minimal register usage (experimental)"  ,
createMinRegScheduler   
)
static

◆ HasClosedWorldAssumption

cl::opt< bool > HasClosedWorldAssumption("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden) ( "amdgpu-link-time-closed-world"  ,
cl::desc("Whether has closed-world assumption at link time")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ InternalizeSymbols

cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden) ( "amdgpu-internalize-symbols"  ,
cl::desc("Enable elimination of non-kernel functions and unused globals")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ IterativeGCNMaxOccupancySchedRegistry

MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler) ( "gcn-iterative-max-occupancy-experimental"  ,
"Run GCN scheduler to maximize occupancy (experimental)"  ,
createIterativeGCNMaxOccupancyMachineScheduler   
)
static

◆ LowerCtorDtor

cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden) ( "amdgpu-lower-global-ctor-dtor"  ,
cl::desc("Lower GPU ctor / dtors to globals on the device.")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ NewRegBankSelect

cl::opt< bool > NewRegBankSelect("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden) ( "new-reg-bank-select"  ,
cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ OptExecMaskPreRA

cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true)) ( "amdgpu-opt-exec-mask-pre-ra"  ,
cl::Hidden  ,
cl::desc("Run pre-RA exec mask optimizations")  ,
cl::init(true)   
)
static

◆ OptVGPRLiveRange

cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden) ( "amdgpu-opt-vgpr-liverange"  ,
cl::desc("Enable VGPR liverange optimizations for if-else structure")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ RegAllocOptNotSupportedMessage

const char RegAllocOptNotSupportedMessage[]
static
Initial value:
=
"-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
"and -vgpr-regalloc"

Definition at line 1557 of file AMDGPUTargetMachine.cpp.

◆ RemoveIncompatibleFunctions

cl::opt< bool > RemoveIncompatibleFunctions("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true)) ( "amdgpu-enable-remove-incompatible-functions"  ,
cl::Hidden  ,
cl::desc("Enable removal of functions when they" "use features not supported by the target GPU")  ,
cl::init(true)   
)
static

◆ ScalarizeGlobal

cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden) ( "amdgpu-scalarize-global-loads"  ,
cl::desc("Enable global load scalarization")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ SISchedRegistry

MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler) ( "si"  ,
"Run SI's custom scheduler"  ,
createSIMachineScheduler   
)
static