LLVM 19.0.0git
Macros | Functions | Variables
AMDGPUTargetMachine.cpp File Reference

The AMDGPU target machine contains all of the hardware specific information needed to emit code for SI+ GPUs. More...

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUExportClustering.h"
#include "AMDGPUIGroupLP.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPURegBankSelect.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
#include "R600MachineFunctionInfo.h"
#include "R600TargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <optional>
#include "llvm/Passes/TargetPassRegistry.inc"

Go to the source code of this file.

Macros

#define GET_PASS_REGISTRY   "AMDGPUPassRegistry.def"
 

Functions

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget ()
 
static std::unique_ptr< TargetLoweringObjectFile > createTLOF (const Triple &TT)
 
static ScheduleDAGInstrs * createSIMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createGCNMaxILPMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createMinRegScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createIterativeILPMachineScheduler (MachineSchedContext *C)
 
static StringRef computeDataLayout (const Triple &TT)
 
static LLVM_READNONE StringRef getGPUOrDefault (const Triple &TT, StringRef GPU)
 
static Reloc::Model getEffectiveRelocModel (std::optional< Reloc::Model > RM)
 
static bool mustPreserveGV (const GlobalValue &GV)
 Predicate for Internalize pass.
 
static Expected< ScanOptions > parseAMDGPUAtomicOptimizerStrategy (StringRef Params)
 

Variables

static cl::opt< bool > EnableEarlyIfConversion ("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
 
static cl::opt< bool > OptExecMaskPreRA ("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
 
static cl::opt< bool > LowerCtorDtor ("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableLoadStoreVectorizer ("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > ScalarizeGlobal ("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > InternalizeSymbols ("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EarlyInlineAll ("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > RemoveIncompatibleFunctions ("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true))
 
static cl::opt< bool > EnableSDWAPeephole ("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
 
static cl::opt< bool > EnableDPPCombine ("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
 
static cl::opt< bool > EnableAMDGPUAliasAnalysis ("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
 
static cl::opt< bool, true > LateCFGStructurize ("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
 
static cl::opt< bool, true > DisableStructurizer ("amdgpu-disable-structurizer", cl::desc("Disable structurizer for experiments; produces unusable code"), cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden)
 
static cl::opt< bool > EnableLibCallSimplify ("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableLowerKernelArguments ("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableRegReassign ("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > OptVGPRLiveRange ("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
 
static cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy ("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values(clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")))
 
static cl::opt< bool > EnableSIModeRegisterPass ("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableInsertSingleUseVDST ("amdgpu-enable-single-use-vdst", cl::desc("Enable s_singleuse_vdst insertion"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EnableInsertDelayAlu ("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableVOPD ("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableDCEInRA ("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
 
static cl::opt< bool > EnableSetWavePriority ("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EnableScalarIRPasses ("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableStructurizerWorkarounds ("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool, true > EnableLowerModuleLDS ("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnablePreRAOptimizations ("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnablePromoteKernelArguments ("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
 
static cl::opt< bool > EnableImageIntrinsicOptimizer ("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableLoopPrefetch ("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false))
 
static cl::opt< bool > EnableMaxIlpSchedStrategy ("amdgpu-enable-max-ilp-scheduling-strategy", cl::desc("Enable scheduling strategy to maximize ILP for a single wave."), cl::Hidden, cl::init(false))
 
static cl::opt< bool > EnableRewritePartialRegUses ("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableHipStdPar ("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden)
 
static MachineSchedRegistry SISchedRegistry ("si", "Run SI's custom scheduler", createSIMachineScheduler)
 
static MachineSchedRegistry GCNMaxOccupancySchedRegistry ("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
 
static MachineSchedRegistry GCNMaxILPSchedRegistry ("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler)
 
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry ("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
 
static MachineSchedRegistry GCNMinRegSchedRegistry ("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
 
static MachineSchedRegistry GCNILPSchedRegistry ("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
 
static const char RegAllocOptNotSupportedMessage []
 

Detailed Description

The AMDGPU target machine contains all of the hardware specific information needed to emit code for SI+ GPUs.

Definition in file AMDGPUTargetMachine.cpp.

Macro Definition Documentation

◆ GET_PASS_REGISTRY

#define GET_PASS_REGISTRY   "AMDGPUPassRegistry.def"

Function Documentation

◆ computeDataLayout()

static StringRef computeDataLayout ( const Triple & TT)
static

Definition at line 543 of file AMDGPUTargetMachine.cpp.

References llvm::Triple::r600.

◆ createGCNMaxILPMachineScheduler()

static ScheduleDAGInstrs * createGCNMaxILPMachineScheduler ( MachineSchedContext * C)
static

◆ createGCNMaxOccupancyMachineScheduler()

static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler ( MachineSchedContext * C)
static

◆ createIterativeGCNMaxOccupancyMachineScheduler()

static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler ( MachineSchedContext * C)
static

◆ createIterativeILPMachineScheduler()

static ScheduleDAGInstrs * createIterativeILPMachineScheduler ( MachineSchedContext * C)
static

◆ createMinRegScheduler()

static ScheduleDAGInstrs * createMinRegScheduler ( MachineSchedContext * C)
static

◆ createSIMachineScheduler()

static ScheduleDAGInstrs * createSIMachineScheduler ( MachineSchedContext * C)
static

Definition at line 461 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C.

◆ createTLOF()

static std::unique_ptr< TargetLoweringObjectFile > createTLOF ( const Triple & TT)
static

Definition at line 457 of file AMDGPUTargetMachine.cpp.

◆ getEffectiveRelocModel()

static Reloc::Model getEffectiveRelocModel ( std::optional< Reloc::Model > RM)
static

Definition at line 575 of file AMDGPUTargetMachine.cpp.

References llvm::Reloc::PIC_.

◆ getGPUOrDefault()

static LLVM_READNONE StringRef getGPUOrDefault ( const Triple & TT,
StringRef  GPU 
)
static

◆ LLVMInitializeAMDGPUTarget()

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget ( )

Definition at line 376 of file AMDGPUTargetMachine.cpp.

References llvm::PassRegistry::getPassRegistry(), llvm::getTheGCNTarget(), llvm::getTheR600Target(), llvm::initializeAMDGPUAAWrapperPassPass(), llvm::initializeAMDGPUAlwaysInlinePass(), llvm::initializeAMDGPUAnnotateKernelFeaturesPass(), llvm::initializeAMDGPUAnnotateUniformValuesPass(), llvm::initializeAMDGPUArgumentUsageInfoPass(), llvm::initializeAMDGPUAtomicOptimizerPass(), llvm::initializeAMDGPUAttributorLegacyPass(), llvm::initializeAMDGPUCodeGenPreparePass(), llvm::initializeAMDGPUCtorDtorLoweringLegacyPass(), llvm::initializeAMDGPUDAGToDAGISelPass(), llvm::initializeAMDGPUExternalAAWrapperPass(), llvm::initializeAMDGPUGlobalISelDivergenceLoweringPass(), llvm::initializeAMDGPUImageIntrinsicOptimizerPass(), llvm::initializeAMDGPUInsertDelayAluPass(), llvm::initializeAMDGPUInsertSingleUseVDSTPass(), llvm::initializeAMDGPULateCodeGenPreparePass(), llvm::initializeAMDGPULowerBufferFatPointersPass(), llvm::initializeAMDGPULowerKernelArgumentsPass(), llvm::initializeAMDGPULowerKernelAttributesPass(), llvm::initializeAMDGPULowerModuleLDSLegacyPass(), llvm::initializeAMDGPUMarkLastScratchLoadPass(), llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(), llvm::initializeAMDGPUPostLegalizerCombinerPass(), llvm::initializeAMDGPUPreLegalizerCombinerPass(), llvm::initializeAMDGPUPrintfRuntimeBindingPass(), llvm::initializeAMDGPUPromoteAllocaPass(), llvm::initializeAMDGPUPromoteAllocaToVectorPass(), llvm::initializeAMDGPUPromoteKernelArgumentsPass(), llvm::initializeAMDGPURegBankCombinerPass(), llvm::initializeAMDGPURegBankSelectPass(), llvm::initializeAMDGPURemoveIncompatibleFunctionsPass(), llvm::initializeAMDGPUResourceUsageAnalysisPass(), llvm::initializeAMDGPURewriteOutArgumentsPass(), llvm::initializeAMDGPURewriteUndefForPHILegacyPass(), llvm::initializeAMDGPUUnifyDivergentExitNodesPass(), llvm::initializeAMDGPUUnifyMetadataPass(), llvm::initializeGCNCreateVOPDPass(), llvm::initializeGCNDPPCombinePass(), llvm::initializeGCNNSAReassignPass(), 
llvm::initializeGCNPreRALongBranchRegPass(), llvm::initializeGCNPreRAOptimizationsPass(), llvm::initializeGCNRegPressurePrinterPass(), llvm::initializeGCNRewritePartialRegUsesPass(), llvm::initializeGlobalISel(), llvm::initializeR600ClauseMergePassPass(), llvm::initializeR600ControlFlowFinalizerPass(), llvm::initializeR600ExpandSpecialInstrsPassPass(), llvm::initializeR600PacketizerPass(), llvm::initializeR600VectorRegMergerPass(), llvm::initializeSIAnnotateControlFlowPass(), llvm::initializeSIFixSGPRCopiesPass(), llvm::initializeSIFixVGPRCopiesPass(), llvm::initializeSIFoldOperandsPass(), llvm::initializeSIFormMemoryClausesPass(), llvm::initializeSIInsertHardClausesPass(), llvm::initializeSIInsertWaitcntsPass(), llvm::initializeSILateBranchLoweringPass(), llvm::initializeSILoadStoreOptimizerPass(), llvm::initializeSILowerControlFlowPass(), llvm::initializeSILowerI1CopiesPass(), llvm::initializeSILowerSGPRSpillsPass(), llvm::initializeSILowerWWMCopiesPass(), llvm::initializeSIMemoryLegalizerPass(), llvm::initializeSIModeRegisterPass(), llvm::initializeSIOptimizeExecMaskingPass(), llvm::initializeSIOptimizeExecMaskingPreRAPass(), llvm::initializeSIOptimizeVGPRLiveRangePass(), llvm::initializeSIPeepholeSDWAPass(), llvm::initializeSIPostRABundlerPass(), llvm::initializeSIPreAllocateWWMRegsPass(), llvm::initializeSIPreEmitPeepholePass(), llvm::initializeSIShrinkInstructionsPass(), llvm::initializeSIWholeQuadModePass(), X, and Y.

◆ mustPreserveGV()

static bool mustPreserveGV ( const GlobalValue & GV)
static

◆ parseAMDGPUAtomicOptimizerStrategy()

static Expected< ScanOptions > parseAMDGPUAtomicOptimizerStrategy ( StringRef  Params)
static

Variable Documentation

◆ AMDGPUAtomicOptimizerStrategy

cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))) ( "amdgpu-atomic-optimizer-strategy"  ,
cl::desc("Select DPP or Iterative strategy for scan")  ,
cl::init(ScanOptions::Iterative)  ,
cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))   
)
static

◆ DisableStructurizer

cl::opt< bool, true > DisableStructurizer("amdgpu-disable-structurizer", cl::desc("Disable structurizer for experiments; produces unusable code"), cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden) ( "amdgpu-disable-structurizer"  ,
cl::desc("Disable structurizer for experiments; produces unusable code")  ,
cl::location(AMDGPUTargetMachine::DisableStructurizer)  ,
cl::ReallyHidden   
)
static

◆ EarlyInlineAll

cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden) ( "amdgpu-early-inline-all"  ,
cl::desc("Inline all functions early")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ EnableAMDGPUAliasAnalysis

cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true)) ( "enable-amdgpu-aa"  ,
cl::Hidden  ,
cl::desc("Enable AMDGPU Alias Analysis")  ,
cl::init(true)   
)
static

◆ EnableDCEInRA

cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc")) ( "amdgpu-dce-in-ra"  ,
cl::init(true)  ,
cl::Hidden  ,
cl::desc("Enable machine DCE inside regalloc")   
)
static

◆ EnableDPPCombine

cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true)) ( "amdgpu-dpp-combine"  ,
cl::desc("Enable DPP combiner")  ,
cl::init(true)   
)
static

◆ EnableEarlyIfConversion

cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false)) ( "amdgpu-early-ifcvt"  ,
cl::Hidden  ,
cl::desc("Run early if-conversion")  ,
cl::init(false)   
)
static

◆ EnableHipStdPar

cl::opt< bool > EnableHipStdPar("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden) ( "amdgpu-enable-hipstdpar"  ,
cl::desc("Enable HIP Standard Parallelism Offload support")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ EnableImageIntrinsicOptimizer

cl::opt< bool > EnableImageIntrinsicOptimizer("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-image-intrinsic-optimizer"  ,
cl::desc("Enable image intrinsic optimizer pass")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableInsertDelayAlu

cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden) ( "amdgpu-enable-delay-alu"  ,
cl::desc("Enable s_delay_alu insertion")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableInsertSingleUseVDST

cl::opt< bool > EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst", cl::desc("Enable s_singleuse_vdst insertion"), cl::init(false), cl::Hidden) ( "amdgpu-enable-single-use-vdst"  ,
cl::desc("Enable s_singleuse_vdst insertion")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ EnableLibCallSimplify

cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden) ( "amdgpu-simplify-libcall"  ,
cl::desc("Enable amdgpu library simplifications")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableLoadStoreVectorizer

cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden) ( "amdgpu-load-store-vectorizer"  ,
cl::desc("Enable load store vectorizer")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableLoopPrefetch

cl::opt< bool > EnableLoopPrefetch("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false)) ( "amdgpu-loop-prefetch"  ,
cl::desc("Enable loop data prefetch on AMDGPU")  ,
cl::Hidden  ,
cl::init(false)   
)
static

◆ EnableLowerKernelArguments

cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden) ( "amdgpu-ir-lower-kernel-arguments"  ,
cl::desc("Lower kernel argument loads in IR pass")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableLowerModuleLDS

cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden) ( "amdgpu-enable-lower-module-lds"  ,
cl::desc("Enable lower module lds pass")  ,
cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS)  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableMaxIlpSchedStrategy

cl::opt< bool > EnableMaxIlpSchedStrategy("amdgpu-enable-max-ilp-scheduling-strategy", cl::desc("Enable scheduling strategy to maximize ILP for a single wave."), cl::Hidden, cl::init(false)) ( "amdgpu-enable-max-ilp-scheduling-strategy"  ,
cl::desc("Enable scheduling strategy to maximize ILP for a single wave.")  ,
cl::Hidden  ,
cl::init(false)   
)
static

◆ EnablePreRAOptimizations

cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-pre-ra-optimizations"  ,
cl::desc("Enable Pre-RA optimizations pass")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnablePromoteKernelArguments

cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true)) ( "amdgpu-enable-promote-kernel-arguments"  ,
cl::desc("Enable promotion of flat kernel pointer arguments to global")  ,
cl::Hidden  ,
cl::init(true)   
)
static

◆ EnableRegReassign

cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden) ( "amdgpu-reassign-regs"  ,
cl::desc("Enable register reassign optimizations on gfx10+")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableRewritePartialRegUses

cl::opt< bool > EnableRewritePartialRegUses("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-rewrite-partial-reg-uses"  ,
cl::desc("Enable rewrite partial reg uses pass")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableScalarIRPasses

cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden) ( "amdgpu-scalar-ir-passes"  ,
cl::desc("Enable scalar IR passes")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableSDWAPeephole

cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true)) ( "amdgpu-sdwa-peephole"  ,
cl::desc("Enable SDWA peepholer")  ,
cl::init(true)   
)
static

◆ EnableSetWavePriority

cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden) ( "amdgpu-set-wave-priority"  ,
cl::desc("Adjust wave priority")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ EnableSIModeRegisterPass

cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden) ( "amdgpu-mode-register"  ,
cl::desc("Enable mode register pass")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableStructurizerWorkarounds

cl::opt< bool > EnableStructurizerWorkarounds("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-structurizer-workarounds"  ,
cl::desc("Enable workarounds for the StructurizeCFG pass")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ EnableVOPD

cl::opt< bool > EnableVOPD("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden) ( "amdgpu-enable-vopd"  ,
cl::desc("Enable VOPD, dual issue of VALU in wave32")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ GCNILPSchedRegistry

MachineSchedRegistry GCNILPSchedRegistry("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler) ( "gcn-iterative-ilp"  ,
"Run GCN iterative scheduler for ILP scheduling (experimental)"  ,
createIterativeILPMachineScheduler   
)
static

◆ GCNMaxILPSchedRegistry

MachineSchedRegistry GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler) ( "gcn-max-ilp"  ,
"Run GCN scheduler to maximize ilp"  ,
createGCNMaxILPMachineScheduler   
)
static

◆ GCNMaxOccupancySchedRegistry

MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler) ( "gcn-max-occupancy"  ,
"Run GCN scheduler to maximize occupancy"  ,
createGCNMaxOccupancyMachineScheduler   
)
static

◆ GCNMinRegSchedRegistry

MachineSchedRegistry GCNMinRegSchedRegistry("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler) ( "gcn-iterative-minreg"  ,
"Run GCN iterative scheduler for minimal register usage (experimental)"  ,
createMinRegScheduler   
)
static

◆ InternalizeSymbols

cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden) ( "amdgpu-internalize-symbols"  ,
cl::desc("Enable elimination of non-kernel functions and unused globals")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ IterativeGCNMaxOccupancySchedRegistry

MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler) ( "gcn-iterative-max-occupancy-experimental"  ,
"Run GCN scheduler to maximize occupancy (experimental)"  ,
createIterativeGCNMaxOccupancyMachineScheduler   
)
static

◆ LateCFGStructurize

cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden) ( "amdgpu-late-structurize"  ,
cl::desc("Enable late CFG structurization")  ,
cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG)  ,
cl::Hidden   
)
static

◆ LowerCtorDtor

cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden) ( "amdgpu-lower-global-ctor-dtor"  ,
cl::desc("Lower GPU ctor / dtors to globals on the device.")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ OptExecMaskPreRA

cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true)) ( "amdgpu-opt-exec-mask-pre-ra"  ,
cl::Hidden  ,
cl::desc("Run pre-RA exec mask optimizations")  ,
cl::init(true)   
)
static

◆ OptVGPRLiveRange

cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden) ( "amdgpu-opt-vgpr-liverange"  ,
cl::desc("Enable VGPR liverange optimizations for if-else structure")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ RegAllocOptNotSupportedMessage

const char RegAllocOptNotSupportedMessage[]
static
Initial value:
=
"-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc"

Definition at line 1357 of file AMDGPUTargetMachine.cpp.

◆ RemoveIncompatibleFunctions

cl::opt< bool > RemoveIncompatibleFunctions("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true)) ( "amdgpu-enable-remove-incompatible-functions"  ,
cl::Hidden  ,
cl::desc("Enable removal of functions when they" "use features not supported by the target GPU")  ,
cl::init(true)   
)
static

◆ ScalarizeGlobal

cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden) ( "amdgpu-scalarize-global-loads"  ,
cl::desc("Enable global load scalarization")  ,
cl::init(true)  ,
cl::Hidden   
)
static

◆ SISchedRegistry

MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler) ( "si"  ,
"Run SI's custom scheduler"  ,
createSIMachineScheduler   
)
static