LLVM 20.0.0git
Macros | Functions | Variables
AMDGPUTargetMachine.cpp File Reference

This file contains both AMDGPU target machine and the CodeGen pass builder. More...

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUExportClustering.h"
#include "AMDGPUIGroupLP.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegBankSelect.h"
#include "AMDGPUSplitModule.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
#include "R600TargetMachine.h"
#include "SIFixSGPRCopies.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <optional>
#include "llvm/Passes/TargetPassRegistry.inc"

Go to the source code of this file.

Macros

#define GET_PASS_REGISTRY   "AMDGPUPassRegistry.def"
 

Functions

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget ()
 
static std::unique_ptr< TargetLoweringObjectFilecreateTLOF (const Triple &TT)
 
static ScheduleDAGInstrscreateSIMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrscreateGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrscreateGCNMaxILPMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrscreateIterativeGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrscreateMinRegScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrscreateIterativeILPMachineScheduler (MachineSchedContext *C)
 
static StringRef computeDataLayout (const Triple &TT)
 
static LLVM_READNONE StringRef getGPUOrDefault (const Triple &TT, StringRef GPU)
 
static Reloc::Model getEffectiveRelocModel (std::optional< Reloc::Model > RM)
 
static bool mustPreserveGV (const GlobalValue &GV)
 Predicate for Internalize pass.
 
static Expected< ScanOptionsparseAMDGPUAtomicOptimizerStrategy (StringRef Params)
 
Expected< AMDGPUAttributorOptionsparseAMDGPUAttributorPassOptions (StringRef Params)
 

Variables

static cl::opt< boolEnableEarlyIfConversion ("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
 
static cl::opt< boolOptExecMaskPreRA ("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
 
static cl::opt< boolLowerCtorDtor ("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)
 
static cl::opt< boolEnableLoadStoreVectorizer ("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
 
static cl::opt< boolScalarizeGlobal ("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
 
static cl::opt< boolInternalizeSymbols ("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
 
static cl::opt< boolEarlyInlineAll ("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
 
static cl::opt< boolRemoveIncompatibleFunctions ("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true))
 
static cl::opt< boolEnableSDWAPeephole ("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
 
static cl::opt< boolEnableDPPCombine ("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
 
static cl::opt< boolEnableAMDGPUAliasAnalysis ("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
 
static cl::opt< bool, trueLateCFGStructurize ("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
 
static cl::opt< bool, trueDisableStructurizer ("amdgpu-disable-structurizer", cl::desc("Disable structurizer for experiments; produces unusable code"), cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden)
 
static cl::opt< boolEnableLibCallSimplify ("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
 
static cl::opt< boolEnableLowerKernelArguments ("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
 
static cl::opt< boolEnableRegReassign ("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
 
static cl::opt< boolOptVGPRLiveRange ("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
 
static cl::opt< ScanOptionsAMDGPUAtomicOptimizerStrategy ("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values(clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")))
 
static cl::opt< boolEnableSIModeRegisterPass ("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
 
static cl::opt< boolEnableInsertSingleUseVDST ("amdgpu-enable-single-use-vdst", cl::desc("Enable s_singleuse_vdst insertion"), cl::init(false), cl::Hidden)
 
static cl::opt< boolEnableInsertDelayAlu ("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)
 
static cl::opt< boolEnableVOPD ("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden)
 
static cl::opt< boolEnableDCEInRA ("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
 
static cl::opt< boolEnableSetWavePriority ("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)
 
static cl::opt< boolEnableScalarIRPasses ("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
 
static cl::opt< bool, trueEnableStructurizerWorkarounds ("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds), cl::init(true), cl::Hidden)
 
static cl::opt< bool, trueEnableLowerModuleLDS ("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
 
static cl::opt< boolEnablePreRAOptimizations ("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
 
static cl::opt< boolEnablePromoteKernelArguments ("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
 
static cl::opt< boolEnableImageIntrinsicOptimizer ("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden)
 
static cl::opt< boolEnableLoopPrefetch ("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false))
 
static cl::opt< boolEnableMaxIlpSchedStrategy ("amdgpu-enable-max-ilp-scheduling-strategy", cl::desc("Enable scheduling strategy to maximize ILP for a single wave."), cl::Hidden, cl::init(false))
 
static cl::opt< boolEnableRewritePartialRegUses ("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden)
 
static cl::opt< boolEnableHipStdPar ("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden)
 
static cl::opt< boolEnableAMDGPUAttributor ("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden)
 
static MachineSchedRegistry SISchedRegistry ("si", "Run SI's custom scheduler", createSIMachineScheduler)
 
static MachineSchedRegistry GCNMaxOccupancySchedRegistry ("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
 
static MachineSchedRegistry GCNMaxILPSchedRegistry ("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler)
 
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry ("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
 
static MachineSchedRegistry GCNMinRegSchedRegistry ("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
 
static MachineSchedRegistry GCNILPSchedRegistry ("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
 
static const char RegAllocOptNotSupportedMessage []
 

Detailed Description

This file contains both AMDGPU target machine and the CodeGen pass builder.

The AMDGPU target machine contains all of the hardware specific information needed to emit code for SI+ GPUs in the legacy pass manager pipeline. The CodeGen pass builder handles the pass pipeline for new pass manager.

Definition in file AMDGPUTargetMachine.cpp.

Macro Definition Documentation

◆ GET_PASS_REGISTRY

#define GET_PASS_REGISTRY   "AMDGPUPassRegistry.def"

Function Documentation

◆ computeDataLayout()

static StringRef computeDataLayout ( const Triple TT)
static

Definition at line 573 of file AMDGPUTargetMachine.cpp.

References llvm::Triple::r600.

◆ createGCNMaxILPMachineScheduler()

static ScheduleDAGInstrs * createGCNMaxILPMachineScheduler ( MachineSchedContext C)
static

◆ createGCNMaxOccupancyMachineScheduler()

static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler ( MachineSchedContext C)
static

◆ createIterativeGCNMaxOccupancyMachineScheduler()

static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler ( MachineSchedContext C)
static

◆ createIterativeILPMachineScheduler()

static ScheduleDAGInstrs * createIterativeILPMachineScheduler ( MachineSchedContext C)
static

◆ createMinRegScheduler()

static ScheduleDAGInstrs * createMinRegScheduler ( MachineSchedContext C)
static

◆ createSIMachineScheduler()

static ScheduleDAGInstrs * createSIMachineScheduler ( MachineSchedContext C)
static

Definition at line 491 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C.

◆ createTLOF()

static std::unique_ptr< TargetLoweringObjectFile > createTLOF ( const Triple TT)
static

Definition at line 487 of file AMDGPUTargetMachine.cpp.

◆ getEffectiveRelocModel()

static Reloc::Model getEffectiveRelocModel ( std::optional< Reloc::Model RM)
static

Definition at line 605 of file AMDGPUTargetMachine.cpp.

References llvm::Reloc::PIC_.

◆ getGPUOrDefault()

static LLVM_READNONE StringRef getGPUOrDefault ( const Triple TT,
StringRef  GPU 
)
static

◆ LLVMInitializeAMDGPUTarget()

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget ( )

Definition at line 406 of file AMDGPUTargetMachine.cpp.

References llvm::PassRegistry::getPassRegistry(), llvm::getTheGCNTarget(), llvm::getTheR600Target(), llvm::initializeAMDGPUAAWrapperPassPass(), llvm::initializeAMDGPUAlwaysInlinePass(), llvm::initializeAMDGPUAnnotateKernelFeaturesPass(), llvm::initializeAMDGPUAnnotateUniformValuesLegacyPass(), llvm::initializeAMDGPUArgumentUsageInfoPass(), llvm::initializeAMDGPUAtomicOptimizerPass(), llvm::initializeAMDGPUAttributorLegacyPass(), llvm::initializeAMDGPUCodeGenPreparePass(), llvm::initializeAMDGPUCtorDtorLoweringLegacyPass(), llvm::initializeAMDGPUDAGToDAGISelLegacyPass(), llvm::initializeAMDGPUExternalAAWrapperPass(), llvm::initializeAMDGPUGlobalISelDivergenceLoweringPass(), llvm::initializeAMDGPUImageIntrinsicOptimizerPass(), llvm::initializeAMDGPUInsertDelayAluPass(), llvm::initializeAMDGPUInsertSingleUseVDSTPass(), llvm::initializeAMDGPULateCodeGenPrepareLegacyPass(), llvm::initializeAMDGPULowerBufferFatPointersPass(), llvm::initializeAMDGPULowerKernelArgumentsPass(), llvm::initializeAMDGPULowerKernelAttributesPass(), llvm::initializeAMDGPULowerModuleLDSLegacyPass(), llvm::initializeAMDGPUMarkLastScratchLoadPass(), llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(), llvm::initializeAMDGPUPostLegalizerCombinerPass(), llvm::initializeAMDGPUPreLegalizerCombinerPass(), llvm::initializeAMDGPUPrintfRuntimeBindingPass(), llvm::initializeAMDGPUPromoteAllocaPass(), llvm::initializeAMDGPUPromoteAllocaToVectorPass(), llvm::initializeAMDGPUPromoteKernelArgumentsPass(), llvm::initializeAMDGPURegBankCombinerPass(), llvm::initializeAMDGPURegBankSelectPass(), llvm::initializeAMDGPURemoveIncompatibleFunctionsPass(), llvm::initializeAMDGPUResourceUsageAnalysisPass(), llvm::initializeAMDGPURewriteOutArgumentsPass(), llvm::initializeAMDGPURewriteUndefForPHILegacyPass(), llvm::initializeAMDGPUUnifyDivergentExitNodesPass(), llvm::initializeAMDGPUUnifyMetadataPass(), llvm::initializeGCNCreateVOPDPass(), llvm::initializeGCNDPPCombinePass(), llvm::initializeGCNNSAReassignPass(), llvm::initializeGCNPreRALongBranchRegPass(), llvm::initializeGCNPreRAOptimizationsPass(), llvm::initializeGCNRegPressurePrinterPass(), llvm::initializeGCNRewritePartialRegUsesPass(), llvm::initializeGlobalISel(), llvm::initializeR600ClauseMergePassPass(), llvm::initializeR600ControlFlowFinalizerPass(), llvm::initializeR600ExpandSpecialInstrsPassPass(), llvm::initializeR600PacketizerPass(), llvm::initializeR600VectorRegMergerPass(), llvm::initializeSIAnnotateControlFlowLegacyPass(), llvm::initializeSIFixSGPRCopiesLegacyPass(), llvm::initializeSIFixVGPRCopiesPass(), llvm::initializeSIFoldOperandsPass(), llvm::initializeSIFormMemoryClausesPass(), llvm::initializeSIInsertHardClausesPass(), llvm::initializeSIInsertWaitcntsPass(), llvm::initializeSILateBranchLoweringPass(), llvm::initializeSILoadStoreOptimizerPass(), llvm::initializeSILowerControlFlowPass(), llvm::initializeSILowerI1CopiesLegacyPass(), llvm::initializeSILowerSGPRSpillsPass(), llvm::initializeSILowerWWMCopiesPass(), llvm::initializeSIMemoryLegalizerPass(), llvm::initializeSIModeRegisterPass(), llvm::initializeSIOptimizeExecMaskingPass(), llvm::initializeSIOptimizeExecMaskingPreRAPass(), llvm::initializeSIOptimizeVGPRLiveRangePass(), llvm::initializeSIPeepholeSDWAPass(), llvm::initializeSIPostRABundlerPass(), llvm::initializeSIPreAllocateWWMRegsPass(), llvm::initializeSIPreEmitPeepholePass(), llvm::initializeSIShrinkInstructionsPass(), llvm::initializeSIWholeQuadModePass(), X, and Y.

◆ mustPreserveGV()

static bool mustPreserveGV ( const GlobalValue GV)
static

◆ parseAMDGPUAtomicOptimizerStrategy()

static Expected< ScanOptions > parseAMDGPUAtomicOptimizerStrategy ( StringRef  Params)
static

◆ parseAMDGPUAttributorPassOptions()

Expected< AMDGPUAttributorOptions > parseAMDGPUAttributorPassOptions ( StringRef  Params)

Variable Documentation

◆ AMDGPUAtomicOptimizerStrategy

cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))) ( "amdgpu-atomic-optimizer-strategy"  ,
cl::desc("Select DPP or Iterative strategy for scan")  ,
cl::init(ScanOptions::Iterative)  ,
cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))   
)
static

◆ DisableStructurizer

cl::opt< bool, true > DisableStructurizer("amdgpu-disable-structurizer", cl::desc("Disable structurizer for experiments; produces unusable code"), cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden) ( "amdgpu-disable-structurizer"  ,
cl::desc("Disable structurizer for experiments; produces unusable code")  ,
cl::location(AMDGPUTargetMachine::DisableStructurizer)  ,
cl::ReallyHidden   
)
static

◆ EarlyInlineAll

cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden) ( "amdgpu-early-inline-all"  ,
cl::desc("Inline all functions early")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ EnableAMDGPUAliasAnalysis

cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true)) ( "enable-amdgpu-aa"  ,
cl::Hidden  ,
cl::desc("Enable AMDGPU Alias Analysis")  ,
cl::init(true  
)
static

◆ EnableAMDGPUAttributor

cl::opt< bool > EnableAMDGPUAttributor("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden) ( "amdgpu-attributor-enable"  ,
cl::desc("Enable AMDGPUAttributorPass")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnableDCEInRA

cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc")) ( "amdgpu-dce-in-ra"  ,
cl::init(true ,
cl::Hidden  ,
cl::desc("Enable machine DCE inside regalloc")   
)
static

◆ EnableDPPCombine

cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true)) ( "amdgpu-dpp-combine"  ,
cl::desc("Enable DPP combiner")  ,
cl::init(true  
)
static

◆ EnableEarlyIfConversion

cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false)) ( "amdgpu-early-ifcvt"  ,
cl::Hidden  ,
cl::desc("Run early if-conversion")  ,
cl::init(false)   
)
static

◆ EnableHipStdPar

cl::opt< bool > EnableHipStdPar("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden) ( "amdgpu-enable-hipstdpar"  ,
cl::desc("Enable HIP Standard Parallelism Offload support")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ EnableImageIntrinsicOptimizer

cl::opt< bool > EnableImageIntrinsicOptimizer("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-image-intrinsic-optimizer"  ,
cl::desc("Enable image intrinsic optimizer pass")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnableInsertDelayAlu

cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden) ( "amdgpu-enable-delay-alu"  ,
cl::desc("Enable s_delay_alu insertion")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnableInsertSingleUseVDST

cl::opt< bool > EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst", cl::desc("Enable s_singleuse_vdst insertion"), cl::init(false), cl::Hidden) ( "amdgpu-enable-single-use-vdst"  ,
cl::desc("Enable s_singleuse_vdst insertion")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ EnableLibCallSimplify

cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden) ( "amdgpu-simplify-libcall"  ,
cl::desc("Enable amdgpu library simplifications")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnableLoadStoreVectorizer

cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden) ( "amdgpu-load-store-vectorizer"  ,
cl::desc("Enable load store vectorizer")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnableLoopPrefetch

cl::opt< bool > EnableLoopPrefetch("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false)) ( "amdgpu-loop-prefetch"  ,
cl::desc("Enable loop data prefetch on AMDGPU")  ,
cl::Hidden  ,
cl::init(false)   
)
static

◆ EnableLowerKernelArguments

cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden) ( "amdgpu-ir-lower-kernel-arguments"  ,
cl::desc("Lower kernel argument loads in IR pass")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnableLowerModuleLDS

cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden) ( "amdgpu-enable-lower-module-lds"  ,
cl::desc("Enable lower module lds pass")  ,
cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS)  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnableMaxIlpSchedStrategy

cl::opt< bool > EnableMaxIlpSchedStrategy("amdgpu-enable-max-ilp-scheduling-strategy", cl::desc("Enable scheduling strategy to maximize ILP for a single wave."), cl::Hidden, cl::init(false)) ( "amdgpu-enable-max-ilp-scheduling-strategy"  ,
cl::desc("Enable scheduling strategy to maximize ILP for a single wave.")  ,
cl::Hidden  ,
cl::init(false)   
)
static

◆ EnablePreRAOptimizations

cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-pre-ra-optimizations"  ,
cl::desc("Enable Pre-RA optimizations pass")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnablePromoteKernelArguments

cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true)) ( "amdgpu-enable-promote-kernel-arguments"  ,
cl::desc("Enable promotion of flat kernel pointer arguments to global")  ,
cl::Hidden  ,
cl::init(true  
)
static

◆ EnableRegReassign

cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden) ( "amdgpu-reassign-regs"  ,
cl::desc("Enable register reassign optimizations on gfx10+")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnableRewritePartialRegUses

cl::opt< bool > EnableRewritePartialRegUses("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-rewrite-partial-reg-uses"  ,
cl::desc("Enable rewrite partial reg uses pass")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnableScalarIRPasses

cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden) ( "amdgpu-scalar-ir-passes"  ,
cl::desc("Enable scalar IR passes")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnableSDWAPeephole

cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true)) ( "amdgpu-sdwa-peephole"  ,
cl::desc("Enable SDWA peepholer")  ,
cl::init(true  
)
static

◆ EnableSetWavePriority

cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden) ( "amdgpu-set-wave-priority"  ,
cl::desc("Adjust wave priority")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ EnableSIModeRegisterPass

cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden) ( "amdgpu-mode-register"  ,
cl::desc("Enable mode register pass")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnableStructurizerWorkarounds

cl::opt< bool, true > EnableStructurizerWorkarounds("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds), cl::init(true), cl::Hidden) ( "amdgpu-enable-structurizer-workarounds"  ,
cl::desc("Enable workarounds for the StructurizeCFG pass")  ,
cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds)  ,
cl::init(true ,
cl::Hidden   
)
static

◆ EnableVOPD

cl::opt< bool > EnableVOPD("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden) ( "amdgpu-enable-vopd"  ,
cl::desc("Enable VOPD, dual issue of VALU in wave32")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ GCNILPSchedRegistry

MachineSchedRegistry GCNILPSchedRegistry("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler) ( "gcn-iterative-ilp"  ,
"Run GCN iterative scheduler for ILP scheduling (experimental)"  ,
createIterativeILPMachineScheduler   
)
static

◆ GCNMaxILPSchedRegistry

MachineSchedRegistry GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler) ( "gcn-max-ilp"  ,
"Run GCN scheduler to maximize ilp"  ,
createGCNMaxILPMachineScheduler   
)
static

◆ GCNMaxOccupancySchedRegistry

MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler) ( "gcn-max-occupancy"  ,
"Run GCN scheduler to maximize occupancy"  ,
createGCNMaxOccupancyMachineScheduler   
)
static

◆ GCNMinRegSchedRegistry

MachineSchedRegistry GCNMinRegSchedRegistry("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler) ( "gcn-iterative-minreg"  ,
"Run GCN iterative scheduler for minimal register usage (experimental)"  ,
createMinRegScheduler   
)
static

◆ InternalizeSymbols

cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden) ( "amdgpu-internalize-symbols"  ,
cl::desc("Enable elimination of non-kernel functions and unused globals")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ IterativeGCNMaxOccupancySchedRegistry

MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler) ( "gcn-iterative-max-occupancy-experimental"  ,
"Run GCN scheduler to maximize occupancy (experimental)"  ,
createIterativeGCNMaxOccupancyMachineScheduler   
)
static

◆ LateCFGStructurize

cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden) ( "amdgpu-late-structurize"  ,
cl::desc("Enable late CFG structurization")  ,
cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG)  ,
cl::Hidden   
)
static

◆ LowerCtorDtor

cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden) ( "amdgpu-lower-global-ctor-dtor"  ,
cl::desc("Lower GPU ctor / dtors to globals on the device.")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ OptExecMaskPreRA

cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true)) ( "amdgpu-opt-exec-mask-pre-ra"  ,
cl::Hidden  ,
cl::desc("Run pre-RA exec mask optimizations")  ,
cl::init(true  
)
static

◆ OptVGPRLiveRange

cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden) ( "amdgpu-opt-vgpr-liverange"  ,
cl::desc("Enable VGPR liverange optimizations for if-else structure")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ RegAllocOptNotSupportedMessage

const char RegAllocOptNotSupportedMessage[]
static
Initial value:
=
"-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc"

Definition at line 1447 of file AMDGPUTargetMachine.cpp.

◆ RemoveIncompatibleFunctions

cl::opt< bool > RemoveIncompatibleFunctions("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true)) ( "amdgpu-enable-remove-incompatible-functions"  ,
cl::Hidden  ,
cl::desc("Enable removal of functions when they" "use features not supported by the target GPU")  ,
cl::init(true  
)
static

◆ ScalarizeGlobal

cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden) ( "amdgpu-scalarize-global-loads"  ,
cl::desc("Enable global load scalarization")  ,
cl::init(true ,
cl::Hidden   
)
static

◆ SISchedRegistry

MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler) ( "si"  ,
"Run SI's custom scheduler"  ,
createSIMachineScheduler   
)
static