This file contains both the AMDGPU target machine and the CodeGen pass builder. More...

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUExportClustering.h"
#include "AMDGPUIGroupLP.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUOpenCLEnqueuedBlockLowering.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURemoveIncompatibleFunctions.h"
#include "AMDGPUSplitModule.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "GCNDPPCombine.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
#include "R600TargetMachine.h"
#include "SIFixSGPRCopies.h"
#include "SIFixVGPRCopies.h"
#include "SIFoldOperands.h"
#include "SILoadStoreOptimizer.h"
#include "SILowerControlFlow.h"
#include "SILowerSGPRSpills.h"
#include "SILowerWWMCopies.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "SIOptimizeExecMasking.h"
#include "SIOptimizeVGPRLiveRange.h"
#include "SIPeepholeSDWA.h"
#include "SIPreAllocateWWMRegs.h"
#include "SIShrinkInstructions.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/AtomicExpand.h"
#include "llvm/CodeGen/DeadMachineInstructionElim.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineCSE.h"
#include "llvm/CodeGen/MachineLICM.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <optional>
#include "llvm/Passes/TargetPassRegistry.inc"

Macros
#define	GET_PASS_REGISTRY "AMDGPUPassRegistry.def"

Functions
LLVM_EXTERNAL_VISIBILITY void	LLVMInitializeAMDGPUTarget ()

static std::unique_ptr< TargetLoweringObjectFile >	createTLOF (const Triple &TT)

static ScheduleDAGInstrs *	createSIMachineScheduler (MachineSchedContext *C)

static ScheduleDAGInstrs *	createGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)

static ScheduleDAGInstrs *	createGCNMaxILPMachineScheduler (MachineSchedContext *C)

static ScheduleDAGInstrs *	createGCNMaxMemoryClauseMachineScheduler (MachineSchedContext *C)

static ScheduleDAGInstrs *	createIterativeGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)

static ScheduleDAGInstrs *	createMinRegScheduler (MachineSchedContext *C)

static ScheduleDAGInstrs *	createIterativeILPMachineScheduler (MachineSchedContext *C)

static StringRef	computeDataLayout (const Triple &TT)

static LLVM_READNONE StringRef	getGPUOrDefault (const Triple &TT, StringRef GPU)

static Reloc::Model	getEffectiveRelocModel (std::optional< Reloc::Model > RM)

static bool	mustPreserveGV (const GlobalValue &GV)
	Predicate for Internalize pass.

static Expected< ScanOptions >	parseAMDGPUAtomicOptimizerStrategy (StringRef Params)

Expected< AMDGPUAttributorOptions >	parseAMDGPUAttributorPassOptions (StringRef Params)

Variables
static cl::opt< bool >	EnableEarlyIfConversion ("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))

static cl::opt< bool >	OptExecMaskPreRA ("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))

static cl::opt< bool >	LowerCtorDtor ("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)

static cl::opt< bool >	EnableLoadStoreVectorizer ("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)

static cl::opt< bool >	ScalarizeGlobal ("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)

static cl::opt< bool >	InternalizeSymbols ("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)

static cl::opt< bool >	EarlyInlineAll ("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)

static cl::opt< bool >	RemoveIncompatibleFunctions ("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true))

static cl::opt< bool >	EnableSDWAPeephole ("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))

static cl::opt< bool >	EnableDPPCombine ("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))

static cl::opt< bool >	EnableAMDGPUAliasAnalysis ("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))

static cl::opt< bool >	EnableLibCallSimplify ("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)

static cl::opt< bool >	EnableLowerKernelArguments ("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)

static cl::opt< bool >	EnableRegReassign ("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)

static cl::opt< bool >	OptVGPRLiveRange ("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)

static cl::opt< ScanOptions >	AMDGPUAtomicOptimizerStrategy ("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values(clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")))

static cl::opt< bool >	EnableSIModeRegisterPass ("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)

static cl::opt< bool >	EnableInsertDelayAlu ("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)

static cl::opt< bool >	EnableVOPD ("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden)

static cl::opt< bool >	EnableDCEInRA ("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))

static cl::opt< bool >	EnableSetWavePriority ("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)

static cl::opt< bool >	EnableScalarIRPasses ("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)

static cl::opt< bool >	EnableSwLowerLDS ("amdgpu-enable-sw-lower-lds", cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR."), cl::init(true), cl::Hidden)

static cl::opt< bool, true >	EnableLowerModuleLDS ("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)

static cl::opt< bool >	EnablePreRAOptimizations ("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)

static cl::opt< bool >	EnablePromoteKernelArguments ("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))

static cl::opt< bool >	EnableImageIntrinsicOptimizer ("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden)

static cl::opt< bool >	EnableLoopPrefetch ("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false))

static cl::opt< std::string >	AMDGPUSchedStrategy ("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init(""))

static cl::opt< bool >	EnableRewritePartialRegUses ("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden)

static cl::opt< bool >	EnableHipStdPar ("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden)

static cl::opt< bool >	EnableAMDGPUAttributor ("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden)

static cl::opt< bool >	NewRegBankSelect ("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden)

static cl::opt< bool >	HasClosedWorldAssumption ("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden)

static MachineSchedRegistry	SISchedRegistry ("si", "Run SI's custom scheduler", createSIMachineScheduler)

static MachineSchedRegistry	GCNMaxOccupancySchedRegistry ("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)

static MachineSchedRegistry	GCNMaxILPSchedRegistry ("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler)

static MachineSchedRegistry	GCNMaxMemoryClauseSchedRegistry ("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler)

static MachineSchedRegistry	IterativeGCNMaxOccupancySchedRegistry ("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)

static MachineSchedRegistry	GCNMinRegSchedRegistry ("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)

static MachineSchedRegistry	GCNILPSchedRegistry ("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)

static const char	RegAllocOptNotSupportedMessage []

Detailed Description

This file contains both the AMDGPU target machine and the CodeGen pass builder.

The AMDGPU target machine contains all of the hardware-specific information needed to emit code for SI+ GPUs in the legacy pass manager pipeline. The CodeGen pass builder handles the pass pipeline for the new pass manager.

Definition in file AMDGPUTargetMachine.cpp.

Macro Definition Documentation

◆ GET_PASS_REGISTRY

#define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"

Function Documentation

◆ computeDataLayout()

static StringRef computeDataLayout ( const Triple & TT )

static

Definition at line 654 of file AMDGPUTargetMachine.cpp.

References llvm::Triple::r600.

◆ createGCNMaxILPMachineScheduler()

static ScheduleDAGInstrs * createGCNMaxILPMachineScheduler ( MachineSchedContext * C )

static

Definition at line 576 of file AMDGPUTargetMachine.cpp.

References llvm::ScheduleDAGMI::addMutation(), llvm::CallingConv::C, and llvm::createIGroupLPDAGMutation().

◆ createGCNMaxMemoryClauseMachineScheduler()

static ScheduleDAGInstrs * createGCNMaxMemoryClauseMachineScheduler ( MachineSchedContext * C )

static

Definition at line 584 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C, llvm::createAMDGPUExportClusteringDAGMutation(), llvm::createLoadClusterDAGMutation(), and llvm::createStoreClusterDAGMutation().

◆ createGCNMaxOccupancyMachineScheduler()

static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler ( MachineSchedContext * C )

static

Definition at line 562 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C, llvm::createAMDGPUExportClusteringDAGMutation(), llvm::createAMDGPUMacroFusionDAGMutation(), llvm::createIGroupLPDAGMutation(), llvm::createLoadClusterDAGMutation(), and llvm::createStoreClusterDAGMutation().

◆ createIterativeGCNMaxOccupancyMachineScheduler()

static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler ( MachineSchedContext * C )

static

Definition at line 596 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C, llvm::createLoadClusterDAGMutation(), llvm::createStoreClusterDAGMutation(), and llvm::GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY.

◆ createIterativeILPMachineScheduler()

static ScheduleDAGInstrs * createIterativeILPMachineScheduler ( MachineSchedContext * C )

static

Definition at line 612 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C, llvm::createAMDGPUMacroFusionDAGMutation(), llvm::createLoadClusterDAGMutation(), llvm::createStoreClusterDAGMutation(), and llvm::GCNIterativeScheduler::SCHEDULE_ILP.

◆ createMinRegScheduler()

static ScheduleDAGInstrs * createMinRegScheduler ( MachineSchedContext * C )

static

Definition at line 606 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C, and llvm::GCNIterativeScheduler::SCHEDULE_MINREGFORCED.

◆ createSIMachineScheduler()

static ScheduleDAGInstrs * createSIMachineScheduler ( MachineSchedContext * C )

static

Definition at line 557 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C.

◆ createTLOF()

static std::unique_ptr< TargetLoweringObjectFile > createTLOF ( const Triple & TT )

static

Definition at line 553 of file AMDGPUTargetMachine.cpp.

◆ getEffectiveRelocModel()

static Reloc::Model getEffectiveRelocModel ( std::optional< Reloc::Model > RM )

static

Definition at line 686 of file AMDGPUTargetMachine.cpp.

References llvm::Reloc::PIC_.

◆ getGPUOrDefault()

static LLVM_READNONE StringRef getGPUOrDefault	(	const Triple &	TT,
		StringRef	GPU
	)

static

Definition at line 675 of file AMDGPUTargetMachine.cpp.

References llvm::Triple::amdgcn, llvm::Triple::AMDHSA, and llvm::StringRef::empty().

◆ LLVMInitializeAMDGPUTarget()

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget ( )

Definition at line 469 of file AMDGPUTargetMachine.cpp.

◆ mustPreserveGV()

static bool mustPreserveGV ( const GlobalValue & GV )

static

Predicate for the Internalize pass.

Definition at line 730 of file AMDGPUTargetMachine.cpp.

References F, llvm::AMDGPU::isEntryFunctionCC(), llvm::Constant::removeDeadConstantUsers(), and llvm::Value::use_empty().

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ parseAMDGPUAtomicOptimizerStrategy()

static Expected< ScanOptions > parseAMDGPUAtomicOptimizerStrategy ( StringRef Params )

static

Definition at line 745 of file AMDGPUTargetMachine.cpp.

References llvm::StringSwitch< T, R >::Case(), llvm::StringSwitch< T, R >::Cases(), llvm::StringRef::consume_front(), llvm::StringSwitch< T, R >::Default(), llvm::DPP, llvm::StringRef::empty(), llvm::inconvertibleErrorCode(), llvm::Iterative, and llvm::None.

◆ parseAMDGPUAttributorPassOptions()

Expected< AMDGPUAttributorOptions > parseAMDGPUAttributorPassOptions ( StringRef Params )

Definition at line 760 of file AMDGPUTargetMachine.cpp.

References llvm::StringRef::empty(), llvm::formatv(), llvm::inconvertibleErrorCode(), and llvm::StringRef::split().

Variable Documentation

◆ AMDGPUAtomicOptimizerStrategy

cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")))	(	"amdgpu-atomic-optimizer-strategy"	,
		cl::desc("Select DPP or Iterative strategy for scan")	,
		cl::init(ScanOptions::Iterative)	,
		cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))
	)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUCodeGenPassBuilder::addIRPasses().

◆ AMDGPUSchedStrategy

cl::opt< std::string > AMDGPUSchedStrategy("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init(""))	(	"amdgpu-sched-strategy"	,
		cl::desc("Select custom AMDGPU scheduling strategy.")	,
		cl::Hidden	,
		cl::init("")
	)

static

◆ EarlyInlineAll

cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)	(	"amdgpu-early-inline-all"	,
		cl::desc("Inline all functions early")	,
		cl::init(false)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableAMDGPUAliasAnalysis

cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))	(	"enable-amdgpu-aa"	,
		cl::Hidden	,
		cl::desc("Enable AMDGPU Alias Analysis")	,
		cl::init(true)
	)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses().

◆ EnableAMDGPUAttributor

cl::opt< bool > EnableAMDGPUAttributor("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden)	(	"amdgpu-attributor-enable"	,
		cl::desc("Enable AMDGPUAttributorPass")	,
		cl::init(true)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableDCEInRA

cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))	(	"amdgpu-dce-in-ra"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc("Enable machine DCE inside regalloc")
	)

static

◆ EnableDPPCombine

cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))	(	"amdgpu-dpp-combine"	,
		cl::desc("Enable DPP combiner")	,
		cl::init(true)
	)

static

Referenced by llvm::AMDGPUCodeGenPassBuilder::addMachineSSAOptimization().

◆ EnableEarlyIfConversion

cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))	(	"amdgpu-early-ifcvt"	,
		cl::Hidden	,
		cl::desc("Run early if-conversion")	,
		cl::init(false)
	)

static

◆ EnableHipStdPar

cl::opt< bool > EnableHipStdPar("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden)	(	"amdgpu-enable-hipstdpar"	,
		cl::desc("Enable HIP Standard Parallelism Offload support")	,
		cl::init(false)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableImageIntrinsicOptimizer

cl::opt< bool > EnableImageIntrinsicOptimizer("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden)	(	"amdgpu-enable-image-intrinsic-optimizer"	,
		cl::desc("Enable image intrinsic optimizer pass")	,
		cl::init(true)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUCodeGenPassBuilder::addIRPasses().

◆ EnableInsertDelayAlu

cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)	(	"amdgpu-enable-delay-alu"	,
		cl::desc("Enable s_delay_alu insertion")	,
		cl::init(true)	,
		cl::Hidden
	)

static

◆ EnableLibCallSimplify

cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)	(	"amdgpu-simplify-libcall"	,
		cl::desc("Enable amdgpu library simplifications")	,
		cl::init(true)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableLoadStoreVectorizer

cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)	(	"amdgpu-load-store-vectorizer"	,
		cl::desc("Enable load store vectorizer")	,
		cl::init(true)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUPassConfig::addCodeGenPrepare(), and llvm::AMDGPUCodeGenPassBuilder::addCodeGenPrepare().

◆ EnableLoopPrefetch

cl::opt< bool > EnableLoopPrefetch("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false))	(	"amdgpu-loop-prefetch"	,
		cl::desc("Enable loop data prefetch on AMDGPU")	,
		cl::Hidden	,
		cl::init(false)
	)

static

Referenced by llvm::AMDGPUPassConfig::addStraightLineScalarOptimizationPasses(), and llvm::AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses().

◆ EnableLowerKernelArguments

cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)	(	"amdgpu-ir-lower-kernel-arguments"	,
		cl::desc("Lower kernel argument loads in IR pass")	,
		cl::init(true)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUPassConfig::addCodeGenPrepare(), and llvm::AMDGPUCodeGenPassBuilder::addCodeGenPrepare().

◆ EnableLowerModuleLDS

cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)	(	"amdgpu-enable-lower-module-lds"	,
		cl::desc("Enable lower module lds pass")	,
		cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS)	,
		cl::init(true)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUCodeGenPassBuilder::addIRPasses().

◆ EnablePreRAOptimizations

cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)	(	"amdgpu-enable-pre-ra-optimizations"	,
		cl::desc("Enable Pre-RA optimizations pass")	,
		cl::init(true)	,
		cl::Hidden
	)

static

◆ EnablePromoteKernelArguments

cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))	(	"amdgpu-enable-promote-kernel-arguments"	,
		cl::desc("Enable promotion of flat kernel pointer arguments to global")	,
		cl::Hidden	,
		cl::init(true)
	)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableRegReassign

cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)	(	"amdgpu-reassign-regs"	,
		cl::desc("Enable register reassign optimizations on gfx10+")	,
		cl::init(true)	,
		cl::Hidden
	)

static

◆ EnableRewritePartialRegUses

cl::opt< bool > EnableRewritePartialRegUses("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden)	(	"amdgpu-enable-rewrite-partial-reg-uses"	,
		cl::desc("Enable rewrite partial reg uses pass")	,
		cl::init(true)	,
		cl::Hidden
	)

static

◆ EnableScalarIRPasses

cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)	(	"amdgpu-scalar-ir-passes"	,
		cl::desc("Enable scalar IR passes")	,
		cl::init(true)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUCodeGenPassBuilder::addIRPasses().

◆ EnableSDWAPeephole

cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))	(	"amdgpu-sdwa-peephole"	,
		cl::desc("Enable SDWA peepholer")	,
		cl::init(true)
	)

static

Referenced by llvm::AMDGPUCodeGenPassBuilder::addMachineSSAOptimization().

◆ EnableSetWavePriority

cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)	(	"amdgpu-set-wave-priority"	,
		cl::desc("Adjust wave priority")	,
		cl::init(false)	,
		cl::Hidden
	)

static

◆ EnableSIModeRegisterPass

cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)	(	"amdgpu-mode-register"	,
		cl::desc("Enable mode register pass")	,
		cl::init(true)	,
		cl::Hidden
	)

static

◆ EnableSwLowerLDS

cl::opt< bool > EnableSwLowerLDS("amdgpu-enable-sw-lower-lds", cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR."), cl::init(true), cl::Hidden)	(	"amdgpu-enable-sw-lower-lds"	,
		cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR.")	,
		cl::init(true)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableVOPD

cl::opt< bool > EnableVOPD("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden)	(	"amdgpu-enable-vopd"	,
		cl::desc("Enable VOPD, dual issue of VALU in wave32")	,
		cl::init(true)	,
		cl::Hidden
	)

static

◆ GCNILPSchedRegistry

MachineSchedRegistry GCNILPSchedRegistry("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)	(	"gcn-iterative-ilp"	,
		"Run GCN iterative scheduler for ILP scheduling (experimental)"	,
		createIterativeILPMachineScheduler
	)

static

◆ GCNMaxILPSchedRegistry

MachineSchedRegistry GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler)	(	"gcn-max-ilp"	,
		"Run GCN scheduler to maximize ilp"	,
		createGCNMaxILPMachineScheduler
	)

static

◆ GCNMaxMemoryClauseSchedRegistry

MachineSchedRegistry GCNMaxMemoryClauseSchedRegistry("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler)	(	"gcn-max-memory-clause"	,
		"Run GCN scheduler to maximize memory clause"	,
		createGCNMaxMemoryClauseMachineScheduler
	)

static

◆ GCNMaxOccupancySchedRegistry

MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)	(	"gcn-max-occupancy"	,
		"Run GCN scheduler to maximize occupancy"	,
		createGCNMaxOccupancyMachineScheduler
	)

static

◆ GCNMinRegSchedRegistry

MachineSchedRegistry GCNMinRegSchedRegistry("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)	(	"gcn-iterative-minreg"	,
		"Run GCN iterative scheduler for minimal register usage (experimental)"	,
		createMinRegScheduler
	)

static

◆ HasClosedWorldAssumption

cl::opt< bool > HasClosedWorldAssumption("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden)	(	"amdgpu-link-time-closed-world"	,
		cl::desc("Whether has closed-world assumption at link time")	,
		cl::init(false)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ InternalizeSymbols

cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)	(	"amdgpu-internalize-symbols"	,
		cl::desc("Enable elimination of non-kernel functions and unused globals")	,
		cl::init(false)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ IterativeGCNMaxOccupancySchedRegistry

MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)	(	"gcn-iterative-max-occupancy-experimental"	,
		"Run GCN scheduler to maximize occupancy (experimental)"	,
		createIterativeGCNMaxOccupancyMachineScheduler
	)

static

◆ LowerCtorDtor

cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)	(	"amdgpu-lower-global-ctor-dtor"	,
		cl::desc("Lower GPU ctor / dtors to globals on the device.")	,
		cl::init(true)	,
		cl::Hidden
	)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses(), llvm::AMDGPUCodeGenPassBuilder::addIRPasses(), and llvm::NVPTXAsmPrinter::doInitialization().

◆ NewRegBankSelect

cl::opt< bool > NewRegBankSelect("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden)	(	"new-reg-bank-select"	,
		cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect")	,
		cl::init(false)	,
		cl::Hidden
	)

static

◆ OptExecMaskPreRA

cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))	(	"amdgpu-opt-exec-mask-pre-ra"	,
		cl::Hidden	,
		cl::desc("Run pre-RA exec mask optimizations")	,
		cl::init(true)
	)

static

◆ OptVGPRLiveRange

cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)	(	"amdgpu-opt-vgpr-liverange"	,
		cl::desc("Enable VGPR liverange optimizations for if-else structure")	,
		cl::init(true)	,
		cl::Hidden
	)

static

◆ RegAllocOptNotSupportedMessage

const char RegAllocOptNotSupportedMessage[]

static

Initial value:

=
    "-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
    "and -vgpr-regalloc"

Definition at line 1564 of file AMDGPUTargetMachine.cpp.

◆ RemoveIncompatibleFunctions

cl::opt< bool > RemoveIncompatibleFunctions("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true))	(	"amdgpu-enable-remove-incompatible-functions"	,
		cl::Hidden	,
		cl::desc("Enable removal of functions when they" "use features not supported by the target GPU")	,
		cl::init(true)
	)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUCodeGenPassBuilder::addIRPasses().

◆ ScalarizeGlobal

cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)	(	"amdgpu-scalarize-global-loads"	,
		cl::desc("Enable global load scalarization")	,
		cl::init(true)	,
		cl::Hidden
	)

static

Referenced by llvm::GCNTargetMachine::getSubtargetImpl().

◆ SISchedRegistry

MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)	(	"si"	,
		"Run SI's custom scheduler"	,
		createSIMachineScheduler
	)

static

Macros

Functions

Variables

Detailed Description

Macro Definition Documentation

◆ GET_PASS_REGISTRY

Function Documentation

◆ computeDataLayout()

◆ createGCNMaxILPMachineScheduler()

◆ createGCNMaxMemoryClauseMachineScheduler()

◆ createGCNMaxOccupancyMachineScheduler()

◆ createIterativeGCNMaxOccupancyMachineScheduler()

◆ createIterativeILPMachineScheduler()

◆ createMinRegScheduler()

◆ createSIMachineScheduler()

◆ createTLOF()

◆ getEffectiveRelocModel()

◆ getGPUOrDefault()

◆ LLVMInitializeAMDGPUTarget()

◆ mustPreserveGV()

◆ parseAMDGPUAtomicOptimizerStrategy()

◆ parseAMDGPUAttributorPassOptions()

Variable Documentation

◆ AMDGPUAtomicOptimizerStrategy

◆ AMDGPUSchedStrategy

◆ EarlyInlineAll

◆ EnableAMDGPUAliasAnalysis

◆ EnableAMDGPUAttributor

◆ EnableDCEInRA

◆ EnableDPPCombine

◆ EnableEarlyIfConversion

◆ EnableHipStdPar

◆ EnableImageIntrinsicOptimizer

◆ EnableInsertDelayAlu

◆ EnableLibCallSimplify

◆ EnableLoadStoreVectorizer

◆ EnableLoopPrefetch

◆ EnableLowerKernelArguments

◆ EnableLowerModuleLDS

◆ EnablePreRAOptimizations

◆ EnablePromoteKernelArguments

◆ EnableRegReassign

◆ EnableRewritePartialRegUses

◆ EnableScalarIRPasses

◆ EnableSDWAPeephole

◆ EnableSetWavePriority

◆ EnableSIModeRegisterPass

◆ EnableSwLowerLDS

◆ EnableVOPD

◆ GCNILPSchedRegistry

◆ GCNMaxILPSchedRegistry

◆ GCNMaxMemoryClauseSchedRegistry

◆ GCNMaxOccupancySchedRegistry

◆ GCNMinRegSchedRegistry

◆ HasClosedWorldAssumption

◆ InternalizeSymbols

◆ IterativeGCNMaxOccupancySchedRegistry

◆ LowerCtorDtor

◆ NewRegBankSelect

◆ OptExecMaskPreRA

◆ OptVGPRLiveRange

◆ RegAllocOptNotSupportedMessage

◆ RemoveIncompatibleFunctions

◆ ScalarizeGlobal

◆ SISchedRegistry