LLVM  14.0.0git
Functions | Variables
AMDGPUTargetMachine.cpp File Reference
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUExportClustering.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "R600.h"
#include "R600TargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Vectorize.h"
Include dependency graph for AMDGPUTargetMachine.cpp:

Go to the source code of this file.

Functions

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget ()
 
static std::unique_ptr< TargetLoweringObjectFile > createTLOF (const Triple &TT)
 
static ScheduleDAGInstrs * createSIMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createMinRegScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createIterativeILPMachineScheduler (MachineSchedContext *C)
 
static StringRef computeDataLayout (const Triple &TT)
 
static LLVM_READNONE StringRef getGPUOrDefault (const Triple &TT, StringRef GPU)
 
static Reloc::Model getEffectiveRelocModel (Optional< Reloc::Model > RM)
 
static bool mustPreserveGV (const GlobalValue &GV)
 Predicate for Internalize pass. More...
 

Variables

static cl::opt< bool > EnableSROA ("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
 
static cl::opt< bool > EnableEarlyIfConversion ("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
 
static cl::opt< bool > OptExecMaskPreRA ("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
 
static cl::opt< bool > EnableLoadStoreVectorizer ("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > ScalarizeGlobal ("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > InternalizeSymbols ("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EarlyInlineAll ("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EnableSDWAPeephole ("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
 
static cl::opt< bool > EnableDPPCombine ("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
 
static cl::opt< bool > EnableAMDGPUAliasAnalysis ("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
 
static cl::opt< bool, true > LateCFGStructurize ("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
 
static cl::opt< bool, true > EnableAMDGPUFixedFunctionABIOpt ("amdgpu-fixed-function-abi", cl::desc("Enable all implicit function arguments"), cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EnableLibCallSimplify ("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableLowerKernelArguments ("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableRegReassign ("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > OptVGPRLiveRange ("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableAtomicOptimizations ("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EnableSIModeRegisterPass ("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableDCEInRA ("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
 
static cl::opt< bool > EnableScalarIRPasses ("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableStructurizerWorkarounds ("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableLDSReplaceWithPointer ("amdgpu-enable-lds-replace-with-pointer", cl::desc("Enable LDS replace with pointer pass"), cl::init(false), cl::Hidden)
 
static cl::opt< bool, true > EnableLowerModuleLDS ("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnablePreRAOptimizations ("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnablePromoteKernelArguments ("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
 
static MachineSchedRegistry SISchedRegistry ("si", "Run SI's custom scheduler", createSIMachineScheduler)
 
static MachineSchedRegistry GCNMaxOccupancySchedRegistry ("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
 
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry ("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
 
static MachineSchedRegistry GCNMinRegSchedRegistry ("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
 
static MachineSchedRegistry GCNILPSchedRegistry ("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
 
static const char RegAllocOptNotSupportedMessage []
 

Detailed Description

The AMDGPU target machine contains all of the hardware-specific information needed to emit code for SI+ GPUs.

Definition in file AMDGPUTargetMachine.cpp.

Function Documentation

◆ computeDataLayout()

static StringRef computeDataLayout ( const Triple & TT)
static

Definition at line 452 of file AMDGPUTargetMachine.cpp.

References llvm::Triple::r600.

◆ createGCNMaxOccupancyMachineScheduler()

static ScheduleDAGInstrs* createGCNMaxOccupancyMachineScheduler ( MachineSchedContext * C)
static

◆ createIterativeGCNMaxOccupancyMachineScheduler()

static ScheduleDAGInstrs* createIterativeGCNMaxOccupancyMachineScheduler ( MachineSchedContext * C)
static

◆ createIterativeILPMachineScheduler()

static ScheduleDAGInstrs* createIterativeILPMachineScheduler ( MachineSchedContext * C)
static

◆ createMinRegScheduler()

static ScheduleDAGInstrs* createMinRegScheduler ( MachineSchedContext * C)
static

◆ createSIMachineScheduler()

static ScheduleDAGInstrs* createSIMachineScheduler ( MachineSchedContext * C)
static

Definition at line 392 of file AMDGPUTargetMachine.cpp.

◆ createTLOF()

static std::unique_ptr<TargetLoweringObjectFile> createTLOF ( const Triple & TT)
static

Definition at line 388 of file AMDGPUTargetMachine.cpp.

◆ getEffectiveRelocModel()

static Reloc::Model getEffectiveRelocModel ( Optional< Reloc::Model > RM)
static

Definition at line 479 of file AMDGPUTargetMachine.cpp.

References llvm::Reloc::PIC_.

◆ getGPUOrDefault()

static LLVM_READNONE StringRef getGPUOrDefault ( const Triple & TT,
StringRef  GPU 
)
static

◆ LLVMInitializeAMDGPUTarget()

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget ( )

Definition at line 314 of file AMDGPUTargetMachine.cpp.

References llvm::PassRegistry::getPassRegistry(), llvm::getTheAMDGPUTarget(), llvm::getTheGCNTarget(), llvm::initializeAMDGPUAAWrapperPassPass(), llvm::initializeAMDGPUAlwaysInlinePass(), llvm::initializeAMDGPUAnnotateKernelFeaturesPass(), llvm::initializeAMDGPUAnnotateUniformValuesPass(), llvm::initializeAMDGPUArgumentUsageInfoPass(), llvm::initializeAMDGPUAtomicOptimizerPass(), llvm::initializeAMDGPUAttributorPass(), llvm::initializeAMDGPUCodeGenPreparePass(), llvm::initializeAMDGPUCtorDtorLoweringPass(), llvm::initializeAMDGPUDAGToDAGISelPass(), llvm::initializeAMDGPUExternalAAWrapperPass(), llvm::initializeAMDGPUFixFunctionBitcastsPass(), llvm::initializeAMDGPULateCodeGenPreparePass(), llvm::initializeAMDGPULowerIntrinsicsPass(), llvm::initializeAMDGPULowerKernelArgumentsPass(), llvm::initializeAMDGPULowerKernelAttributesPass(), llvm::initializeAMDGPULowerModuleLDSPass(), llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(), llvm::initializeAMDGPUPostLegalizerCombinerPass(), llvm::initializeAMDGPUPreLegalizerCombinerPass(), llvm::initializeAMDGPUPrintfRuntimeBindingPass(), llvm::initializeAMDGPUPromoteAllocaPass(), llvm::initializeAMDGPUPromoteAllocaToVectorPass(), llvm::initializeAMDGPUPromoteKernelArgumentsPass(), llvm::initializeAMDGPUPropagateAttributesEarlyPass(), llvm::initializeAMDGPUPropagateAttributesLatePass(), llvm::initializeAMDGPURegBankCombinerPass(), llvm::initializeAMDGPUReplaceLDSUseWithPointerPass(), llvm::initializeAMDGPUResourceUsageAnalysisPass(), llvm::initializeAMDGPURewriteOutArgumentsPass(), llvm::initializeAMDGPUSimplifyLibCallsPass(), llvm::initializeAMDGPUUnifyDivergentExitNodesPass(), llvm::initializeAMDGPUUnifyMetadataPass(), llvm::initializeAMDGPUUseNativeCallsPass(), llvm::initializeGCNDPPCombinePass(), llvm::initializeGCNNSAReassignPass(), llvm::initializeGCNPreRAOptimizationsPass(), llvm::initializeGlobalISel(), llvm::initializeR600ClauseMergePassPass(), llvm::initializeR600ControlFlowFinalizerPass(), 
llvm::initializeR600ExpandSpecialInstrsPassPass(), llvm::initializeR600PacketizerPass(), llvm::initializeR600VectorRegMergerPass(), llvm::initializeSIAnnotateControlFlowPass(), llvm::initializeSIFixSGPRCopiesPass(), llvm::initializeSIFixVGPRCopiesPass(), llvm::initializeSIFoldOperandsPass(), llvm::initializeSIFormMemoryClausesPass(), llvm::initializeSIInsertHardClausesPass(), llvm::initializeSIInsertWaitcntsPass(), llvm::initializeSILateBranchLoweringPass(), llvm::initializeSILoadStoreOptimizerPass(), llvm::initializeSILowerControlFlowPass(), llvm::initializeSILowerI1CopiesPass(), llvm::initializeSILowerSGPRSpillsPass(), llvm::initializeSIMemoryLegalizerPass(), llvm::initializeSIModeRegisterPass(), llvm::initializeSIOptimizeExecMaskingPass(), llvm::initializeSIOptimizeExecMaskingPreRAPass(), llvm::initializeSIOptimizeVGPRLiveRangePass(), llvm::initializeSIPeepholeSDWAPass(), llvm::initializeSIPostRABundlerPass(), llvm::initializeSIPreAllocateWWMRegsPass(), llvm::initializeSIPreEmitPeepholePass(), llvm::initializeSIShrinkInstructionsPass(), llvm::initializeSIWholeQuadModePass(), X, and Y.

◆ mustPreserveGV()

static bool mustPreserveGV ( const GlobalValue & GV)
static

Variable Documentation

◆ EarlyInlineAll

cl::opt<bool> EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
static

◆ EnableAMDGPUAliasAnalysis

cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
static

◆ EnableAMDGPUFixedFunctionABIOpt

cl::opt<bool, true> EnableAMDGPUFixedFunctionABIOpt("amdgpu-fixed-function-abi", cl::desc("Enable all implicit function arguments"), cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI), cl::init(false), cl::Hidden)
static

◆ EnableAtomicOptimizations

cl::opt<bool> EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
static

◆ EnableDCEInRA

cl::opt<bool> EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
static

◆ EnableDPPCombine

cl::opt<bool> EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
static

◆ EnableEarlyIfConversion

cl::opt<bool> EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
static

◆ EnableLDSReplaceWithPointer

cl::opt<bool> EnableLDSReplaceWithPointer("amdgpu-enable-lds-replace-with-pointer", cl::desc("Enable LDS replace with pointer pass"), cl::init(false), cl::Hidden)
static

◆ EnableLibCallSimplify

cl::opt<bool> EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
static

◆ EnableLoadStoreVectorizer

cl::opt<bool> EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
static

◆ EnableLowerKernelArguments

cl::opt<bool> EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
static

◆ EnableLowerModuleLDS

cl::opt<bool, true> EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
static

◆ EnablePreRAOptimizations

cl::opt<bool> EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
static

◆ EnablePromoteKernelArguments

cl::opt<bool> EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
static

◆ EnableRegReassign

cl::opt<bool> EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
static

◆ EnableScalarIRPasses

cl::opt<bool> EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
static

◆ EnableSDWAPeephole

cl::opt<bool> EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
static

◆ EnableSIModeRegisterPass

cl::opt<bool> EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
static

◆ EnableSROA

cl::opt<bool> EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
static

◆ EnableStructurizerWorkarounds

cl::opt<bool> EnableStructurizerWorkarounds("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden)
static

◆ GCNILPSchedRegistry

MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
static

◆ GCNMaxOccupancySchedRegistry

MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
static

◆ GCNMinRegSchedRegistry

MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
static

◆ InternalizeSymbols

cl::opt<bool> InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
static

◆ IterativeGCNMaxOccupancySchedRegistry

MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
static

◆ LateCFGStructurize

cl::opt<bool, true> LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
static

◆ OptExecMaskPreRA

cl::opt<bool> OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
static

◆ OptVGPRLiveRange

cl::opt<bool> OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
static

◆ RegAllocOptNotSupportedMessage

const char RegAllocOptNotSupportedMessage[]
static
Initial value:
=
"-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc"

Definition at line 1283 of file AMDGPUTargetMachine.cpp.

◆ ScalarizeGlobal

cl::opt<bool> ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static

◆ SISchedRegistry

MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
static