LLVM  9.0.0svn
Functions | Variables
AMDGPUTargetMachine.cpp File Reference

The AMDGPU target machine contains all of the hardware-specific information needed to emit code for R600 and SI GPUs. More...

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "R600MachineScheduler.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Vectorize.h"
#include <memory>
Include dependency graph for AMDGPUTargetMachine.cpp:

Go to the source code of this file.

Functions

void LLVMInitializeAMDGPUTarget ()
 
static std::unique_ptr< TargetLoweringObjectFile > createTLOF (const Triple &TT)
 
static ScheduleDAGInstrs * createR600MachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createSIMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createMinRegScheduler (MachineSchedContext *C)
 
static ScheduleDAGInstrs * createIterativeILPMachineScheduler (MachineSchedContext *C)
 
static StringRef computeDataLayout (const Triple &TT)
 
static LLVM_READNONE StringRef getGPUOrDefault (const Triple &TT, StringRef GPU)
 
static Reloc::Model getEffectiveRelocModel (Optional< Reloc::Model > RM)
 
static bool mustPreserveGV (const GlobalValue &GV)
 Predicate for Internalize pass. More...
 

Variables

static cl::opt< bool > EnableR600StructurizeCFG ("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
 
static cl::opt< bool > EnableSROA ("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
 
static cl::opt< bool > EnableEarlyIfConversion ("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
 
static cl::opt< bool > OptExecMaskPreRA ("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
 
static cl::opt< bool > EnableR600IfConvert ("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
 
static cl::opt< bool > EnableLoadStoreVectorizer ("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > ScalarizeGlobal ("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > InternalizeSymbols ("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EarlyInlineAll ("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EnableSDWAPeephole ("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
 
static cl::opt< bool > EnableDPPCombine ("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
 
static cl::opt< bool > EnableAMDGPUAliasAnalysis ("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
 
static cl::opt< bool, true > LateCFGStructurize ("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
 
static cl::opt< bool, true > EnableAMDGPUFunctionCallsOpt ("amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableLibCallSimplify ("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableLowerKernelArguments ("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableRegReassign ("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableAtomicOptimizations ("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
 
static cl::opt< bool > EnableSIModeRegisterPass ("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
 
static cl::opt< bool > EnableDCEInRA ("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
 
static cl::opt< bool > EnableScalarIRPasses ("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
 
static MachineSchedRegistry R600SchedRegistry ("r600", "Run R600's custom scheduler", createR600MachineScheduler)
 
static MachineSchedRegistry SISchedRegistry ("si", "Run SI's custom scheduler", createSIMachineScheduler)
 
static MachineSchedRegistry GCNMaxOccupancySchedRegistry ("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
 
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry ("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
 
static MachineSchedRegistry GCNMinRegSchedRegistry ("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
 
static MachineSchedRegistry GCNILPSchedRegistry ("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
 

Detailed Description

The AMDGPU target machine contains all of the hardware-specific information needed to emit code for R600 and SI GPUs.

Definition in file AMDGPUTargetMachine.cpp.

Function Documentation

◆ computeDataLayout()

static StringRef computeDataLayout ( const Triple & TT)
static

◆ createGCNMaxOccupancyMachineScheduler()

static ScheduleDAGInstrs* createGCNMaxOccupancyMachineScheduler ( MachineSchedContext * C)
static

◆ createIterativeGCNMaxOccupancyMachineScheduler()

static ScheduleDAGInstrs* createIterativeGCNMaxOccupancyMachineScheduler ( MachineSchedContext * C)
static

◆ createIterativeILPMachineScheduler()

static ScheduleDAGInstrs* createIterativeILPMachineScheduler ( MachineSchedContext * C)
static

◆ createMinRegScheduler()

static ScheduleDAGInstrs* createMinRegScheduler ( MachineSchedContext * C)
static

◆ createR600MachineScheduler()

static ScheduleDAGInstrs* createR600MachineScheduler ( MachineSchedContext * C)
static

◆ createSIMachineScheduler()

static ScheduleDAGInstrs* createSIMachineScheduler ( MachineSchedContext * C)
static

◆ createTLOF()

static std::unique_ptr<TargetLoweringObjectFile> createTLOF ( const Triple & TT)
static

Definition at line 245 of file AMDGPUTargetMachine.cpp.

◆ getEffectiveRelocModel()

static Reloc::Model getEffectiveRelocModel ( Optional< Reloc::Model > RM)
static

Definition at line 346 of file AMDGPUTargetMachine.cpp.

References llvm::Reloc::PIC_.

◆ getGPUOrDefault()

static LLVM_READNONE StringRef getGPUOrDefault ( const Triple & TT,
StringRef  GPU 
)
static

◆ LLVMInitializeAMDGPUTarget()

void LLVMInitializeAMDGPUTarget ( )

Definition at line 185 of file AMDGPUTargetMachine.cpp.

References llvm::PassRegistry::getPassRegistry(), llvm::getTheAMDGPUTarget(), llvm::getTheGCNTarget(), llvm::initializeAMDGPUAAWrapperPassPass(), llvm::initializeAMDGPUAlwaysInlinePass(), llvm::initializeAMDGPUAnnotateKernelFeaturesPass(), llvm::initializeAMDGPUAnnotateUniformValuesPass(), llvm::initializeAMDGPUArgumentUsageInfoPass(), llvm::initializeAMDGPUAtomicOptimizerPass(), llvm::initializeAMDGPUCodeGenPreparePass(), llvm::initializeAMDGPUDAGToDAGISelPass(), llvm::initializeAMDGPUExternalAAWrapperPass(), llvm::initializeAMDGPUFixFunctionBitcastsPass(), llvm::initializeAMDGPUInlinerPass(), llvm::initializeAMDGPULowerIntrinsicsPass(), llvm::initializeAMDGPULowerKernelArgumentsPass(), llvm::initializeAMDGPULowerKernelAttributesPass(), llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(), llvm::initializeAMDGPUPromoteAllocaPass(), llvm::initializeAMDGPUPropagateAttributesEarlyPass(), llvm::initializeAMDGPUPropagateAttributesLatePass(), llvm::initializeAMDGPURewriteOutArgumentsPass(), llvm::initializeAMDGPUSimplifyLibCallsPass(), llvm::initializeAMDGPUUnifyDivergentExitNodesPass(), llvm::initializeAMDGPUUnifyMetadataPass(), llvm::initializeAMDGPUUseNativeCallsPass(), llvm::initializeGCNDPPCombinePass(), llvm::initializeGCNNSAReassignPass(), llvm::initializeGCNRegBankReassignPass(), llvm::initializeGlobalISel(), llvm::initializeR600ClauseMergePassPass(), llvm::initializeR600ControlFlowFinalizerPass(), llvm::initializeR600ExpandSpecialInstrsPassPass(), llvm::initializeR600PacketizerPass(), llvm::initializeR600VectorRegMergerPass(), llvm::initializeSIAnnotateControlFlowPass(), llvm::initializeSIFixSGPRCopiesPass(), llvm::initializeSIFixupVectorISelPass(), llvm::initializeSIFixVGPRCopiesPass(), llvm::initializeSIFoldOperandsPass(), llvm::initializeSIFormMemoryClausesPass(), llvm::initializeSIInsertSkipsPass(), llvm::initializeSIInsertWaitcntsPass(), llvm::initializeSILoadStoreOptimizerPass(), llvm::initializeSILowerControlFlowPass(), 
llvm::initializeSILowerI1CopiesPass(), llvm::initializeSILowerSGPRSpillsPass(), llvm::initializeSIMemoryLegalizerPass(), llvm::initializeSIModeRegisterPass(), llvm::initializeSIOptimizeExecMaskingPass(), llvm::initializeSIOptimizeExecMaskingPreRAPass(), llvm::initializeSIPeepholeSDWAPass(), llvm::initializeSIPreAllocateWWMRegsPass(), llvm::initializeSIShrinkInstructionsPass(), llvm::initializeSIWholeQuadModePass(), X, and Y.

◆ mustPreserveGV()

static bool mustPreserveGV ( const GlobalValue & GV)
static

Variable Documentation

◆ EarlyInlineAll

cl::opt<bool> EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
static

◆ EnableAMDGPUAliasAnalysis

cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
static

◆ EnableAMDGPUFunctionCallsOpt

cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt("amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true), cl::Hidden)
static

◆ EnableAtomicOptimizations

cl::opt<bool> EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
static

◆ EnableDCEInRA

cl::opt<bool> EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
static

◆ EnableDPPCombine

cl::opt<bool> EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
static

◆ EnableEarlyIfConversion

cl::opt<bool> EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
static

◆ EnableLibCallSimplify

cl::opt<bool> EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
static

◆ EnableLoadStoreVectorizer

cl::opt<bool> EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
static

◆ EnableLowerKernelArguments

cl::opt<bool> EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
static

◆ EnableR600IfConvert

cl::opt<bool> EnableR600IfConvert("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
static

◆ EnableR600StructurizeCFG

cl::opt<bool> EnableR600StructurizeCFG("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
static

◆ EnableRegReassign

cl::opt<bool> EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
static

◆ EnableScalarIRPasses

cl::opt<bool> EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
static

◆ EnableSDWAPeephole

cl::opt<bool> EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
static

◆ EnableSIModeRegisterPass

cl::opt<bool> EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
static

◆ EnableSROA

cl::opt<bool> EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
static

◆ GCNILPSchedRegistry

MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
static

◆ GCNMaxOccupancySchedRegistry

MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
static

◆ GCNMinRegSchedRegistry

MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
static

◆ InternalizeSymbols

cl::opt<bool> InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
static

◆ IterativeGCNMaxOccupancySchedRegistry

MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
static

◆ LateCFGStructurize

cl::opt<bool, true> LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
static

◆ OptExecMaskPreRA

cl::opt<bool> OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
static

◆ R600SchedRegistry

MachineSchedRegistry R600SchedRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler)
static

◆ ScalarizeGlobal

cl::opt<bool> ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static

◆ SISchedRegistry

MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
static