LLVM 20.0.0git
|
This file contains both the AMDGPU target machine and the CodeGen pass builder. More...
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUExportClustering.h"
#include "AMDGPUIGroupLP.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegBankSelect.h"
#include "AMDGPUSplitModule.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
#include "R600TargetMachine.h"
#include "SIFixSGPRCopies.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <optional>
#include "llvm/Passes/TargetPassRegistry.inc"
Go to the source code of this file.
Macros | |
#define | GET_PASS_REGISTRY "AMDGPUPassRegistry.def" |
Variables | |
static cl::opt< bool > | EnableEarlyIfConversion ("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false)) |
static cl::opt< bool > | OptExecMaskPreRA ("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true)) |
static cl::opt< bool > | LowerCtorDtor ("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableLoadStoreVectorizer ("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | ScalarizeGlobal ("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | InternalizeSymbols ("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden) |
static cl::opt< bool > | EarlyInlineAll ("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden) |
static cl::opt< bool > | RemoveIncompatibleFunctions ("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true)) |
static cl::opt< bool > | EnableSDWAPeephole ("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true)) |
static cl::opt< bool > | EnableDPPCombine ("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true)) |
static cl::opt< bool > | EnableAMDGPUAliasAnalysis ("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true)) |
static cl::opt< bool, true > | LateCFGStructurize ("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden) |
static cl::opt< bool, true > | DisableStructurizer ("amdgpu-disable-structurizer", cl::desc("Disable structurizer for experiments; produces unusable code"), cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden) |
static cl::opt< bool > | EnableLibCallSimplify ("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableLowerKernelArguments ("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableRegReassign ("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | OptVGPRLiveRange ("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden) |
static cl::opt< ScanOptions > | AMDGPUAtomicOptimizerStrategy ("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values(clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))) |
static cl::opt< bool > | EnableSIModeRegisterPass ("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableInsertSingleUseVDST ("amdgpu-enable-single-use-vdst", cl::desc("Enable s_singleuse_vdst insertion"), cl::init(false), cl::Hidden) |
static cl::opt< bool > | EnableInsertDelayAlu ("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableVOPD ("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableDCEInRA ("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc")) |
static cl::opt< bool > | EnableSetWavePriority ("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden) |
static cl::opt< bool > | EnableScalarIRPasses ("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden) |
static cl::opt< bool, true > | EnableStructurizerWorkarounds ("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds), cl::init(true), cl::Hidden) |
static cl::opt< bool, true > | EnableLowerModuleLDS ("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnablePreRAOptimizations ("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnablePromoteKernelArguments ("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true)) |
static cl::opt< bool > | EnableImageIntrinsicOptimizer ("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableLoopPrefetch ("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false)) |
static cl::opt< bool > | EnableMaxIlpSchedStrategy ("amdgpu-enable-max-ilp-scheduling-strategy", cl::desc("Enable scheduling strategy to maximize ILP for a single wave."), cl::Hidden, cl::init(false)) |
static cl::opt< bool > | EnableRewritePartialRegUses ("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableHipStdPar ("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden) |
static cl::opt< bool > | EnableAMDGPUAttributor ("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden) |
static MachineSchedRegistry | SISchedRegistry ("si", "Run SI's custom scheduler", createSIMachineScheduler) |
static MachineSchedRegistry | GCNMaxOccupancySchedRegistry ("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler) |
static MachineSchedRegistry | GCNMaxILPSchedRegistry ("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler) |
static MachineSchedRegistry | IterativeGCNMaxOccupancySchedRegistry ("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler) |
static MachineSchedRegistry | GCNMinRegSchedRegistry ("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler) |
static MachineSchedRegistry | GCNILPSchedRegistry ("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler) |
static const char | RegAllocOptNotSupportedMessage [] |
This file contains both the AMDGPU target machine and the CodeGen pass builder.
The AMDGPU target machine contains all of the hardware-specific information needed to emit code for SI+ GPUs in the legacy pass manager pipeline. The CodeGen pass builder handles the pass pipeline for the new pass manager.
Definition in file AMDGPUTargetMachine.cpp.
#define GET_PASS_REGISTRY "AMDGPUPassRegistry.def" |
Definition at line 573 of file AMDGPUTargetMachine.cpp.
References llvm::Triple::r600.
|
static |
Definition at line 510 of file AMDGPUTargetMachine.cpp.
References llvm::ScheduleDAGMI::addMutation(), llvm::CallingConv::C, and llvm::createIGroupLPDAGMutation().
|
static |
|
static |
Definition at line 518 of file AMDGPUTargetMachine.cpp.
References llvm::CallingConv::C, llvm::createLoadClusterDAGMutation(), llvm::createStoreClusterDAGMutation(), and llvm::GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY.
|
static |
Definition at line 534 of file AMDGPUTargetMachine.cpp.
References llvm::CallingConv::C, llvm::createAMDGPUMacroFusionDAGMutation(), llvm::createLoadClusterDAGMutation(), llvm::createStoreClusterDAGMutation(), and llvm::GCNIterativeScheduler::SCHEDULE_ILP.
|
static |
Definition at line 528 of file AMDGPUTargetMachine.cpp.
References llvm::CallingConv::C, and llvm::GCNIterativeScheduler::SCHEDULE_MINREGFORCED.
|
static |
Definition at line 491 of file AMDGPUTargetMachine.cpp.
References llvm::CallingConv::C.
|
static |
Definition at line 487 of file AMDGPUTargetMachine.cpp.
|
static |
Definition at line 605 of file AMDGPUTargetMachine.cpp.
References llvm::Reloc::PIC_.
|
static |
Definition at line 594 of file AMDGPUTargetMachine.cpp.
References llvm::Triple::amdgcn, llvm::Triple::AMDHSA, and llvm::StringRef::empty().
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget | ( | ) |
Definition at line 406 of file AMDGPUTargetMachine.cpp.
References llvm::PassRegistry::getPassRegistry(), llvm::getTheGCNTarget(), llvm::getTheR600Target(), llvm::initializeAMDGPUAAWrapperPassPass(), llvm::initializeAMDGPUAlwaysInlinePass(), llvm::initializeAMDGPUAnnotateKernelFeaturesPass(), llvm::initializeAMDGPUAnnotateUniformValuesLegacyPass(), llvm::initializeAMDGPUArgumentUsageInfoPass(), llvm::initializeAMDGPUAtomicOptimizerPass(), llvm::initializeAMDGPUAttributorLegacyPass(), llvm::initializeAMDGPUCodeGenPreparePass(), llvm::initializeAMDGPUCtorDtorLoweringLegacyPass(), llvm::initializeAMDGPUDAGToDAGISelLegacyPass(), llvm::initializeAMDGPUExternalAAWrapperPass(), llvm::initializeAMDGPUGlobalISelDivergenceLoweringPass(), llvm::initializeAMDGPUImageIntrinsicOptimizerPass(), llvm::initializeAMDGPUInsertDelayAluPass(), llvm::initializeAMDGPUInsertSingleUseVDSTPass(), llvm::initializeAMDGPULateCodeGenPrepareLegacyPass(), llvm::initializeAMDGPULowerBufferFatPointersPass(), llvm::initializeAMDGPULowerKernelArgumentsPass(), llvm::initializeAMDGPULowerKernelAttributesPass(), llvm::initializeAMDGPULowerModuleLDSLegacyPass(), llvm::initializeAMDGPUMarkLastScratchLoadPass(), llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(), llvm::initializeAMDGPUPostLegalizerCombinerPass(), llvm::initializeAMDGPUPreLegalizerCombinerPass(), llvm::initializeAMDGPUPrintfRuntimeBindingPass(), llvm::initializeAMDGPUPromoteAllocaPass(), llvm::initializeAMDGPUPromoteAllocaToVectorPass(), llvm::initializeAMDGPUPromoteKernelArgumentsPass(), llvm::initializeAMDGPURegBankCombinerPass(), llvm::initializeAMDGPURegBankSelectPass(), llvm::initializeAMDGPURemoveIncompatibleFunctionsPass(), llvm::initializeAMDGPUResourceUsageAnalysisPass(), llvm::initializeAMDGPURewriteOutArgumentsPass(), llvm::initializeAMDGPURewriteUndefForPHILegacyPass(), llvm::initializeAMDGPUUnifyDivergentExitNodesPass(), llvm::initializeAMDGPUUnifyMetadataPass(), llvm::initializeGCNCreateVOPDPass(), llvm::initializeGCNDPPCombinePass(), llvm::initializeGCNNSAReassignPass(), 
llvm::initializeGCNPreRALongBranchRegPass(), llvm::initializeGCNPreRAOptimizationsPass(), llvm::initializeGCNRegPressurePrinterPass(), llvm::initializeGCNRewritePartialRegUsesPass(), llvm::initializeGlobalISel(), llvm::initializeR600ClauseMergePassPass(), llvm::initializeR600ControlFlowFinalizerPass(), llvm::initializeR600ExpandSpecialInstrsPassPass(), llvm::initializeR600PacketizerPass(), llvm::initializeR600VectorRegMergerPass(), llvm::initializeSIAnnotateControlFlowLegacyPass(), llvm::initializeSIFixSGPRCopiesLegacyPass(), llvm::initializeSIFixVGPRCopiesPass(), llvm::initializeSIFoldOperandsPass(), llvm::initializeSIFormMemoryClausesPass(), llvm::initializeSIInsertHardClausesPass(), llvm::initializeSIInsertWaitcntsPass(), llvm::initializeSILateBranchLoweringPass(), llvm::initializeSILoadStoreOptimizerPass(), llvm::initializeSILowerControlFlowPass(), llvm::initializeSILowerI1CopiesLegacyPass(), llvm::initializeSILowerSGPRSpillsPass(), llvm::initializeSILowerWWMCopiesPass(), llvm::initializeSIMemoryLegalizerPass(), llvm::initializeSIModeRegisterPass(), llvm::initializeSIOptimizeExecMaskingPass(), llvm::initializeSIOptimizeExecMaskingPreRAPass(), llvm::initializeSIOptimizeVGPRLiveRangePass(), llvm::initializeSIPeepholeSDWAPass(), llvm::initializeSIPostRABundlerPass(), llvm::initializeSIPreAllocateWWMRegsPass(), llvm::initializeSIPreEmitPeepholePass(), llvm::initializeSIShrinkInstructionsPass(), llvm::initializeSIWholeQuadModePass(), X, and Y.
|
static |
Predicate for Internalize pass.
Definition at line 651 of file AMDGPUTargetMachine.cpp.
References F, llvm::AMDGPU::isEntryFunctionCC(), llvm::Constant::removeDeadConstantUsers(), and llvm::Value::use_empty().
Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().
|
static |
Definition at line 666 of file AMDGPUTargetMachine.cpp.
References llvm::StringSwitch< T, R >::Case(), llvm::StringSwitch< T, R >::Cases(), llvm::StringRef::consume_front(), llvm::StringSwitch< T, R >::Default(), llvm::DPP, llvm::StringRef::empty(), llvm::inconvertibleErrorCode(), llvm::Iterative, and llvm::None.
Expected< AMDGPUAttributorOptions > parseAMDGPUAttributorPassOptions | ( | StringRef | Params | ) |
Definition at line 681 of file AMDGPUTargetMachine.cpp.
References llvm::StringRef::empty(), llvm::formatv(), llvm::inconvertibleErrorCode(), and llvm::StringRef::split().
|
static |
Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUCodeGenPassBuilder::addIRPasses().
|
static |
Referenced by llvm::AMDGPUCodeGenPassBuilder::addPreISel().
|
static |
Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().
|
static |
Referenced by llvm::AMDGPUPassConfig::addIRPasses().
|
static |
Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().
|
static |
|
static |
|
static |
|
static |
Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().
|
static |
Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUCodeGenPassBuilder::addIRPasses().
|
static |
|
static |
|
static |
Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().
|
static |
Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUCodeGenPassBuilder::addIRPasses().
|
static |
|
static |
|
static |
Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().
|
static |
|
static |
|
static |
Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUCodeGenPassBuilder::addIRPasses().
|
static |
|
static |
|
static |
|
static |
Referenced by llvm::AMDGPUCodeGenPassBuilder::addPreISel().
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().
|
static |
|
static |
Referenced by llvm::AMDGPUCodeGenPassBuilder::addPreISel().
|
static |
|
static |
Definition at line 1447 of file AMDGPUTargetMachine.cpp.
|
static |
Referenced by llvm::AMDGPUPassConfig::addIRPasses().
|
static |
Referenced by llvm::GCNTargetMachine::getSubtargetImpl().
|
static |