Go to the documentation of this file.
56 "r600-ir-structurize",
57 cl::desc(
"Use StructurizeCFG IR pass"),
62 cl::desc(
"Run SROA after promote alloca pass"),
73 cl::desc(
"Run pre-RA exec mask optimizations"),
84 "amdgpu-load-store-vectorizer",
85 cl::desc(
"Enable load store vectorizer"),
91 "amdgpu-scalarize-global-loads",
92 cl::desc(
"Enable global load scalarization"),
98 "amdgpu-internalize-symbols",
99 cl::desc(
"Enable elimination of non-kernel functions and unused globals"),
105 "amdgpu-early-inline-all",
106 cl::desc(
"Inline all functions early"),
111 "amdgpu-sdwa-peephole",
116 "amdgpu-dpp-combine",
122 cl::desc(
"Enable AMDGPU Alias Analysis"),
127 "amdgpu-late-structurize",
128 cl::desc(
"Enable late CFG structurization"),
133 "amdgpu-function-calls",
134 cl::desc(
"Enable AMDGPU function call support"),
140 "amdgpu-fixed-function-abi",
141 cl::desc(
"Enable all implicit function arguments"),
148 "amdgpu-simplify-libcall",
149 cl::desc(
"Enable amdgpu library simplifications"),
154 "amdgpu-ir-lower-kernel-arguments",
155 cl::desc(
"Lower kernel argument loads in IR pass"),
160 "amdgpu-reassign-regs",
161 cl::desc(
"Enable register reassign optimizations on gfx10+"),
167 "amdgpu-atomic-optimizations",
168 cl::desc(
"Enable atomic optimizations"),
174 "amdgpu-mode-register",
175 cl::desc(
"Enable mode register pass"),
183 cl::desc(
"Enable machine DCE inside regalloc"));
186 "amdgpu-scalar-ir-passes",
187 cl::desc(
"Enable scalar IR passes"),
192 "amdgpu-enable-structurizer-workarounds",
198 cl::desc(
"Disable lower module lds pass"),
270 return std::make_unique<AMDGPUTargetObjectFile>();
323 "Run GCN scheduler to maximize occupancy",
328 "Run GCN scheduler to maximize occupancy (experimental)",
333 "Run GCN iterative scheduler for minimal register usage (experimental)",
338 "Run GCN iterative scheduler for ILP scheduling (experimental)",
344 return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
345 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
350 return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
351 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
352 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
400 Attribute GPUAttr =
F.getFnAttribute(
"target-cpu");
405 Attribute FSAttr =
F.getFnAttribute(
"target-features");
413 if (
const Function *
F = dyn_cast<Function>(&GV))
420 Builder.DivergentTarget =
true;
489 bool DebugPassManager) {
493 if (
PassName ==
"amdgpu-propagate-attributes-late") {
497 if (
PassName ==
"amdgpu-unify-metadata") {
501 if (
PassName ==
"amdgpu-printf-runtime-binding") {
505 if (
PassName ==
"amdgpu-always-inline") {
509 if (
PassName ==
"amdgpu-lower-module-lds") {
518 if (
PassName ==
"amdgpu-simplifylib") {
522 if (
PassName ==
"amdgpu-usenative") {
526 if (
PassName ==
"amdgpu-promote-alloca") {
530 if (
PassName ==
"amdgpu-promote-alloca-to-vector") {
534 if (
PassName ==
"amdgpu-lower-kernel-attributes") {
538 if (
PassName ==
"amdgpu-propagate-attributes-early") {
550 if (AAName ==
"amdgpu-aa") {
641 auto &
I = SubtargetMap[SubtargetKey];
662 unsigned DestAS)
const {
668 const auto *
LD = dyn_cast<LoadInst>(V);
676 const auto *Ptr =
LD->getPointerOperand();
710 auto &
I = SubtargetMap[SubtargetKey];
746 return getTM<AMDGPUTargetMachine>();
756 void addEarlyCSEOrGVNPass();
757 void addStraightLineScalarOptimizationPasses();
758 void addIRPasses()
override;
759 void addCodeGenPrepare()
override;
760 bool addPreISel()
override;
761 bool addInstSelector()
override;
762 bool addGCPasses()
override;
764 std::unique_ptr<CSEConfigBase> getCSEConfig()
const override;
767 std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig()
const {
771 class R600PassConfig final :
public AMDGPUPassConfig {
774 : AMDGPUPassConfig(
TM, PM) {}
781 bool addPreISel()
override;
782 bool addInstSelector()
override;
783 void addPreRegAlloc()
override;
784 void addPreSched2()
override;
785 void addPreEmitPass()
override;
788 class GCNPassConfig final :
public AMDGPUPassConfig {
791 : AMDGPUPassConfig(
TM, PM) {
795 setRequiresCodeGenSCCOrder(
true);
799 return getTM<GCNTargetMachine>();
805 bool addPreISel()
override;
806 void addMachineSSAOptimization()
override;
807 bool addILPOpts()
override;
808 bool addInstSelector()
override;
809 bool addIRTranslator()
override;
810 void addPreLegalizeMachineIR()
override;
811 bool addLegalizeMachineIR()
override;
812 void addPreRegBankSelect()
override;
813 bool addRegBankSelect()
override;
814 void addPreGlobalInstructionSelect()
override;
815 bool addGlobalInstructionSelect()
override;
816 void addFastRegAlloc()
override;
817 void addOptimizedRegAlloc()
override;
818 void addPreRegAlloc()
override;
819 bool addPreRewrite()
override;
820 void addPostRegAlloc()
override;
821 void addPreSched2()
override;
822 void addPreEmitPass()
override;
827 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
834 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
843 addEarlyCSEOrGVNPass();
851 void AMDGPUPassConfig::addIRPasses() {
902 addStraightLineScalarOptimizationPasses();
934 addEarlyCSEOrGVNPass();
937 void AMDGPUPassConfig::addCodeGenPrepare() {
959 bool AMDGPUPassConfig::addPreISel() {
964 bool AMDGPUPassConfig::addInstSelector() {
970 bool AMDGPUPassConfig::addGCPasses() {
979 bool R600PassConfig::addPreISel() {
980 AMDGPUPassConfig::addPreISel();
987 bool R600PassConfig::addInstSelector() {
992 void R600PassConfig::addPreRegAlloc() {
996 void R600PassConfig::addPreSched2() {
1003 void R600PassConfig::addPreEmitPass() {
1012 return new R600PassConfig(*
this, PM);
1022 if (
ST.enableSIScheduler())
1027 bool GCNPassConfig::addPreISel() {
1028 AMDGPUPassConfig::addPreISel();
1058 void GCNPassConfig::addMachineSSAOptimization() {
1083 bool GCNPassConfig::addILPOpts() {
1091 bool GCNPassConfig::addInstSelector() {
1092 AMDGPUPassConfig::addInstSelector();
1098 bool GCNPassConfig::addIRTranslator() {
1103 void GCNPassConfig::addPreLegalizeMachineIR() {
1109 bool GCNPassConfig::addLegalizeMachineIR() {
1114 void GCNPassConfig::addPreRegBankSelect() {
1119 bool GCNPassConfig::addRegBankSelect() {
1124 void GCNPassConfig::addPreGlobalInstructionSelect() {
1129 bool GCNPassConfig::addGlobalInstructionSelect() {
1134 void GCNPassConfig::addPreRegAlloc() {
1140 void GCNPassConfig::addFastRegAlloc() {
1155 void GCNPassConfig::addOptimizedRegAlloc() {
1176 bool GCNPassConfig::addPreRewrite() {
1184 void GCNPassConfig::addPostRegAlloc() {
1194 void GCNPassConfig::addPreSched2() {
1198 void GCNPassConfig::addPreEmitPass() {
1223 return new GCNPassConfig(*
this, PM);
1247 if (MFI->Occupancy == 0) {
1256 SourceRange =
RegName.SourceRange;
1270 "incorrect register class for field",
RegName.Value,
1272 SourceRange =
RegName.SourceRange;
1276 if (parseRegister(YamlMFI.
ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1281 if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1282 !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
1286 if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1287 !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1291 if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1292 !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1299 unsigned SystemSGPRs) {
1304 if (A->IsRegister) {
1307 SourceRange = A->RegisterName.SourceRange;
1310 if (!RC.contains(
Reg))
1311 return diagnoseRegisterClass(A->RegisterName);
1319 MFI->NumUserSGPRs += UserSGPRs;
1320 MFI->NumSystemSGPRs += SystemSGPRs;
1325 (parseAndCheckArgument(YamlMFI.
ArgInfo->PrivateSegmentBuffer,
1326 AMDGPU::SGPR_128RegClass,
1328 parseAndCheckArgument(YamlMFI.
ArgInfo->DispatchPtr,
1329 AMDGPU::SReg_64RegClass, MFI->ArgInfo.
DispatchPtr,
1331 parseAndCheckArgument(YamlMFI.
ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
1333 parseAndCheckArgument(YamlMFI.
ArgInfo->KernargSegmentPtr,
1334 AMDGPU::SReg_64RegClass,
1336 parseAndCheckArgument(YamlMFI.
ArgInfo->DispatchID,
1337 AMDGPU::SReg_64RegClass, MFI->ArgInfo.
DispatchID,
1339 parseAndCheckArgument(YamlMFI.
ArgInfo->FlatScratchInit,
1340 AMDGPU::SReg_64RegClass,
1342 parseAndCheckArgument(YamlMFI.
ArgInfo->PrivateSegmentSize,
1343 AMDGPU::SGPR_32RegClass,
1345 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkGroupIDX,
1348 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkGroupIDY,
1351 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkGroupIDZ,
1354 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkGroupInfo,
1355 AMDGPU::SGPR_32RegClass,
1357 parseAndCheckArgument(YamlMFI.
ArgInfo->PrivateSegmentWaveByteOffset,
1358 AMDGPU::SGPR_32RegClass,
1360 parseAndCheckArgument(YamlMFI.
ArgInfo->ImplicitArgPtr,
1361 AMDGPU::SReg_64RegClass,
1363 parseAndCheckArgument(YamlMFI.
ArgInfo->ImplicitBufferPtr,
1364 AMDGPU::SReg_64RegClass,
1366 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkItemIDX,
1367 AMDGPU::VGPR_32RegClass,
1369 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkItemIDY,
1370 AMDGPU::VGPR_32RegClass,
1372 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkItemIDZ,
1373 AMDGPU::VGPR_32RegClass,
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
FunctionPass * createR600ExpandSpecialInstrsPass()
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
A manager for alias analyses.
Analysis pass providing a never-invalidated alias analysis result.
static constexpr ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)
static bool EnableFixedFunctionABI
void initializeR600PacketizerPass(PassRegistry &)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget()
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
bool isValid() const
Return true if the attribute is any kind of attribute.
void registerDefaultAliasAnalyses(AAManager &) override
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
FunctionPass * createSIMemoryLegalizerPass()
char & SILowerSGPRSpillsID
void registerPipelineStartEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
bool isPointerTy() const
True if this is an instance of PointerType.
Optional< SIArgumentInfo > ArgInfo
static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
LocationClass< Ty > location(Ty &L)
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions.
void initializeSIInsertHardClausesPass(PassRegistry &)
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
Target - Wrapper for Target specific information.
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &)
FunctionPass * createFixIrreduciblePass()
MachineSchedRegistry provides a selection of available machine instruction schedulers.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
FunctionPass * createAMDGPULateCodeGenPreparePass()
FunctionPass * createSILowerI1CopiesPass()
void initializeR600ClauseMergePassPass(PassRegistry &)
@ SCHEDULE_LEGACYMAXOCCUPANCY
FunctionPass * createFlattenCFGPass()
A pass that internalizes all functions and variables other than those that must be preserved accordin...
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
ArgDescriptor FlatScratchInit
Triple - Helper class for working with autoconf configuration names.
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
void initializeSILowerI1CopiesPass(PassRegistry &)
char & SIPreEmitPeepholeID
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
void initializeSIPeepholeSDWAPass(PassRegistry &)
char & SILowerControlFlowID
unsigned getLDSSize() const
ModulePass * createAMDGPUUnifyMetadataPass()
static cl::opt< bool > EnableStructurizerWorkarounds("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden)
Legacy wrapper pass to provide the AMDGPUAAResult object.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
void initializeSIFoldOperandsPass(PassRegistry &)
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into an AMDGPU-specific.
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
bool FP32InputDenormals
If this is set, neither input nor output denormals are flushed for most f32 instructions.
void registerAnalysisRegistrationCallback(const std::function< void(CGSCCAnalysisManager &)> &C)
{{@ Register callbacks for analysis registration with this PassBuilder instance.
FunctionPass * createGVNPass(bool NoMemDepAnalysis=false)
Create a legacy GVN pass.
ArgDescriptor PrivateSegmentSize
ModulePass * createR600OpenCLImageTypeLoweringPass()
FunctionPass * createR600ClauseMergePass()
ArgDescriptor DispatchPtr
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
char & SIPreAllocateWWMRegsID
void initializeSIShrinkInstructionsPass(PassRegistry &)
char & TwoAddressInstructionPassID
TwoAddressInstruction - This pass reduces two-address instructions to use two operands.
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target's MachineFunctionInfo from the YAML representation.
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
This interface provides simple read-only access to a block of memory, and provides simple methods for...
AMDGPU::SIModeRegisterDefaults Mode
static cl::opt< bool > DisableLowerModuleLDS("amdgpu-disable-lower-module-lds", cl::Hidden, cl::desc("Disable lower module lds pass"), cl::init(false))
ImmutablePass * createAMDGPUExternalAAWrapperPass()
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
void initializeGCNDPPCombinePass(PassRegistry &)
static const OptimizationLevel O0
Disable as many optimizations as possible.
ArgDescriptor ImplicitArgPtr
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
char & SIOptimizeExecMaskingID
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
StringValue FrameOffsetReg
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
ArgDescriptor WorkGroupIDX
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)
void initializeSILateBranchLoweringPass(PassRegistry &)
FunctionPass * createAMDGPUUseNativeCallsPass()
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Represents a location in source code.
FunctionPass * createR600Packetizer()
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
void registerParseAACallback(const std::function< bool(StringRef Name, AAManager &AA)> &C)
Register a callback for parsing an AliasAnalysis Name to populate the given AAManager AA.
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
FunctionPass * createNaryReassociatePass()
char & PostRAHazardRecognizerID
PostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
void initializeSIWholeQuadModePass(PassRegistry &)
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
ArgDescriptor WorkItemIDX
(vector float) vec_cmpeq(*A, *B) C
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
ModulePass * createAMDGPULowerModuleLDSPass()
char & FuncletLayoutID
This pass lays out funclets contiguously.
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
FunctionPass * createSIInsertWaitcntsPass()
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
This class provides access to building LLVM's passes.
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
bool FP64FP16InputDenormals
FunctionPass * createAMDGPUAnnotateUniformValues()
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
FunctionPass * createR600EmitClauseMarkers()
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions.
FunctionPass * createAtomicExpandPass()
This pass is responsible for selecting generic machine instructions to target-specific instructions.
void initializeGCNRegBankReassignPass(PassRegistry &)
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
RegisterTargetMachine - Helper template for registering a target machine implementation,...
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
FunctionPass * createUnifyLoopExitsPass()
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
unsigned getMainFileID() const
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
void initializeSIMemoryLegalizerPass(PassRegistry &)
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
ModulePass * createAMDGPULowerIntrinsicsPass()
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
Pass * createAMDGPUAnnotateKernelFeaturesPass()
~AMDGPUTargetMachine() override
const TargetSubtargetInfo * getSubtargetImpl() const
FunctionPass * createSinkingPass()
@ REGION_ADDRESS
Address space for region memory. (GDS)
FunctionPass * createSpeculativeExecutionPass()
char & SILoadStoreOptimizerID
StringRef getValueAsString() const
Return the attribute's value as a string.
std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
bool isEntryFunctionCC(CallingConv::ID CC)
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Pass * createFunctionInliningPass()
createFunctionInliningPass - Return a new pass object that uses a heuristic to inline direct function...
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
@ PRIVATE_ADDRESS
Address space for private memory.
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
ImmutablePass * createAMDGPUAAWrapperPass()
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
FunctionPass * createLowerSwitchPass()
ModulePass * createAMDGPUPrintfRuntimeBinding()
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Target-Independent Code Generator Pass Configuration Options.
void append(StringRef RHS)
Append from a StringRef.
void initializeSILowerSGPRSpillsPass(PassRegistry &)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void registerPipelineEarlySimplificationEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
StringRef getFeatureString(const Function &F) const
static MachineSchedRegistry R600SchedRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler)
static cl::opt< bool > EnableR600StructurizeCFG("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with.
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
StringValue ScratchRSrcReg
char & AMDGPUUnifyDivergentExitNodesID
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
void initializeSIInsertWaitcntsPass(PassRegistry &)
void setRequiresStructuredCFG(bool Value)
void initializeSIAnnotateControlFlowPass(PassRegistry &)
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
ArgDescriptor WorkGroupIDZ
#define LLVM_EXTERNAL_VISIBILITY
char & DetectDeadLanesID
This pass adds dead/undef flags after analyzing subregister lanes.
const MCSubtargetInfo * getMCSubtargetInfo() const
ArgDescriptor PrivateSegmentBuffer
FunctionPass * createAMDGPUAtomicOptimizerPass()
void initializeR600VectorRegMergerPass(PassRegistry &)
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
char & FinalizeMachineBundlesID
FinalizeMachineBundles - This pass finalizes machine instruction bundles (created earlier,...
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
static ScheduleDAGInstrs * createR600MachineScheduler(MachineSchedContext *C)
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
initializer< Ty > init(const Ty &Val)
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
ArgDescriptor PrivateSegmentWaveByteOffset
char & SIFormMemoryClausesID
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
ArgDescriptor WorkGroupIDY
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
char & SIInsertHardClausesID
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
bool isFlatGlobalAddrSpace(unsigned AS)
bool FP64FP16InputDenormals
If this is set, neither input nor output denormals are flushed for both f64 and f16/v2f16 instructions...
Target & getTheGCNTarget()
The target for GCN GPUs.
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
void initializeSIPostRABundlerPass(PassRegistry &)
void registerPipelineParsingCallback(const std::function< bool(StringRef Name, CGSCCPassManager &, ArrayRef< PipelineElement >)> &C)
{{@ Register pipeline parsing callbacks with this pass builder instance.
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
void initializeGCNNSAReassignPass(PassRegistry &)
A wrapper around std::string which contains a source range that's being set during parsing.
Pass to remove unused function declarations.
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
ArgDescriptor WorkItemIDZ
FunctionPass * createSIShrinkInstructionsPass()
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
static cl::opt< bool, true > EnableAMDGPUFunctionCallsOpt("amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true), cl::Hidden)
const TargetRegisterInfo * TRI
Target processor register info.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
void initializeSIFormMemoryClausesPass(PassRegistry &)
static StringRef computeDataLayout(const Triple &TT)
StringRef - Represent a constant reference to a string, i.e.
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
StringValue StackPtrOffsetReg
Type * getType() const
All values are typed, get the type of this value.
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI)
StringRef getTargetFeatureString() const
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
ArgDescriptor ImplicitBufferPtr
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value.
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
Wrapper class representing virtual and physical registers.
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...
bool FP64FP16OutputDenormals
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
ModulePass * createAMDGPULowerKernelAttributesPass()
void initializeSIFixSGPRCopiesPass(PassRegistry &)
ArgDescriptor WorkGroupInfo
FunctionPass * createAMDGPUPromoteAllocaToVector()
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
FunctionPass * createR600VectorRegMerger()
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg, StringRef Src, SMDiagnostic &Error)
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
void initializeSIFixVGPRCopiesPass(PassRegistry &)
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableR600IfConvert("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
void initializeSIPreEmitPeepholePass(PassRegistry &)
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
LLVM-provided high-level optimization levels.
Function & getFunction()
Return the LLVM function that this machine code represents.
FunctionPass * createR600ControlFlowFinalizer()
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form.
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
void initializeSIModeRegisterPass(PassRegistry &)
Lightweight error class with error context and mandatory checking.
@ LOCAL_ADDRESS
Address space for local memory.
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
const TargetInstrInfo * TII
Target instruction information.
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
FunctionPass * createAMDGPULowerKernelArgumentsPass()
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast between SrcAS and DestAS is a noop.
@ EP_ModuleOptimizerEarly
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
FunctionPass * createSIModeRegisterPass()
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g. by calling PassManagerBuilder::addExtension.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
ArgDescriptor KernargSegmentPtr
FunctionPass * createAMDGPUPromoteAlloca()
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
void registerFunctionAnalysis()
Register a specific AA result.
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
FunctionPass * createAMDGPUCodeGenPreparePass()
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
static bool EnableFunctionCalls
Pass interface - Implemented by all 'passes'.
ModulePass * createAMDGPUFixFunctionBitcastsPass()
StringRef getTargetCPU() const
@ EP_EarlyAsPossible
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations,...
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
ArgDescriptor WorkItemIDY
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
unsigned getAssumedAddrSpace(const Value *V) const override
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Represents a range in source code.
FunctionPass * createStraightLineStrengthReducePass()
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &)
void initializeSILoadStoreOptimizerPass(PassRegistry &)
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
void registerCGSCCOptimizerLateEPCallback(const std::function< void(CGSCCPassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
A container for analyses that lazily runs them and caches their results.
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
@ FLAT_ADDRESS
Address space for flat memory.
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
StringRef getGPUName(const Function &F) const
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
A ScheduleDAG for scheduling lists of MachineInstr.
@ EP_CGSCCOptimizerLate
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
FunctionPass * createAMDGPUCFGStructurizerPass()
FunctionPass * createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into an R600-specific DAG.
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
char & AMDGPUPerfHintAnalysisID
FunctionPass * createSROAPass()
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
bool FP64FP16OutputDenormals
char & SIOptimizeExecMaskingPreRAID
MCRegisterInfo * createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour)
std::unique_ptr< const MCRegisterInfo > MRI
LLVM Value Representation.
static cl::opt< bool, true > EnableAMDGPUFixedFunctionABIOpt("amdgpu-fixed-function-abi", cl::desc("Enable all implicit function arguments"), cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI), cl::init(false), cl::Hidden)
MachineFunctionPass * createGCNRegBankReassignPass(AMDGPU::RegBankReassignMode Mode)
static bool EnableLateStructurizeCFG
void registerPassBuilderCallbacks(PassBuilder &PB, bool DebugPassManager) override
Allow the target to modify the pass pipeline with New Pass Manager (similar to adjustPassManager for ...
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT Pass)
static const char PassName[]
void initializeSILowerControlFlowPass(PassRegistry &)
char & SILateBranchLoweringPassID
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
This pass implements the localization mechanism described at the top of this file.
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...