Go to the documentation of this file.
41 #include "llvm/IR/IntrinsicsAMDGPU.h"
95 static SGPRRegisterRegAlloc
96 defaultSGPRRegAlloc(
"default",
97 "pick SGPR register allocator based on -O option",
100 static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor,
false,
103 cl::desc(
"Register allocator to use for SGPRs"));
105 static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor,
false,
108 cl::desc(
"Register allocator to use for VGPRs"));
111 static void initializeDefaultSGPRRegisterAllocatorOnce() {
116 SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);
120 static void initializeDefaultVGPRRegisterAllocatorOnce() {
125 VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);
129 static FunctionPass *createBasicSGPRRegisterAllocator() {
133 static FunctionPass *createGreedySGPRRegisterAllocator() {
137 static FunctionPass *createFastSGPRRegisterAllocator() {
141 static FunctionPass *createBasicVGPRRegisterAllocator() {
145 static FunctionPass *createGreedyVGPRRegisterAllocator() {
149 static FunctionPass *createFastVGPRRegisterAllocator() {
153 static SGPRRegisterRegAlloc basicRegAllocSGPR(
154 "basic",
"basic register allocator", createBasicSGPRRegisterAllocator);
155 static SGPRRegisterRegAlloc greedyRegAllocSGPR(
156 "greedy",
"greedy register allocator", createGreedySGPRRegisterAllocator);
158 static SGPRRegisterRegAlloc fastRegAllocSGPR(
159 "fast",
"fast register allocator", createFastSGPRRegisterAllocator);
162 static VGPRRegisterRegAlloc basicRegAllocVGPR(
163 "basic",
"basic register allocator", createBasicVGPRRegisterAllocator);
164 static VGPRRegisterRegAlloc greedyRegAllocVGPR(
165 "greedy",
"greedy register allocator", createGreedyVGPRRegisterAllocator);
167 static VGPRRegisterRegAlloc fastRegAllocVGPR(
168 "fast",
"fast register allocator", createFastVGPRRegisterAllocator);
173 cl::desc(
"Run SROA after promote alloca pass"),
179 cl::desc(
"Run early if-conversion"),
184 cl::desc(
"Run pre-RA exec mask optimizations"),
189 "amdgpu-load-store-vectorizer",
190 cl::desc(
"Enable load store vectorizer"),
196 "amdgpu-scalarize-global-loads",
197 cl::desc(
"Enable global load scalarization"),
203 "amdgpu-internalize-symbols",
204 cl::desc(
"Enable elimination of non-kernel functions and unused globals"),
210 "amdgpu-early-inline-all",
211 cl::desc(
"Inline all functions early"),
216 "amdgpu-sdwa-peephole",
221 "amdgpu-dpp-combine",
227 cl::desc(
"Enable AMDGPU Alias Analysis"),
232 "amdgpu-late-structurize",
233 cl::desc(
"Enable late CFG structurization"),
239 "amdgpu-simplify-libcall",
240 cl::desc(
"Enable amdgpu library simplifications"),
245 "amdgpu-ir-lower-kernel-arguments",
246 cl::desc(
"Lower kernel argument loads in IR pass"),
251 "amdgpu-reassign-regs",
252 cl::desc(
"Enable register reassign optimizations on gfx10+"),
257 "amdgpu-opt-vgpr-liverange",
258 cl::desc(
"Enable VGPR liverange optimizations for if-else structure"),
263 "amdgpu-atomic-optimizations",
264 cl::desc(
"Enable atomic optimizations"),
270 "amdgpu-mode-register",
271 cl::desc(
"Enable mode register pass"),
278 cl::desc(
"Enable s_delay_alu insertion"),
285 cl::desc(
"Enable machine DCE inside regalloc"));
292 "amdgpu-scalar-ir-passes",
293 cl::desc(
"Enable scalar IR passes"),
298 "amdgpu-enable-structurizer-workarounds",
303 "amdgpu-enable-lds-replace-with-pointer",
308 "amdgpu-enable-lower-module-lds",
cl::desc(
"Enable lower module lds pass"),
313 "amdgpu-enable-pre-ra-optimizations",
318 "amdgpu-enable-promote-kernel-arguments",
319 cl::desc(
"Enable promotion of flat kernel pointer arguments to global"),
398 return std::make_unique<AMDGPUTargetObjectFile>();
411 if (
ST.shouldClusterStores())
426 if (
ST.shouldClusterStores())
442 if (
ST.shouldClusterStores())
454 "Run GCN scheduler to maximize occupancy",
459 "Run GCN scheduler to maximize occupancy (experimental)",
464 "Run GCN iterative scheduler for minimal register usage (experimental)",
469 "Run GCN iterative scheduler for ILP scheduling (experimental)",
475 return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
476 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
481 return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
482 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
483 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
531 Attribute GPUAttr =
F.getFnAttribute(
"target-cpu");
536 Attribute FSAttr =
F.getFnAttribute(
"target-features");
544 if (
const Function *
F = dyn_cast<Function>(&GV))
545 return F->isDeclaration() ||
F->getName().startswith(
"__asan_") ||
546 F->getName().startswith(
"__sanitizer_") ||
554 Builder.DivergentTarget =
true;
561 bool PromoteKernelArguments =
609 if (PromoteKernelArguments)
635 if (
PassName ==
"amdgpu-propagate-attributes-late") {
639 if (
PassName ==
"amdgpu-unify-metadata") {
643 if (
PassName ==
"amdgpu-printf-runtime-binding") {
647 if (
PassName ==
"amdgpu-always-inline") {
651 if (
PassName ==
"amdgpu-replace-lds-use-with-pointer") {
655 if (
PassName ==
"amdgpu-lower-module-lds") {
664 if (
PassName ==
"amdgpu-simplifylib") {
668 if (
PassName ==
"amdgpu-usenative") {
672 if (
PassName ==
"amdgpu-promote-alloca") {
676 if (
PassName ==
"amdgpu-promote-alloca-to-vector") {
680 if (
PassName ==
"amdgpu-lower-kernel-attributes") {
684 if (
PassName ==
"amdgpu-propagate-attributes-early") {
688 if (
PassName ==
"amdgpu-promote-kernel-arguments") {
700 if (AAName ==
"amdgpu-aa") {
778 unsigned DestAS)
const {
784 const auto *
LD = dyn_cast<LoadInst>(V);
792 const auto *Ptr =
LD->getPointerOperand();
802 std::pair<const Value *, unsigned>
804 if (
auto *II = dyn_cast<IntrinsicInst>(V)) {
805 switch (II->getIntrinsicID()) {
806 case Intrinsic::amdgcn_is_shared:
808 case Intrinsic::amdgcn_is_private:
813 return std::make_pair(
nullptr, -1);
820 const_cast<Value *
>(V),
822 m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
826 return std::make_pair(
nullptr, -1);
866 auto &
I = SubtargetMap[SubtargetKey];
902 setRequiresCodeGenSCCOrder(
true);
907 return getTM<GCNTargetMachine>();
918 if (
ST.shouldClusterStores())
926 bool addPreISel()
override;
927 void addMachineSSAOptimization()
override;
928 bool addILPOpts()
override;
929 bool addInstSelector()
override;
930 bool addIRTranslator()
override;
931 void addPreLegalizeMachineIR()
override;
932 bool addLegalizeMachineIR()
override;
933 void addPreRegBankSelect()
override;
934 bool addRegBankSelect()
override;
935 void addPreGlobalInstructionSelect()
override;
936 bool addGlobalInstructionSelect()
override;
937 void addFastRegAlloc()
override;
938 void addOptimizedRegAlloc()
override;
942 FunctionPass *createRegAllocPass(
bool Optimized)
override;
944 bool addRegAssignAndRewriteFast()
override;
945 bool addRegAssignAndRewriteOptimized()
override;
947 void addPreRegAlloc()
override;
948 bool addPreRewrite()
override;
949 void addPostRegAlloc()
override;
950 void addPreSched2()
override;
951 void addPreEmitPass()
override;
1127 if (
ST.shouldClusterStores())
1139 if (
ST.enableSIScheduler())
1144 bool GCNPassConfig::addPreISel() {
1179 void GCNPassConfig::addMachineSSAOptimization() {
1203 bool GCNPassConfig::addILPOpts() {
1211 bool GCNPassConfig::addInstSelector() {
1218 bool GCNPassConfig::addIRTranslator() {
1223 void GCNPassConfig::addPreLegalizeMachineIR() {
1229 bool GCNPassConfig::addLegalizeMachineIR() {
1234 void GCNPassConfig::addPreRegBankSelect() {
1239 bool GCNPassConfig::addRegBankSelect() {
1244 void GCNPassConfig::addPreGlobalInstructionSelect() {
1249 bool GCNPassConfig::addGlobalInstructionSelect() {
1254 void GCNPassConfig::addPreRegAlloc() {
1260 void GCNPassConfig::addFastRegAlloc() {
1275 void GCNPassConfig::addOptimizedRegAlloc() {
1309 bool GCNPassConfig::addPreRewrite() {
1315 FunctionPass *GCNPassConfig::createSGPRAllocPass(
bool Optimized) {
1318 initializeDefaultSGPRRegisterAllocatorOnce);
1330 FunctionPass *GCNPassConfig::createVGPRAllocPass(
bool Optimized) {
1333 initializeDefaultVGPRRegisterAllocatorOnce);
1340 return createGreedyVGPRRegisterAllocator();
1342 return createFastVGPRRegisterAllocator();
1345 FunctionPass *GCNPassConfig::createRegAllocPass(
bool Optimized) {
1350 "-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
1352 bool GCNPassConfig::addRegAssignAndRewriteFast() {
1353 if (!usingDefaultRegAlloc())
1356 addPass(createSGPRAllocPass(
false));
1361 addPass(createVGPRAllocPass(
false));
1365 bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1366 if (!usingDefaultRegAlloc())
1369 addPass(createSGPRAllocPass(
true));
1380 addPass(createVGPRAllocPass(
true));
1388 void GCNPassConfig::addPostRegAlloc() {
1395 void GCNPassConfig::addPreSched2() {
1401 void GCNPassConfig::addPreEmitPass() {
1435 return new GCNPassConfig(*
this, PM);
1460 if (MFI->Occupancy == 0) {
1469 SourceRange =
RegName.SourceRange;
1482 if (parseOptionalRegister(YamlMFI.
VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))
1491 "incorrect register class for field",
RegName.Value,
1493 SourceRange =
RegName.SourceRange;
1497 if (parseRegister(YamlMFI.
ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1502 if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1503 !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
1507 if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1508 !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1512 if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1513 !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1519 if (parseRegister(YamlReg, ParsedReg))
1528 unsigned SystemSGPRs) {
1533 if (A->IsRegister) {
1536 SourceRange = A->RegisterName.SourceRange;
1539 if (!RC.contains(
Reg))
1540 return diagnoseRegisterClass(A->RegisterName);
1548 MFI->NumUserSGPRs += UserSGPRs;
1549 MFI->NumSystemSGPRs += SystemSGPRs;
1554 (parseAndCheckArgument(YamlMFI.
ArgInfo->PrivateSegmentBuffer,
1555 AMDGPU::SGPR_128RegClass,
1557 parseAndCheckArgument(YamlMFI.
ArgInfo->DispatchPtr,
1558 AMDGPU::SReg_64RegClass, MFI->ArgInfo.
DispatchPtr,
1560 parseAndCheckArgument(YamlMFI.
ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
1562 parseAndCheckArgument(YamlMFI.
ArgInfo->KernargSegmentPtr,
1563 AMDGPU::SReg_64RegClass,
1565 parseAndCheckArgument(YamlMFI.
ArgInfo->DispatchID,
1566 AMDGPU::SReg_64RegClass, MFI->ArgInfo.
DispatchID,
1568 parseAndCheckArgument(YamlMFI.
ArgInfo->FlatScratchInit,
1569 AMDGPU::SReg_64RegClass,
1571 parseAndCheckArgument(YamlMFI.
ArgInfo->PrivateSegmentSize,
1572 AMDGPU::SGPR_32RegClass,
1574 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkGroupIDX,
1577 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkGroupIDY,
1580 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkGroupIDZ,
1583 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkGroupInfo,
1584 AMDGPU::SGPR_32RegClass,
1586 parseAndCheckArgument(YamlMFI.
ArgInfo->PrivateSegmentWaveByteOffset,
1587 AMDGPU::SGPR_32RegClass,
1589 parseAndCheckArgument(YamlMFI.
ArgInfo->ImplicitArgPtr,
1590 AMDGPU::SReg_64RegClass,
1592 parseAndCheckArgument(YamlMFI.
ArgInfo->ImplicitBufferPtr,
1593 AMDGPU::SReg_64RegClass,
1595 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkItemIDX,
1596 AMDGPU::VGPR_32RegClass,
1598 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkItemIDY,
1599 AMDGPU::VGPR_32RegClass,
1601 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkItemIDZ,
1602 AMDGPU::VGPR_32RegClass,
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
FunctionPass * createFastRegisterAllocator()
FastRegisterAllocation Pass - This pass register allocates as fast as possible.
A manager for alias analyses.
Analysis pass providing a never-invalidated alias analysis result.
static constexpr ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)
static cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
void initializeR600PacketizerPass(PassRegistry &)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget()
ModulePass * createAMDGPUCtorDtorLoweringPass()
static const char RegAllocOptNotSupportedMessage[]
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized buil...
bool isValid() const
Return true if the attribute is any kind of attribute.
Pass * createAMDGPUAttributorPass()
void registerDefaultAliasAnalyses(AAManager &) override
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
FunctionPass * createSIMemoryLegalizerPass()
char & SILowerSGPRSpillsID
FunctionPass * createAMDGPUSetWavePriorityPass()
void initializeAMDGPUInsertDelayAluPass(PassRegistry &)
void registerPipelineStartEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
bool isPointerTy() const
True if this is an instance of PointerType.
Optional< SIArgumentInfo > ArgInfo
static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
void initializeGCNPreRAOptimizationsPass(PassRegistry &)
LocationClass< Ty > location(Ty &L)
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT &&Pass)
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions.
void initializeSIInsertHardClausesPass(PassRegistry &)
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
Target - Wrapper for Target specific information.
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &)
FunctionPass * createFixIrreduciblePass()
MachineSchedRegistry provides a selection of available machine instruction schedulers.
FunctionPass * createVirtRegRewriter(bool ClearVirtRegs=true)
StringValue VGPRForAGPRCopy
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
FunctionPass * createAMDGPULateCodeGenPreparePass()
Reg
All possible values of the reg field in the ModR/M byte.
FunctionPass * createSILowerI1CopiesPass()
void initializeR600ClauseMergePassPass(PassRegistry &)
@ SCHEDULE_LEGACYMAXOCCUPANCY
FunctionPass * createFlattenCFGPass()
A pass that internalizes all functions and variables other than those that must be preserved accordin...
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
ArgDescriptor FlatScratchInit
Triple - Helper class for working with autoconf configuration names.
FunctionAnalysisManager FAM
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
void initializeSILowerI1CopiesPass(PassRegistry &)
uint32_t getLDSSize() const
static cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)
char & SIPreEmitPeepholeID
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
void initializeSIPeepholeSDWAPass(PassRegistry &)
char & ShadowStackGCLoweringID
ShadowStackGCLowering - Implements the custom lowering mechanism used by the shadow stack GC.
char & SILowerControlFlowID
char & SIOptimizeVGPRLiveRangeID
ModulePass * createAMDGPUUnifyMetadataPass()
static cl::opt< bool > EnableStructurizerWorkarounds("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden)
Legacy wrapper pass to provide the AMDGPUAAResult object.
static cl::opt< bool > EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
void initializeSIFoldOperandsPass(PassRegistry &)
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into an AMDGPU-specific DAG, ready for instruction scheduling.
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
bool FP32InputDenormals
If this is set, neither input nor output denormals are flushed for most f32 instructions.
void registerAnalysisRegistrationCallback(const std::function< void(CGSCCAnalysisManager &)> &C)
Register callbacks for analysis registration with this PassBuilder instance.
FunctionPass * createGVNPass(bool NoMemDepAnalysis=false)
Create a legacy GVN pass.
char & AMDGPUReleaseVGPRsID
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
ArgDescriptor PrivateSegmentSize
ModulePass * createR600OpenCLImageTypeLoweringPass()
ArgDescriptor DispatchPtr
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
char & SIPreAllocateWWMRegsID
void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &)
static const OptimizationLevel O0
Disable as many optimizations as possible.
void initializeSIShrinkInstructionsPass(PassRegistry &)
char & TwoAddressInstructionPassID
TwoAddressInstruction - This pass reduces two-address instructions to use two operands.
unsigned const TargetRegisterInfo * TRI
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target's MachineFunctionInfo from the YAML representation.
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
This interface provides simple read-only access to a block of memory, and provides simple methods for...
AMDGPU::SIModeRegisterDefaults Mode
bool addGCPasses() override
addGCPasses - Add late codegen passes that analyze code for garbage collection.
static cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)
ImmutablePass * createAMDGPUExternalAAWrapperPass()
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
void initializeGCNDPPCombinePass(PassRegistry &)
ArgDescriptor ImplicitArgPtr
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
char & SIOptimizeExecMaskingID
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
StringValue FrameOffsetReg
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
void addIRPasses() override
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
ArgDescriptor WorkGroupIDX
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)
void initializeSILateBranchLoweringPass(PassRegistry &)
FunctionPass * createAMDGPUUseNativeCallsPass()
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Represents a location in source code.
bool match(Val *V, const Pattern &P)
void registerParseAACallback(const std::function< bool(StringRef Name, AAManager &AA)> &C)
Register a callback for parsing an AliasAnalysis Name to populate the given AAManager AA.
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
FunctionPass * createNaryReassociatePass()
char & PostRAHazardRecognizerID
PostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
void initializeSIWholeQuadModePass(PassRegistry &)
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
ArgDescriptor WorkItemIDX
(vector float) vec_cmpeq(*A, *B) C
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
ModulePass * createAMDGPULowerModuleLDSPass()
char & FuncletLayoutID
This pass lays out funclets contiguously.
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
FunctionPass * createSIInsertWaitcntsPass()
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static cl::opt< bool > EnableLDSReplaceWithPointer("amdgpu-enable-lds-replace-with-pointer", cl::desc("Enable LDS replace with pointer pass"), cl::init(false), cl::Hidden)
This class provides access to building LLVM's passes.
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
bool FP64FP16InputDenormals
FunctionPass * createAMDGPUAnnotateUniformValues()
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions.
static FunctionPass * useDefaultRegisterAllocator()
-regalloc=... command line option.
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
FunctionPass * createAtomicExpandPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...
@ LOCAL_ADDRESS
Address space for local memory.
This pass is responsible for selecting generic machine instructions to target-specific instructions.
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
RegisterTargetMachine - Helper template for registering a target machine implementation,...
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
FunctionPass * createUnifyLoopExitsPass()
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
unsigned getMainFileID() const
unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override
getAddressSpaceForPseudoSourceKind - Given the kind of memory (e.g.
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
void initializeSIMemoryLegalizerPass(PassRegistry &)
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &)
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
ModulePass * createAMDGPULowerIntrinsicsPass()
void addCodeGenPrepare() override
Add pass to prepare the LLVM IR for code generation.
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
Pass * createAMDGPUAnnotateKernelFeaturesPass()
void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &)
~AMDGPUTargetMachine() override
const TargetSubtargetInfo * getSubtargetImpl() const
FunctionPass * createSinkingPass()
ArchType getArch() const
Get the parsed architecture type of this triple.
FunctionPass * createSpeculativeExecutionPass()
char & SILoadStoreOptimizerID
StringRef getValueAsString() const
Return the attribute's value as a string.
RegisterPassParser class - Handle the addition of new machine passes.
std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()
void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
bool isEntryFunctionCC(CallingConv::ID CC)
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Pass * createFunctionInliningPass()
createFunctionInliningPass - Return a new pass object that uses a heuristic to inline direct function...
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
ImmutablePass * createAMDGPUAAWrapperPass()
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
FunctionPass * createLowerSwitchPass()
ModulePass * createAMDGPUPrintfRuntimeBinding()
bool addInstSelector() override
addInstSelector - This method should install an instruction selector pass, which converts from LLVM c...
PassBuilder PB(Machine, PassOpts->PTO, None, &PIC)
char & VirtRegRewriterID
VirtRegRewriter pass.
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Target-Independent Code Generator Pass Configuration Options.
void append(StringRef RHS)
Append from a StringRef.
void initializeSILowerSGPRSpillsPass(PassRegistry &)
@ ExternalSymbolCallEntry
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void registerPipelineEarlySimplificationEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
StringRef getFeatureString(const Function &F) const
static cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with.
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
char & GCLoweringID
GCLowering Pass - Used by gc.root to perform its default lowering operations.
StringValue ScratchRSrcReg
char & AMDGPUUnifyDivergentExitNodesID
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
void initializeSIInsertWaitcntsPass(PassRegistry &)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
void initializeSIAnnotateControlFlowPass(PassRegistry &)
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
ArgDescriptor WorkGroupIDZ
FunctionPass *(*)() FunctionPassCtor
#define LLVM_EXTERNAL_VISIBILITY
char & DetectDeadLanesID
This pass adds dead/undef flags after analyzing subregister lanes.
const MCSubtargetInfo * getMCSubtargetInfo() const
ArgDescriptor PrivateSegmentBuffer
void reserveWWMRegister(Register Reg)
std::unique_ptr< ScheduleDAGMutation > createSchedBarrierDAGMutation()
FunctionPass * createAMDGPUAtomicOptimizerPass()
void initializeR600VectorRegMergerPass(PassRegistry &)
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override
Create an instance of ScheduleDAGInstrs to be run within the standard MachineScheduler pass for this ...
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
initializer< Ty > init(const Ty &Val)
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
ArgDescriptor PrivateSegmentWaveByteOffset
char & SIFormMemoryClausesID
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is live and sets mac...
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
SmallVector< StringValue > WWMReservedRegs
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
ArgDescriptor WorkGroupIDY
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
char & SIInsertHardClausesID
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
void addStraightLineScalarOptimizationPasses()
bool isFlatGlobalAddrSpace(unsigned AS)
bool FP64FP16InputDenormals
If this is set, neither input nor output denormals are flushed for both f64 and f16/v2f16 instructions...
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const override
If the specified predicate checks whether a generic pointer falls within a specified address space,...
Target & getTheGCNTarget()
The target for GCN GPUs.
AMDGPUTargetMachine & getAMDGPUTargetMachine() const
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
void initializeSIPostRABundlerPass(PassRegistry &)
void registerPipelineParsingCallback(const std::function< bool(StringRef Name, CGSCCPassManager &, ArrayRef< PipelineElement >)> &C)
Register pipeline parsing callbacks with this pass builder instance.
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
void initializeGCNNSAReassignPass(PassRegistry &)
static bool EnableLowerModuleLDS
A wrapper around std::string which contains a source range that's being set during parsing.
Pass to remove unused function declarations.
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
@ PRIVATE_ADDRESS
Address space for private memory.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
ArgDescriptor WorkItemIDZ
FunctionPass * createSIShrinkInstructionsPass()
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
const TargetRegisterInfo * TRI
Target processor register info.
AnalysisID addPass(AnalysisID PassID)
Utilities for targets to add passes to the pass manager.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
void initializeSIFormMemoryClausesPass(PassRegistry &)
static StringRef computeDataLayout(const Triple &TT)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
StringValue StackPtrOffsetReg
bool addPreISel() override
Methods with trivial inline returns are convenient points in the common codegen pass pipeline where t...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Type * getType() const
All values are typed, get the type of this value.
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
static cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
StringRef getTargetFeatureString() const
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
ArgDescriptor ImplicitBufferPtr
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value.
std::unique_ptr< CSEConfigBase > getCSEConfig() const override
Returns the CSEConfig object to use for the current optimization level.
TargetSubtargetInfo - Generic base class for all target subtargets.
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
char & RenameIndependentSubregsID
This pass detects subregister lanes in a virtual register that are used independently of other lanes ...
Wrapper class representing virtual and physical registers.
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...
bool FP64FP16OutputDenormals
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
ModulePass * createAMDGPULowerKernelAttributesPass()
void initializeSIFixSGPRCopiesPass(PassRegistry &)
ArgDescriptor WorkGroupInfo
FunctionPass * createAMDGPUPromoteAllocaToVector()
unsigned getSpeedupLevel() const
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg, StringRef Src, SMDiagnostic &Error)
void initializeAMDGPUReleaseVGPRsPass(PassRegistry &)
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
void initializeSIFixVGPRCopiesPass(PassRegistry &)
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
void initializeSIPreEmitPeepholePass(PassRegistry &)
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Function & getFunction()
Return the LLVM function that this machine code represents.
TargetTransformInfo getTargetTransformInfo(const Function &F) const override
Get a TargetTransformInfo implementation for the target.
void registerPassBuilderCallbacks(PassBuilder &PB) override
Allow the target to modify the pass pipeline with New Pass Manager (similar to adjustPassManager for ...
static cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form.
@ REGION_ADDRESS
Address space for region memory (GDS).
void addEarlyCSEOrGVNPass()
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
void initializeSIModeRegisterPass(PassRegistry &)
Lightweight error class with error context and mandatory checking.
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
const TargetInstrInfo * TII
Target instruction information.
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
FunctionPass * createAMDGPULowerKernelArgumentsPass()
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast between SrcAS and DestAS is a noop.
@ EP_ModuleOptimizerEarly
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
FunctionPass * createSIModeRegisterPass()
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
ModulePass * createAMDGPUReplaceLDSUseWithPointerPass()
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g. by calling PassManagerBuilder::addExtension.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
void disablePass(AnalysisID PassID)
Allow the target to disable a specific standard pass by default.
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
ArgDescriptor KernargSegmentPtr
FunctionPass * createAMDGPUPromoteAlloca()
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
void registerFunctionAnalysis()
Register a specific AA result.
bool isPassEnabled(const cl::opt< bool > &Opt, CodeGenOpt::Level Level=CodeGenOpt::Default) const
Check if a pass is enabled given Opt option.
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
FunctionPass * createAMDGPUCodeGenPreparePass()
FunctionPass * createAMDGPUPromoteKernelArgumentsPass()
RegisterRegAllocBase class - Track the registration of register allocators.
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
static bool EnableFunctionCalls
void initializeAMDGPUAttributorPass(PassRegistry &)
Pass interface - Implemented by all 'passes'.
StringRef getTargetCPU() const
@ EP_EarlyAsPossible
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations,...
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
char & PostRASchedulerID
PostRAScheduler - This pass performs post register allocation scheduling.
ArgDescriptor WorkItemIDY
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
unsigned getAssumedAddrSpace(const Value *V) const override
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Represents a range in source code.
FunctionPass * createStraightLineStrengthReducePass()
@ FLAT_ADDRESS
Address space for flat memory.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const Triple & getTargetTriple() const
char & GCNPreRAOptimizationsID
void initializeSILoadStoreOptimizerPass(PassRegistry &)
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
void registerCGSCCOptimizerLateEPCallback(const std::function< void(CGSCCPassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &)
A container for analyses that lazily runs them and caches their results.
const char LLVMTargetMachineRef TM
FunctionPass class - This class is used to implement most global optimizations.
FunctionPass * createBasicRegisterAllocator()
BasicRegisterAllocation Pass - This pass implements a degenerate global register allocator using the ...
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
StringRef getGPUName(const Function &F) const
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
A ScheduleDAG for scheduling lists of MachineInstr.
char & AMDGPUInsertDelayAluID
@ EP_CGSCCOptimizerLate
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
char & AMDGPUPerfHintAnalysisID
FunctionPass * createSROAPass()
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
bool FP64FP16OutputDenormals
char & SIOptimizeExecMaskingPreRAID
MCRegisterInfo * createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour)
std::unique_ptr< const MCRegisterInfo > MRI
LLVM Value Representation.
static bool EnableLateStructurizeCFG
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
CodeGenOpt::Level getOptLevel() const
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
static const char PassName[]
void initializeSILowerControlFlowPass(PassRegistry &)
char & SILateBranchLoweringPassID
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation()
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
This pass implements the localization mechanism described at the top of this file.
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...