51 "r600-ir-structurize",
52 cl::desc(
"Use StructurizeCFG IR pass"),
57 cl::desc(
"Run SROA after promote alloca pass"),
69 "amdgpu-load-store-vectorizer",
70 cl::desc(
"Enable load store vectorizer"),
76 "amdgpu-scalarize-global-loads",
77 cl::desc(
"Enable global load scalarization"),
108 return llvm::make_unique<AMDGPUTargetObjectFile>();
123 llvm::make_unique<GCNMaxOccupancySchedStrategy>(C));
139 "Run GCN scheduler to maximize occupancy",
145 return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
146 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
151 return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
152 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
153 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
227 auto &
I = SubtargetMap[SubtargetKey];
233 I = llvm::make_unique<R600Subtarget>(
TargetTriple, GPU, FS, *
this);
243 #ifdef LLVM_BUILD_GLOBAL_ISEL
247 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
249 return CallLoweringInfo.get();
270 auto &
I = SubtargetMap[SubtargetKey];
276 I = llvm::make_unique<SISubtarget>(
TargetTriple, GPU, FS, *
this);
278 #ifndef LLVM_BUILD_GLOBAL_ISEL
281 SIGISelActualAccessor *GISel =
new SIGISelActualAccessor();
282 GISel->CallLoweringInfo.reset(
286 I->setGISelAccessor(*GISel);
311 return getTM<AMDGPUTargetMachine>();
322 void addEarlyCSEOrGVNPass();
323 void addStraightLineScalarOptimizationPasses();
324 void addIRPasses()
override;
325 void addCodeGenPrepare()
override;
326 bool addPreISel()
override;
327 bool addInstSelector()
override;
328 bool addGCPasses()
override;
331 class R600PassConfig final :
public AMDGPUPassConfig {
334 : AMDGPUPassConfig(TM, PM) {}
341 bool addPreISel()
override;
342 void addPreRegAlloc()
override;
343 void addPreSched2()
override;
344 void addPreEmitPass()
override;
347 class GCNPassConfig final :
public AMDGPUPassConfig {
350 : AMDGPUPassConfig(TM, PM) {}
353 return getTM<GCNTargetMachine>();
359 void addIRPasses()
override;
360 bool addPreISel()
override;
361 void addMachineSSAOptimization()
override;
362 bool addInstSelector()
override;
363 #ifdef LLVM_BUILD_GLOBAL_ISEL
364 bool addIRTranslator()
override;
365 bool addLegalizeMachineIR()
override;
366 bool addRegBankSelect()
override;
367 bool addGlobalInstructionSelect()
override;
369 void addFastRegAlloc(
FunctionPass *RegAllocPass)
override;
370 void addOptimizedRegAlloc(
FunctionPass *RegAllocPass)
override;
371 void addPreRegAlloc()
override;
372 void addPostRegAlloc()
override;
373 void addPreSched2()
override;
374 void addPreEmitPass()
override;
385 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
392 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
400 addEarlyCSEOrGVNPass();
408 void AMDGPUPassConfig::addIRPasses() {
434 addStraightLineScalarOptimizationPasses();
452 addEarlyCSEOrGVNPass();
455 void AMDGPUPassConfig::addCodeGenPrepare() {
462 bool AMDGPUPassConfig::addPreISel() {
467 bool AMDGPUPassConfig::addInstSelector() {
472 bool AMDGPUPassConfig::addGCPasses() {
481 bool R600PassConfig::addPreISel() {
482 AMDGPUPassConfig::addPreISel();
489 void R600PassConfig::addPreRegAlloc() {
493 void R600PassConfig::addPreSched2() {
500 void R600PassConfig::addPreEmitPass() {
509 return new R600PassConfig(
this, PM);
524 bool GCNPassConfig::addPreISel() {
525 AMDGPUPassConfig::addPreISel();
539 void GCNPassConfig::addMachineSSAOptimization() {
554 void GCNPassConfig::addIRPasses() {
558 AMDGPUPassConfig::addIRPasses();
561 bool GCNPassConfig::addInstSelector() {
562 AMDGPUPassConfig::addInstSelector();
568 #ifdef LLVM_BUILD_GLOBAL_ISEL
569 bool GCNPassConfig::addIRTranslator() {
574 bool GCNPassConfig::addLegalizeMachineIR() {
578 bool GCNPassConfig::addRegBankSelect() {
582 bool GCNPassConfig::addGlobalInstructionSelect() {
587 void GCNPassConfig::addPreRegAlloc() {
592 void GCNPassConfig::addFastRegAlloc(
FunctionPass *RegAllocPass) {
604 void GCNPassConfig::addOptimizedRegAlloc(
FunctionPass *RegAllocPass) {
617 void GCNPassConfig::addPostRegAlloc() {
622 void GCNPassConfig::addPreSched2() {
625 void GCNPassConfig::addPreEmitPass() {
644 return new GCNPassConfig(
this, PM);
FunctionPass * createSpeculativeExecutionPass()
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
FunctionPass * createStraightLineStrengthReducePass()
FunctionPass * createGVNPass(bool NoLoads=false)
Create a legacy GVN pass.
The goal of this helper class is to gather the accessor to all the APIs related to GlobalISel...
StringRef getTargetCPU() const
Target & getTheGCNTarget()
The target for GCN GPUs.
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
const AMDGPUSubtarget * getSubtargetImpl() const
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with...
char & SILoadStoreOptimizerID
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
This file describes how to lower LLVM calls to machine code calls.
char & FuncletLayoutID
This pass lays out funclets contiguously.
Analysis pass providing the TargetTransformInfo.
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
FunctionPass * createAMDGPUPromoteAlloca(const TargetMachine *TM=nullptr)
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
R600 Machine Scheduler interface.
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
FunctionPass * createAMDGPUCFGStructurizerPass()
MachineSchedRegistry provides a selection of available machine instruction schedulers.
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form...
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into an AMDGPU-specific.
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
bool hasAttribute(AttrKind Val) const
Return true if the attribute is present.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
bool enableSIScheduler() const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
FunctionPass * createR600ExpandSpecialInstrsPass(TargetMachine &tm)
Pass * createLoadStoreVectorizerPass()
This file declares the AMDGPU-specific subclass of TargetLoweringObjectFile.
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, CodeModel::Model CM, CodeGenOpt::Level OL)
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
This file contains the simple types necessary to represent the attributes associated with functions a...
No attributes have been set.
FunctionPass * createSinkingPass()
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, CodeModel::Model CM, CodeGenOpt::Level OL)
char & SIFixControlFlowLiveIntervalsID
char & FinalizeMachineBundlesID
FinalizeMachineBundles - This pass finalizes machine instruction bundles (created earlier, e.g.
FunctionPass * createR600VectorRegMerger(TargetMachine &tm)
static MachineSchedRegistry R600SchedRegistry("r600","Run R600's custom scheduler", createR600MachineScheduler)
Target-Independent Code Generator Pass Configuration Options.
static StringRef computeDataLayout(const Triple &TT)
FunctionPass * createSITypeRewriter()
FunctionPass * createR600ClauseMergePass(TargetMachine &tm)
FunctionPass * createSILowerI1CopiesPass()
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
void initializeSIFixSGPRCopiesPass(PassRegistry &)
FunctionPass * createR600ControlFlowFinalizer(TargetMachine &tm)
SI Machine Scheduler interface.
void append(in_iter S, in_iter E)
Append from an iterator pair.
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions...
initializer< Ty > init(const Ty &Val)
FunctionPass * createSeparateConstOffsetFromGEPPass(const TargetMachine *TM=nullptr, bool LowerGEP=false)
FunctionPass * createSIDebuggerInsertNopsPass()
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
FunctionPass * createFlattenCFGPass()
FunctionPass * createSIWholeQuadModePass()
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
char & SIInsertSkipsPassID
virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass)
addOptimizedRegAlloc - Add passes related to register allocation.
void LLVMInitializeAMDGPUTarget()
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy","Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
char & AMDGPUAnnotateKernelFeaturesID
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
FunctionPass * createR600Packetizer(TargetMachine &tm)
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
static MachineSchedRegistry SISchedRegistry("si","Run SI's custom scheduler", createSIMachineScheduler)
This class describes a target machine that is implemented with the LLVM target-independent code gener...
FunctionPass * createAMDGPUUnifyMetadataPass()
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
FunctionPass class - This class is used to implement most global optimizations.
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, CodeModel::Model CM, CodeGenOpt::Level OL)
void initializeSILowerControlFlowPass(PassRegistry &)
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
The AMDGPU TargetMachine interface definition for hw codegen targets.
static cl::opt< bool > EnableR600IfConvert("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static ScheduleDAGInstrs * createR600MachineScheduler(MachineSchedContext *C)
void initializeSIShrinkInstructionsPass(PassRegistry &)
void initializeSIInsertSkipsPass(PassRegistry &)
Triple - Helper class for working with autoconf configuration names.
FunctionPass * createAMDGPUAnnotateUniformValues()
Provides passes for inlining "always_inline" functions.
char & SIOptimizeExecMaskingID
StringRef getFeatureString(const Function &F) const
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
StringRef getTargetFeatureString() const
ModulePass * createAMDGPUAlwaysInlinePass()
void addEarlyAsPossiblePasses(PassManagerBase &PM) override
Add target-specific function passes that should be run as early as possible in the optimization pipel...
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
FunctionPass * createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM=nullptr)
void initializeSIWholeQuadModePass(PassRegistry &)
void setRequiresStructuredCFG(bool Value)
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
Target - Wrapper for Target specific information.
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
virtual void addFastRegAlloc(FunctionPass *RegAllocPass)
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
void initializeSILoadStoreOptimizerPass(PassRegistry &)
char & SILowerControlFlowID
void initializeSIAnnotateControlFlowPass(PassRegistry &)
ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of MachineInstrs. ...
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
void initializeSIFoldOperandsPass(PassRegistry &)
const TargetRegisterInfo * TRI
FunctionPass * createSIShrinkInstructionsPass()
void initializeSIDebuggerInsertNopsPass(PassRegistry &)
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
void initializeSIInsertWaitsPass(PassRegistry &)
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
FunctionPass * createSROAPass()
StringRef getGPUName(const Function &F) const
const TargetInstrInfo * TII
TargetIRAnalysis getTargetIRAnalysis() override
Get a TargetIRAnalysis implementation for the target.
FunctionPass * createSIInsertWaitsPass()
FunctionPass * createR600EmitClauseMarkers()
StringRef getValueAsString() const
Return the attribute's value as a string.
This file declares the IRTranslator pass.
RegisterTargetMachine - Helper template for registering a target machine implementation, for use in the target machine initialization function.
void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry &)
char & PostRAHazardRecognizerID
createPostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
Primary interface to the complete machine description for the target machine.
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
~AMDGPUTargetMachine() override
StringRef - Represent a constant reference to a string, i.e.
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
void initializeSILowerI1CopiesPass(PassRegistry &)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
ModulePass * createAMDGPUOpenCLImageTypeLoweringPass()
static cl::opt< bool > EnableR600StructurizeCFG("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
FunctionPass * createNaryReassociatePass()