LLVM  4.0.0
AMDGPUTargetMachine.cpp
Go to the documentation of this file.
1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief The AMDGPU target machine contains all of the hardware specific
12 /// information needed to emit code for R600 and SI GPUs.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUTargetMachine.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUCallLowering.h"
19 #include "AMDGPUTargetObjectFile.h"
21 #include "GCNSchedStrategy.h"
22 #include "R600MachineScheduler.h"
23 #include "SIMachineScheduler.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/Triple.h"
31 #include "llvm/CodeGen/Passes.h"
34 #include "llvm/Transforms/IPO.h"
36 #include "llvm/Transforms/Scalar.h"
39 #include "llvm/IR/Attributes.h"
40 #include "llvm/IR/Function.h"
42 #include "llvm/Pass.h"
44 #include "llvm/Support/Compiler.h"
46 #include <memory>
47 
48 using namespace llvm;
49 
51  "r600-ir-structurize",
52  cl::desc("Use StructurizeCFG IR pass"),
53  cl::init(true));
54 
56  "amdgpu-sroa",
57  cl::desc("Run SROA after promote alloca pass"),
59  cl::init(true));
60 
62  "r600-if-convert",
63  cl::desc("Use if conversion pass"),
65  cl::init(true));
66 
67 // Option to disable vectorizer for tests.
69  "amdgpu-load-store-vectorizer",
70  cl::desc("Enable load store vectorizer"),
71  cl::init(true),
72  cl::Hidden);
73 
74 // Option to control global load scalarization.
76  "amdgpu-scalarize-global-loads",
77  cl::desc("Enable global load scalarization"),
78  cl::init(false),
79  cl::Hidden);
80 
81 extern "C" void LLVMInitializeAMDGPUTarget() {
82  // Register the target
85 
105 }
106 
107 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
108  return llvm::make_unique<AMDGPUTargetObjectFile>();
109 }
110 
112  return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
113 }
114 
116  return new SIScheduleDAGMI(C);
117 }
118 
119 static ScheduleDAGInstrs *
121  ScheduleDAGMILive *DAG =
122  new ScheduleDAGMILive(C,
123  llvm::make_unique<GCNMaxOccupancySchedStrategy>(C));
126  return DAG;
127 }
128 
130 R600SchedRegistry("r600", "Run R600's custom scheduler",
132 
134 SISchedRegistry("si", "Run SI's custom scheduler",
136 
138 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
139  "Run GCN scheduler to maximize occupancy",
141 
142 static StringRef computeDataLayout(const Triple &TT) {
143  if (TT.getArch() == Triple::r600) {
144  // 32-bit pointers.
145  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
146  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
147  }
148 
149  // 32-bit private, local, and region pointers. 64-bit global, constant and
150  // flat.
151  return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
152  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
153  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
154 }
155 
157 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
158  if (!GPU.empty())
159  return GPU;
160 
161  // HSA only supports CI+, so change the default GPU to a CI for HSA.
162  if (TT.getArch() == Triple::amdgcn)
163  return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti";
164 
165  return "r600";
166 }
167 
169  // The AMDGPU toolchain only supports generating shared objects, so we
170  // must always use PIC.
171  return Reloc::PIC_;
172 }
173 
175  StringRef CPU, StringRef FS,
176  TargetOptions Options,
178  CodeModel::Model CM,
179  CodeGenOpt::Level OptLevel)
180  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
181  FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
182  TLOF(createTLOF(getTargetTriple())) {
183  initAsmInfo();
184 }
185 
187 
189  Attribute GPUAttr = F.getFnAttribute("target-cpu");
190  return GPUAttr.hasAttribute(Attribute::None) ?
191  getTargetCPU() : GPUAttr.getValueAsString();
192 }
193 
195  Attribute FSAttr = F.getFnAttribute("target-features");
196 
197  return FSAttr.hasAttribute(Attribute::None) ?
199  FSAttr.getValueAsString();
200 }
201 
204 }
205 
206 //===----------------------------------------------------------------------===//
207 // R600 Target Machine (R600 -> Cayman)
208 //===----------------------------------------------------------------------===//
209 
211  StringRef CPU, StringRef FS,
212  TargetOptions Options,
215  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
217 }
218 
220  const Function &F) const {
221  StringRef GPU = getGPUName(F);
222  StringRef FS = getFeatureString(F);
223 
224  SmallString<128> SubtargetKey(GPU);
225  SubtargetKey.append(FS);
226 
227  auto &I = SubtargetMap[SubtargetKey];
228  if (!I) {
229  // This needs to be done before we create a new subtarget since any
230  // creation will depend on the TM and the code generation flags on the
231  // function that reside in TargetOptions.
233  I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
234  }
235 
236  return I.get();
237 }
238 
239 //===----------------------------------------------------------------------===//
240 // GCN Target Machine (SI+)
241 //===----------------------------------------------------------------------===//
242 
243 #ifdef LLVM_BUILD_GLOBAL_ISEL
244 namespace {
245 
246 struct SIGISelActualAccessor : public GISelAccessor {
247  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
248  const AMDGPUCallLowering *getCallLowering() const override {
249  return CallLoweringInfo.get();
250  }
251 };
252 
253 } // end anonymous namespace
254 #endif
255 
257  StringRef CPU, StringRef FS,
258  TargetOptions Options,
261  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
262 
264  StringRef GPU = getGPUName(F);
265  StringRef FS = getFeatureString(F);
266 
267  SmallString<128> SubtargetKey(GPU);
268  SubtargetKey.append(FS);
269 
270  auto &I = SubtargetMap[SubtargetKey];
271  if (!I) {
272  // This needs to be done before we create a new subtarget since any
273  // creation will depend on the TM and the code generation flags on the
274  // function that reside in TargetOptions.
276  I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);
277 
278 #ifndef LLVM_BUILD_GLOBAL_ISEL
279  GISelAccessor *GISel = new GISelAccessor();
280 #else
281  SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
282  GISel->CallLoweringInfo.reset(
283  new AMDGPUCallLowering(*I->getTargetLowering()));
284 #endif
285 
286  I->setGISelAccessor(*GISel);
287  }
288 
289  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
290 
291  return I.get();
292 }
293 
294 //===----------------------------------------------------------------------===//
295 // AMDGPU Pass Setup
296 //===----------------------------------------------------------------------===//
297 
298 namespace {
299 
300 class AMDGPUPassConfig : public TargetPassConfig {
301 public:
302  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
303  : TargetPassConfig(TM, PM) {
304  // Exceptions and StackMaps are not supported, so these passes will never do
305  // anything.
306  disablePass(&StackMapLivenessID);
307  disablePass(&FuncletLayoutID);
308  }
309 
310  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
311  return getTM<AMDGPUTargetMachine>();
312  }
313 
315  createMachineScheduler(MachineSchedContext *C) const override {
319  return DAG;
320  }
321 
322  void addEarlyCSEOrGVNPass();
323  void addStraightLineScalarOptimizationPasses();
324  void addIRPasses() override;
325  void addCodeGenPrepare() override;
326  bool addPreISel() override;
327  bool addInstSelector() override;
328  bool addGCPasses() override;
329 };
330 
331 class R600PassConfig final : public AMDGPUPassConfig {
332 public:
333  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
334  : AMDGPUPassConfig(TM, PM) {}
335 
336  ScheduleDAGInstrs *createMachineScheduler(
337  MachineSchedContext *C) const override {
338  return createR600MachineScheduler(C);
339  }
340 
341  bool addPreISel() override;
342  void addPreRegAlloc() override;
343  void addPreSched2() override;
344  void addPreEmitPass() override;
345 };
346 
347 class GCNPassConfig final : public AMDGPUPassConfig {
348 public:
349  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
350  : AMDGPUPassConfig(TM, PM) {}
351 
352  GCNTargetMachine &getGCNTargetMachine() const {
353  return getTM<GCNTargetMachine>();
354  }
355 
357  createMachineScheduler(MachineSchedContext *C) const override;
358 
359  void addIRPasses() override;
360  bool addPreISel() override;
361  void addMachineSSAOptimization() override;
362  bool addInstSelector() override;
363 #ifdef LLVM_BUILD_GLOBAL_ISEL
364  bool addIRTranslator() override;
365  bool addLegalizeMachineIR() override;
366  bool addRegBankSelect() override;
367  bool addGlobalInstructionSelect() override;
368 #endif
369  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
370  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
371  void addPreRegAlloc() override;
372  void addPostRegAlloc() override;
373  void addPreSched2() override;
374  void addPreEmitPass() override;
375 };
376 
377 } // end anonymous namespace
378 
380  return TargetIRAnalysis([this](const Function &F) {
381  return TargetTransformInfo(AMDGPUTTIImpl(this, F));
382  });
383 }
384 
385 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
386  if (getOptLevel() == CodeGenOpt::Aggressive)
387  addPass(createGVNPass());
388  else
389  addPass(createEarlyCSEPass());
390 }
391 
392 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
395  // ReassociateGEPs exposes more opportunities for SLSR. See
396  // the example in reassociate-geps-and-slsr.ll.
398  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
399  // EarlyCSE can reuse.
400  addEarlyCSEOrGVNPass();
401  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
402  addPass(createNaryReassociatePass());
403  // NaryReassociate on GEPs creates redundant common expressions, so run
404  // EarlyCSE after it.
405  addPass(createEarlyCSEPass());
406 }
407 
408 void AMDGPUPassConfig::addIRPasses() {
409  // There is no reason to run these.
410  disablePass(&StackMapLivenessID);
411  disablePass(&FuncletLayoutID);
412  disablePass(&PatchableFunctionID);
413 
414  // Function calls are not supported, so make sure we inline everything.
415  addPass(createAMDGPUAlwaysInlinePass());
417  // We need to add the barrier noop pass, otherwise adding the function
418  // inlining pass will cause all of the PassConfigs passes to be run
419  // one function at a time, which means if we have a module with two
420  // functions, then we will generate code for the first function
421  // without ever running any passes on the second.
422  addPass(createBarrierNoopPass());
423 
424  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
426 
427  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
428  if (TM.getOptLevel() > CodeGenOpt::None) {
429  addPass(createAMDGPUPromoteAlloca(&TM));
430 
431  if (EnableSROA)
432  addPass(createSROAPass());
433 
434  addStraightLineScalarOptimizationPasses();
435  }
436 
438 
439  // EarlyCSE is not always strong enough to clean up what LSR produces. For
440  // example, GVN can combine
441  //
442  // %0 = add %a, %b
443  // %1 = add %b, %a
444  //
445  // and
446  //
447  // %0 = shl nsw %a, 2
448  // %1 = shl %a, 2
449  //
450  // but EarlyCSE can do neither of them.
451  if (getOptLevel() != CodeGenOpt::None)
452  addEarlyCSEOrGVNPass();
453 }
454 
455 void AMDGPUPassConfig::addCodeGenPrepare() {
457 
460 }
461 
462 bool AMDGPUPassConfig::addPreISel() {
463  addPass(createFlattenCFGPass());
464  return false;
465 }
466 
467 bool AMDGPUPassConfig::addInstSelector() {
468  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
469  return false;
470 }
471 
472 bool AMDGPUPassConfig::addGCPasses() {
473  // Do nothing. GC is not supported.
474  return false;
475 }
476 
477 //===----------------------------------------------------------------------===//
478 // R600 Pass Setup
479 //===----------------------------------------------------------------------===//
480 
481 bool R600PassConfig::addPreISel() {
482  AMDGPUPassConfig::addPreISel();
483 
485  addPass(createStructurizeCFGPass());
486  return false;
487 }
488 
489 void R600PassConfig::addPreRegAlloc() {
490  addPass(createR600VectorRegMerger(*TM));
491 }
492 
493 void R600PassConfig::addPreSched2() {
494  addPass(createR600EmitClauseMarkers(), false);
496  addPass(&IfConverterID, false);
497  addPass(createR600ClauseMergePass(*TM), false);
498 }
499 
500 void R600PassConfig::addPreEmitPass() {
501  addPass(createAMDGPUCFGStructurizerPass(), false);
502  addPass(createR600ExpandSpecialInstrsPass(*TM), false);
503  addPass(&FinalizeMachineBundlesID, false);
504  addPass(createR600Packetizer(*TM), false);
505  addPass(createR600ControlFlowFinalizer(*TM), false);
506 }
507 
509  return new R600PassConfig(this, PM);
510 }
511 
512 //===----------------------------------------------------------------------===//
513 // GCN Pass Setup
514 //===----------------------------------------------------------------------===//
515 
516 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
517  MachineSchedContext *C) const {
518  const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
519  if (ST.enableSIScheduler())
520  return createSIMachineScheduler(C);
522 }
523 
524 bool GCNPassConfig::addPreISel() {
525  AMDGPUPassConfig::addPreISel();
526 
527  // FIXME: We need to run a pass to propagate the attributes when calls are
528  // supported.
530  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
531  addPass(createSinkingPass());
532  addPass(createSITypeRewriter());
535 
536  return false;
537 }
538 
539 void GCNPassConfig::addMachineSSAOptimization() {
541 
542  // We want to fold operands after PeepholeOptimizer has run (or as part of
543  // it), because it will eliminate extra copies making it easier to fold the
544  // real source operand. We want to eliminate dead instructions after, so that
545  // we see fewer uses of the copies. We then need to clean up the dead
546  // instructions leftover after the operands are folded as well.
547  //
548  // XXX - Can we get away without running DeadMachineInstructionElim again?
549  addPass(&SIFoldOperandsID);
551  addPass(&SILoadStoreOptimizerID);
552 }
553 
554 void GCNPassConfig::addIRPasses() {
555  // TODO: May want to move later or split into an early and late one.
556  addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine()));
557 
558  AMDGPUPassConfig::addIRPasses();
559 }
560 
561 bool GCNPassConfig::addInstSelector() {
562  AMDGPUPassConfig::addInstSelector();
563  addPass(createSILowerI1CopiesPass());
564  addPass(&SIFixSGPRCopiesID);
565  return false;
566 }
567 
568 #ifdef LLVM_BUILD_GLOBAL_ISEL
569 bool GCNPassConfig::addIRTranslator() {
570  addPass(new IRTranslator());
571  return false;
572 }
573 
574 bool GCNPassConfig::addLegalizeMachineIR() {
575  return false;
576 }
577 
578 bool GCNPassConfig::addRegBankSelect() {
579  return false;
580 }
581 
582 bool GCNPassConfig::addGlobalInstructionSelect() {
583  return false;
584 }
585 #endif
586 
587 void GCNPassConfig::addPreRegAlloc() {
589  addPass(createSIWholeQuadModePass());
590 }
591 
592 void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
593  // FIXME: We have to disable the verifier here because of PHIElimination +
594  // TwoAddressInstructions disabling it.
595 
596  // This must be run immediately after phi elimination and before
597  // TwoAddressInstructions, otherwise the processing of the tied operand of
598  // SI_ELSE will introduce a copy of the tied operand source after the else.
599  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
600 
601  TargetPassConfig::addFastRegAlloc(RegAllocPass);
602 }
603 
604 void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
605  // This needs to be run directly before register allocation because earlier
606  // passes might recompute live intervals.
608 
609  // This must be run immediately after phi elimination and before
610  // TwoAddressInstructions, otherwise the processing of the tied operand of
611  // SI_ELSE will introduce a copy of the tied operand source after the else.
612  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
613 
615 }
616 
617 void GCNPassConfig::addPostRegAlloc() {
618  addPass(&SIOptimizeExecMaskingID);
620 }
621 
622 void GCNPassConfig::addPreSched2() {
623 }
624 
625 void GCNPassConfig::addPreEmitPass() {
626  // The hazard recognizer that runs as part of the post-ra scheduler is not
627  // guaranteed to handle all hazards correctly. This is because if there
628  // are multiple scheduling regions in a basic block, the regions are scheduled
629  // bottom up, so when we begin to schedule a region we don't know what
630  // instructions were emitted directly before it.
631  //
632  // Here we add a stand-alone hazard recognizer pass which can handle all
633  // cases.
634  addPass(&PostRAHazardRecognizerID);
635 
636  addPass(createSIInsertWaitsPass());
638  addPass(&SIInsertSkipsPassID);
640  addPass(&BranchRelaxationPassID);
641 }
642 
644  return new GCNPassConfig(this, PM);
645 }
FunctionPass * createSpeculativeExecutionPass()
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:279
FunctionPass * createStraightLineStrengthReducePass()
FunctionPass * createGVNPass(bool NoLoads=false)
Create a legacy GVN pass.
Definition: GVN.cpp:2797
The goal of this helper class is to gather the accessor to all the APIs related to GlobalISel...
Definition: GISelAccessor.h:29
StringRef getTargetCPU() const
Target & getTheGCNTarget()
The target for GCN GPUs.
This file provides a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine...
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
const AMDGPUSubtarget * getSubtargetImpl() const
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with...
char & SILoadStoreOptimizerID
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
This file describes how to lower LLVM calls to machine code calls.
char & FuncletLayoutID
This pass lays out funclets contiguously.
Analysis pass providing the TargetTransformInfo.
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
FunctionPass * createAMDGPUPromoteAlloca(const TargetMachine *TM=nullptr)
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:234
R600 Machine Scheduler interface.
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
FunctionPass * createAMDGPUCFGStructurizerPass()
MachineSchedRegistry provides a selection of available machine instruction schedulers.
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form...
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a AMDGPU-specific.
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
bool hasAttribute(AttrKind Val) const
Return true if the attribute is present.
Definition: Attributes.cpp:185
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
bool enableSIScheduler() const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
FunctionPass * createR600ExpandSpecialInstrsPass(TargetMachine &tm)
Pass * createLoadStoreVectorizerPass()
This file declares the AMDGPU-specific subclass of TargetLoweringObjectFile.
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, CodeModel::Model CM, CodeGenOpt::Level OL)
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
This file contains the simple types necessary to represent the attributes associated with functions a...
No attributes have been set.
Definition: Attributes.h:69
FunctionPass * createSinkingPass()
Definition: Sink.cpp:306
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, CodeModel::Model CM, CodeGenOpt::Level OL)
char & SIFixControlFlowLiveIntervalsID
char & FinalizeMachineBundlesID
FinalizeMachineBundles - This pass finalizes machine instruction bundles (created earlier, e.g.
FunctionPass * createR600VectorRegMerger(TargetMachine &tm)
static MachineSchedRegistry R600SchedRegistry("r600","Run R600's custom scheduler", createR600MachineScheduler)
Target-Independent Code Generator Pass Configuration Options.
static StringRef computeDataLayout(const Triple &TT)
#define F(x, y, z)
Definition: MD5.cpp:51
FunctionPass * createSITypeRewriter()
FunctionPass * createR600ClauseMergePass(TargetMachine &tm)
FunctionPass * createSILowerI1CopiesPass()
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:270
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
void initializeSIFixSGPRCopiesPass(PassRegistry &)
FunctionPass * createR600ControlFlowFinalizer(TargetMachine &tm)
SI Machine Scheduler interface.
void append(in_iter S, in_iter E)
Append from an iterator pair.
Definition: SmallString.h:75
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
FunctionPass * createSeparateConstOffsetFromGEPPass(const TargetMachine *TM=nullptr, bool LowerGEP=false)
FunctionPass * createSIDebuggerInsertNopsPass()
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
FunctionPass * createFlattenCFGPass()
FunctionPass * createSIWholeQuadModePass()
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
char & SIInsertSkipsPassID
virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass)
addOptimizedRegAlloc - Add passes related to register allocation.
void LLVMInitializeAMDGPUTarget()
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy","Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
char & AMDGPUAnnotateKernelFeaturesID
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
FunctionPass * createR600Packetizer(TargetMachine &tm)
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
static MachineSchedRegistry SISchedRegistry("si","Run SI's custom scheduler", createSIMachineScheduler)
This class describes a target machine that is implemented with the LLVM target-independent code gener...
FunctionPass * createAMDGPUUnifyMetadataPass()
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, CodeModel::Model CM, CodeGenOpt::Level OL)
void initializeSILowerControlFlowPass(PassRegistry &)
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
The AMDGPU TargetMachine interface definition for hw codegen targets.
static cl::opt< bool > EnableR600IfConvert("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static ScheduleDAGInstrs * createR600MachineScheduler(MachineSchedContext *C)
void initializeSIShrinkInstructionsPass(PassRegistry &)
void initializeSIInsertSkipsPass(PassRegistry &)
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
FunctionPass * createAMDGPUAnnotateUniformValues()
Provides passes for inlining "always_inline" functions.
char & SIOptimizeExecMaskingID
StringRef getFeatureString(const Function &F) const
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
StringRef getTargetFeatureString() const
ModulePass * createAMDGPUAlwaysInlinePass()
void addEarlyAsPossiblePasses(PassManagerBase &PM) override
Add target-specific function passes that should be run as early as possible in the optimization pipel...
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
FunctionPass * createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM=nullptr)
void initializeSIWholeQuadModePass(PassRegistry &)
void setRequiresStructuredCFG(bool Value)
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
Target - Wrapper for Target specific information.
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:130
virtual void addFastRegAlloc(FunctionPass *RegAllocPass)
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
void initializeSILoadStoreOptimizerPass(PassRegistry &)
char & SILowerControlFlowID
void initializeSIAnnotateControlFlowPass(PassRegistry &)
ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of MachineInstrs. ...
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
void initializeSIFoldOperandsPass(PassRegistry &)
char & SIFoldOperandsID
const TargetRegisterInfo * TRI
Definition: ScheduleDAG.h:580
FunctionPass * createSIShrinkInstructionsPass()
void initializeSIDebuggerInsertNopsPass(PassRegistry &)
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
#define LLVM_READNONE
Definition: Compiler.h:167
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
char & SIFixSGPRCopiesID
void initializeSIInsertWaitsPass(PassRegistry &)
#define I(x, y, z)
Definition: MD5.cpp:54
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
FunctionPass * createSROAPass()
Definition: SROA.cpp:4283
StringRef getGPUName(const Function &F) const
const TargetInstrInfo * TII
Definition: ScheduleDAG.h:579
TargetIRAnalysis getTargetIRAnalysis() override
Get a TargetIRAnalysis implementation for the target.
FunctionPass * createSIInsertWaitsPass()
FunctionPass * createR600EmitClauseMarkers()
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:178
This file declares the IRTranslator pass.
RegisterTargetMachine - Helper template for registering a target machine implementation, for use in the target machine initialization function.
void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry &)
char & PostRAHazardRecognizerID
createPostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
Primary interface to the complete machine description for the target machine.
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1050
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:40
void initializeSILowerI1CopiesPass(PassRegistry &)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
ModulePass * createAMDGPUOpenCLImageTypeLoweringPass()
static cl::opt< bool > EnableR600StructurizeCFG("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
FunctionPass * createNaryReassociatePass()