LLVM 9.0.0svn
AMDGPUTargetMachine.cpp
1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The AMDGPU target machine contains all of the hardware specific
11 /// information needed to emit code for R600 and SI GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUAliasAnalysis.h"
18 #include "AMDGPUCallLowering.h"
19 #include "AMDGPUInstructionSelector.h"
20 #include "AMDGPULegalizerInfo.h"
21 #include "AMDGPUMacroFusion.h"
22 #include "AMDGPUTargetObjectFile.h"
23 #include "AMDGPUTargetTransformInfo.h"
24 #include "GCNIterativeScheduler.h"
25 #include "GCNSchedStrategy.h"
26 #include "R600MachineScheduler.h"
27 #include "SIMachineScheduler.h"
28 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
29 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
30 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
31 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/CodeGen/TargetPassConfig.h"
34 #include "llvm/IR/Attributes.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/LegacyPassManager.h"
37 #include "llvm/Pass.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/Compiler.h"
40 #include "llvm/Support/TargetRegistry.h"
41 #include "llvm/Target/TargetLoweringObjectFile.h"
42 #include "llvm/Transforms/IPO.h"
43 #include "llvm/Transforms/IPO/AlwaysInliner.h"
44 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
45 #include "llvm/Transforms/Scalar.h"
46 #include "llvm/Transforms/Scalar/GVN.h"
47 #include "llvm/Transforms/Utils.h"
48 #include "llvm/Transforms/Vectorize.h"
49 #include <memory>
50 
51 using namespace llvm;
52 
53 static cl::opt<bool> EnableR600StructurizeCFG(
54  "r600-ir-structurize",
55  cl::desc("Use StructurizeCFG IR pass"),
56  cl::init(true));
57 
58 static cl::opt<bool> EnableSROA(
59  "amdgpu-sroa",
60  cl::desc("Run SROA after promote alloca pass"),
61  cl::ReallyHidden,
62  cl::init(true));
63 
64 static cl::opt<bool>
65 EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
66  cl::desc("Run early if-conversion"),
67  cl::init(false));
68 
69 static cl::opt<bool> EnableR600IfConvert(
70  "r600-if-convert",
71  cl::desc("Use if conversion pass"),
72  cl::ReallyHidden,
73  cl::init(true));
74 
75 // Option to disable vectorizer for tests.
76 static cl::opt<bool> EnableLoadStoreVectorizer(
77  "amdgpu-load-store-vectorizer",
78  cl::desc("Enable load store vectorizer"),
79  cl::init(true),
80  cl::Hidden);
81 
82 // Option to control global loads scalarization
83 static cl::opt<bool> ScalarizeGlobal(
84  "amdgpu-scalarize-global-loads",
85  cl::desc("Enable global load scalarization"),
86  cl::init(true),
87  cl::Hidden);
88 
89 // Option to run internalize pass.
90 static cl::opt<bool> InternalizeSymbols(
91  "amdgpu-internalize-symbols",
92  cl::desc("Enable elimination of non-kernel functions and unused globals"),
93  cl::init(false),
94  cl::Hidden);
95 
96 // Option to inline all early.
97 static cl::opt<bool> EarlyInlineAll(
98  "amdgpu-early-inline-all",
99  cl::desc("Inline all functions early"),
100  cl::init(false),
101  cl::Hidden);
102 
103 static cl::opt<bool> EnableSDWAPeephole(
104  "amdgpu-sdwa-peephole",
105  cl::desc("Enable SDWA peepholer"),
106  cl::init(true));
107 
108 static cl::opt<bool> EnableDPPCombine(
109  "amdgpu-dpp-combine",
110  cl::desc("Enable DPP combiner"),
111  cl::init(true));
112 
113 // Enable address space based alias analysis
114 static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
115  cl::desc("Enable AMDGPU Alias Analysis"),
116  cl::init(true));
117 
118 // Option to run late CFG structurizer
119 static cl::opt<bool, true> LateCFGStructurize(
120  "amdgpu-late-structurize",
121  cl::desc("Enable late CFG structurization"),
122  cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
123  cl::Hidden);
124 
125 static cl::opt<bool, true> EnableAMDGPUFunctionCalls(
126  "amdgpu-function-calls",
127  cl::desc("Enable AMDGPU function call support"),
128  cl::location(AMDGPUTargetMachine::EnableFunctionCalls),
129  cl::init(false),
130  cl::Hidden);
131 
132 // Enable lib calls simplifications
133 static cl::opt<bool> EnableLibCallSimplify(
134  "amdgpu-simplify-libcall",
135  cl::desc("Enable amdgpu library simplifications"),
136  cl::init(true),
137  cl::Hidden);
138 
139 static cl::opt<bool> EnableLowerKernelArguments(
140  "amdgpu-ir-lower-kernel-arguments",
141  cl::desc("Lower kernel argument loads in IR pass"),
142  cl::init(true),
143  cl::Hidden);
144 
145 // Enable atomic optimization
146 static cl::opt<bool> EnableAtomicOptimizations(
147  "amdgpu-atomic-optimizations",
148  cl::desc("Enable atomic optimizations"),
149  cl::init(false),
150  cl::Hidden);
151 
152 // Enable Mode register optimization
153 static cl::opt<bool> EnableSIModeRegisterPass(
154  "amdgpu-mode-register",
155  cl::desc("Enable mode register pass"),
156  cl::init(true),
157  cl::Hidden);
158 
159 extern "C" void LLVMInitializeAMDGPUTarget() {
160  // Register the target
161  RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
162  RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
163 
213 }
214 
215 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
216  return llvm::make_unique<AMDGPUTargetObjectFile>();
217 }
218 
219 static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
220  return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
221 }
222 
223 static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
224  return new SIScheduleDAGMI(C);
225 }
226 
227 static ScheduleDAGInstrs *
228 createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
229  ScheduleDAGMILive *DAG =
230  new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
231  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
232  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
233  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
234  return DAG;
235 }
236 
237 static ScheduleDAGInstrs *
238 createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
239  auto DAG = new GCNIterativeScheduler(C,
240  GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
241  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
242  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
243  return DAG;
244 }
245 
246 static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
247  return new GCNIterativeScheduler(C,
248  GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
249 }
250 
251 static ScheduleDAGInstrs *
252 createIterativeILPMachineScheduler(MachineSchedContext *C) {
253  auto DAG = new GCNIterativeScheduler(C,
254  GCNIterativeScheduler::SCHEDULE_ILP);
255  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
256  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
257  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
258  return DAG;
259 }
260 
261 static MachineSchedRegistry
262 R600SchedRegistry("r600", "Run R600's custom scheduler",
263  createR600MachineScheduler);
264 
265 static MachineSchedRegistry
266 SISchedRegistry("si", "Run SI's custom scheduler",
267  createSIMachineScheduler);
268 
269 static MachineSchedRegistry
270 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
271  "Run GCN scheduler to maximize occupancy",
272  createGCNMaxOccupancyMachineScheduler);
273 
274 static MachineSchedRegistry
275 IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
276  "Run GCN scheduler to maximize occupancy (experimental)",
277  createIterativeGCNMaxOccupancyMachineScheduler);
278 
279 static MachineSchedRegistry
280 GCNMinRegSchedRegistry("gcn-minreg",
281  "Run GCN iterative scheduler for minimal register usage (experimental)",
282  createMinRegScheduler);
283 
284 static MachineSchedRegistry
285 GCNILPSchedRegistry("gcn-ilp",
286  "Run GCN iterative scheduler for ILP scheduling (experimental)",
287  createIterativeILPMachineScheduler);
288 
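// These registrations make the custom strategies selectable through the
// generic machine-scheduler command-line hook; for example (hypothetical
// invocation, assuming a GCN target and an input foo.ll):
//   llc -march=amdgcn -misched=gcn-ilp foo.ll
// would pick the iterative ILP scheduler registered above instead of the
// subtarget's default MachineScheduler strategy.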
289 static StringRef computeDataLayout(const Triple &TT) {
290  if (TT.getArch() == Triple::r600) {
291  // 32-bit pointers.
292  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
293  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
294  }
295 
296  // 32-bit private, local, and region pointers. 64-bit global, constant and
297  // flat.
298  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
299  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
300  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
301 }
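// A rough reading of the GCN data layout string above (standard LLVM data
// layout syntax, nothing AMDGPU-specific): "e" selects little-endian; the
// "p" entries give pointer size and alignment per address space (64-bit for
// the default/flat, global and constant spaces, 32-bit for private, local
// and region); the "v" entries give vector alignments; "n32:64" lists the
// native integer widths; "S32" is the natural stack alignment in bits; and
// "A5" places allocas in address space 5 (private/scratch).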
302 
303 LLVM_READNONE
304 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
305  if (!GPU.empty())
306  return GPU;
307 
308  if (TT.getArch() == Triple::amdgcn)
309  return "generic";
310 
311  return "r600";
312 }
313 
314 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
315  // The AMDGPU toolchain only supports generating shared objects, so we
316  // must always use PIC.
317  return Reloc::PIC_;
318 }
319 
320 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
321  StringRef CPU, StringRef FS,
322  TargetOptions Options,
323  Optional<Reloc::Model> RM,
324  Optional<CodeModel::Model> CM,
325  CodeGenOpt::Level OptLevel)
326  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
327  FS, Options, getEffectiveRelocModel(RM),
328  getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
329  TLOF(createTLOF(getTargetTriple())) {
330  initAsmInfo();
331 }
332 
333 bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
334 bool AMDGPUTargetMachine::EnableFunctionCalls = false;
335 
336 AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
337 
338 StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
339  Attribute GPUAttr = F.getFnAttribute("target-cpu");
340  return GPUAttr.hasAttribute(Attribute::None) ?
341  getTargetCPU() : GPUAttr.getValueAsString();
342 }
343 
344 StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
345  Attribute FSAttr = F.getFnAttribute("target-features");
346 
347  return FSAttr.hasAttribute(Attribute::None) ?
348  getTargetFeatureString() :
349  FSAttr.getValueAsString();
350 }
351 
352 /// Predicate for Internalize pass.
353 static bool mustPreserveGV(const GlobalValue &GV) {
354  if (const Function *F = dyn_cast<Function>(&GV))
355  return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
356 
357  return !GV.use_empty();
358 }
359 
360 void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
361  Builder.DivergentTarget = true;
362 
363  bool EnableOpt = getOptLevel() > CodeGenOpt::None;
364  bool Internalize = InternalizeSymbols;
365  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableAMDGPUFunctionCalls;
366  bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
367  bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
368 
369  if (EnableAMDGPUFunctionCalls) {
370  delete Builder.Inliner;
371  Builder.Inliner = createAMDGPUFunctionInliningPass();
372  }
373 
374  Builder.addExtension(
375  PassManagerBuilder::EP_ModuleOptimizerEarly,
376  [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
377  legacy::PassManagerBase &PM) {
378  if (AMDGPUAA) {
379  PM.add(createAMDGPUAAWrapperPass());
380  PM.add(createAMDGPUExternalAAWrapperPass());
381  }
382 
383  if (Internalize) {
384  PM.add(createInternalizePass(mustPreserveGV));
385  PM.add(createGlobalDCEPass());
386  }
387  if (EarlyInline)
388  PM.add(createAMDGPUAlwaysInlinePass(false));
389  });
390 
391  const auto &Opt = Options;
392  Builder.addExtension(
393  PassManagerBuilder::EP_EarlyAsPossible,
394  [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
395  legacy::PassManagerBase &PM) {
396  if (AMDGPUAA) {
397  PM.add(createAMDGPUAAWrapperPass());
398  PM.add(createAMDGPUExternalAAWrapperPass());
399  }
400  PM.add(createAMDGPUUseNativeCallsPass());
401  if (LibCallSimplify)
402  PM.add(createAMDGPUSimplifyLibCallsPass(Opt));
403  });
404 
405  Builder.addExtension(
406  PassManagerBuilder::EP_CGSCCOptimizerLate,
407  [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
408  // Add infer address spaces pass to the opt pipeline after inlining
409  // but before SROA to increase SROA opportunities.
410  PM.add(createInferAddressSpacesPass());
411 
412  // This should run after inlining to have any chance of doing anything,
413  // and before other cleanup optimizations.
414  PM.add(createAMDGPULowerKernelAttributesPass());
415  });
416 }
417 
418 //===----------------------------------------------------------------------===//
419 // R600 Target Machine (R600 -> Cayman)
420 //===----------------------------------------------------------------------===//
421 
422 R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
423  StringRef CPU, StringRef FS,
424  TargetOptions Options,
425  Optional<Reloc::Model> RM,
426  Optional<CodeModel::Model> CM,
427  CodeGenOpt::Level OL, bool JIT)
428  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
429  setRequiresStructuredCFG(true);
430 }
431 
432 const R600Subtarget *R600TargetMachine::getSubtargetImpl(
433  const Function &F) const {
434  StringRef GPU = getGPUName(F);
435  StringRef FS = getFeatureString(F);
436 
437  SmallString<128> SubtargetKey(GPU);
438  SubtargetKey.append(FS);
439 
440  auto &I = SubtargetMap[SubtargetKey];
441  if (!I) {
442  // This needs to be done before we create a new subtarget since any
443  // creation will depend on the TM and the code generation flags on the
444  // function that reside in TargetOptions.
445  resetTargetOptions(F);
446  I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
447  }
448 
449  return I.get();
450 }
451 
452 TargetTransformInfo
453 R600TargetMachine::getTargetTransformInfo(const Function &F) {
454  return TargetTransformInfo(R600TTIImpl(this, F));
455 }
456 
457 //===----------------------------------------------------------------------===//
458 // GCN Target Machine (SI+)
459 //===----------------------------------------------------------------------===//
460 
461 GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
462  StringRef CPU, StringRef FS,
463  TargetOptions Options,
464  Optional<Reloc::Model> RM,
465  Optional<CodeModel::Model> CM,
466  CodeGenOpt::Level OL, bool JIT)
467  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
468 
469 const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
470  StringRef GPU = getGPUName(F);
471  StringRef FS = getFeatureString(F);
472 
473  SmallString<128> SubtargetKey(GPU);
474  SubtargetKey.append(FS);
475 
476  auto &I = SubtargetMap[SubtargetKey];
477  if (!I) {
478  // This needs to be done before we create a new subtarget since any
479  // creation will depend on the TM and the code generation flags on the
480  // function that reside in TargetOptions.
481  resetTargetOptions(F);
482  I = llvm::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
483  }
484 
485  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
486 
487  return I.get();
488 }
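// Subtargets are cached per (GPU name + feature string) key, so functions
// whose "target-cpu"/"target-features" attributes differ get distinct
// GCNSubtarget instances while identical ones share a single subtarget; the
// amdgpu-scalarize-global-loads setting is re-applied on every lookup.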
489 
490 TargetTransformInfo
491 GCNTargetMachine::getTargetTransformInfo(const Function &F) {
492  return TargetTransformInfo(GCNTTIImpl(this, F));
493 }
494 
495 //===----------------------------------------------------------------------===//
496 // AMDGPU Pass Setup
497 //===----------------------------------------------------------------------===//
498 
499 namespace {
500 
501 class AMDGPUPassConfig : public TargetPassConfig {
502 public:
503  AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
504  : TargetPassConfig(TM, PM) {
505  // Exceptions and StackMaps are not supported, so these passes will never do
506  // anything.
507  disablePass(&StackMapLivenessID);
508  disablePass(&FuncletLayoutID);
509  }
510 
511  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
512  return getTM<AMDGPUTargetMachine>();
513  }
514 
515  ScheduleDAGInstrs *
516  createMachineScheduler(MachineSchedContext *C) const override {
517  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
518  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
519  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
520  return DAG;
521  }
522 
523  void addEarlyCSEOrGVNPass();
524  void addStraightLineScalarOptimizationPasses();
525  void addIRPasses() override;
526  void addCodeGenPrepare() override;
527  bool addPreISel() override;
528  bool addInstSelector() override;
529  bool addGCPasses() override;
530 };
531 
532 class R600PassConfig final : public AMDGPUPassConfig {
533 public:
534  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
535  : AMDGPUPassConfig(TM, PM) {}
536 
537  ScheduleDAGInstrs *createMachineScheduler(
538  MachineSchedContext *C) const override {
539  return createR600MachineScheduler(C);
540  }
541 
542  bool addPreISel() override;
543  bool addInstSelector() override;
544  void addPreRegAlloc() override;
545  void addPreSched2() override;
546  void addPreEmitPass() override;
547 };
548 
549 class GCNPassConfig final : public AMDGPUPassConfig {
550 public:
551  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
552  : AMDGPUPassConfig(TM, PM) {
553  // It is necessary to know the register usage of the entire call graph. We
554  // allow calls without EnableAMDGPUFunctionCalls if they are marked
555  // noinline, so this is always required.
556  setRequiresCodeGenSCCOrder(true);
557  }
558 
559  GCNTargetMachine &getGCNTargetMachine() const {
560  return getTM<GCNTargetMachine>();
561  }
562 
564  createMachineScheduler(MachineSchedContext *C) const override;
565 
566  bool addPreISel() override;
567  void addMachineSSAOptimization() override;
568  bool addILPOpts() override;
569  bool addInstSelector() override;
570  bool addIRTranslator() override;
571  bool addLegalizeMachineIR() override;
572  bool addRegBankSelect() override;
573  bool addGlobalInstructionSelect() override;
574  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
575  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
576  void addPreRegAlloc() override;
577  void addPostRegAlloc() override;
578  void addPreSched2() override;
579  void addPreEmitPass() override;
580 };
581 
582 } // end anonymous namespace
583 
584 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
585  if (getOptLevel() == CodeGenOpt::Aggressive)
586  addPass(createGVNPass());
587  else
588  addPass(createEarlyCSEPass());
589 }
590 
591 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
592  addPass(createLICMPass());
593  addPass(createSeparateConstOffsetFromGEPPass());
594  addPass(createSpeculativeExecutionPass());
595  // ReassociateGEPs exposes more opportunities for SLSR. See
596  // the example in reassociate-geps-and-slsr.ll.
597  addPass(createStraightLineStrengthReducePass());
598  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
599  // EarlyCSE can reuse.
600  addEarlyCSEOrGVNPass();
601  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
602  addPass(createNaryReassociatePass());
603  // NaryReassociate on GEPs creates redundant common expressions, so run
604  // EarlyCSE after it.
605  addPass(createEarlyCSEPass());
606 }
607 
608 void AMDGPUPassConfig::addIRPasses() {
609  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
610 
611  // There is no reason to run these.
612  disablePass(&StackMapLivenessID);
613  disablePass(&FuncletLayoutID);
614  disablePass(&PatchableFunctionID);
615 
616  addPass(createAtomicExpandPass());
617 
618  // This must occur before inlining, as the inliner will not look through
619  // bitcast calls.
620  addPass(createAMDGPUFixFunctionBitcastsPass());
621 
622  addPass(createAMDGPULowerIntrinsicsPass());
623 
624  // Function calls are not supported, so make sure we inline everything.
625  addPass(createAMDGPUAlwaysInlinePass());
626  addPass(createAlwaysInlinerLegacyPass());
627  // We need to add the barrier noop pass, otherwise adding the function
628  // inlining pass will cause all of the PassConfigs passes to be run
629  // one function at a time, which means if we have a module with two
630  // functions, then we will generate code for the first function
631  // without ever running any passes on the second.
632  addPass(createBarrierNoopPass());
633 
634  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
635  // TODO: May want to move later or split into an early and late one.
636 
637  addPass(createAMDGPUCodeGenPreparePass());
638  }
639 
640  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
641  if (TM.getTargetTriple().getArch() == Triple::r600)
642  addPass(createR600OpenCLImageTypeLoweringPass());
643 
644  // Replace OpenCL enqueued block function pointers with global variables.
645  addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());
646 
647  if (TM.getOptLevel() > CodeGenOpt::None) {
648  addPass(createInferAddressSpacesPass());
649  addPass(createAMDGPUPromoteAlloca());
650 
651  if (EnableSROA)
652  addPass(createSROAPass());
653 
654  addStraightLineScalarOptimizationPasses();
655 
656  if (EnableAMDGPUAliasAnalysis) {
657  addPass(createAMDGPUAAWrapperPass());
658  addPass(createExternalAAWrapperPass([](Pass &P, Function &,
659  AAResults &AAR) {
660  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
661  AAR.addAAResult(WrapperPass->getResult());
662  }));
663  }
664  }
665 
666  TargetPassConfig::addIRPasses();
667 
668  // EarlyCSE is not always strong enough to clean up what LSR produces. For
669  // example, GVN can combine
670  //
671  // %0 = add %a, %b
672  // %1 = add %b, %a
673  //
674  // and
675  //
676  // %0 = shl nsw %a, 2
677  // %1 = shl %a, 2
678  //
679  // but EarlyCSE can do neither of them.
680  if (getOptLevel() != CodeGenOpt::None)
681  addEarlyCSEOrGVNPass();
682 }
683 
684 void AMDGPUPassConfig::addCodeGenPrepare() {
685  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
686  EnableLowerKernelArguments)
687  addPass(createAMDGPULowerKernelArgumentsPass());
688 
689  TargetPassConfig::addCodeGenPrepare();
690 
691  if (EnableLoadStoreVectorizer)
692  addPass(createLoadStoreVectorizerPass());
693 }
694 
695 bool AMDGPUPassConfig::addPreISel() {
696  addPass(createLowerSwitchPass());
697  addPass(createFlattenCFGPass());
698  return false;
699 }
700 
701 bool AMDGPUPassConfig::addInstSelector() {
702  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
703  return false;
704 }
705 
706 bool AMDGPUPassConfig::addGCPasses() {
707  // Do nothing. GC is not supported.
708  return false;
709 }
710 
711 //===----------------------------------------------------------------------===//
712 // R600 Pass Setup
713 //===----------------------------------------------------------------------===//
714 
715 bool R600PassConfig::addPreISel() {
716  AMDGPUPassConfig::addPreISel();
717 
718  if (EnableR600StructurizeCFG)
719  addPass(createStructurizeCFGPass());
720  return false;
721 }
722 
723 bool R600PassConfig::addInstSelector() {
724  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
725  return false;
726 }
727 
728 void R600PassConfig::addPreRegAlloc() {
729  addPass(createR600VectorRegMerger());
730 }
731 
732 void R600PassConfig::addPreSched2() {
733  addPass(createR600EmitClauseMarkers(), false);
734  if (EnableR600IfConvert)
735  addPass(&IfConverterID, false);
736  addPass(createR600ClauseMergePass(), false);
737 }
738 
739 void R600PassConfig::addPreEmitPass() {
740  addPass(createAMDGPUCFGStructurizerPass(), false);
741  addPass(createR600ExpandSpecialInstrsPass(), false);
742  addPass(&FinalizeMachineBundlesID, false);
743  addPass(createR600Packetizer(), false);
744  addPass(createR600ControlFlowFinalizer(), false);
745 }
746 
747 TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
748  return new R600PassConfig(*this, PM);
749 }
750 
751 //===----------------------------------------------------------------------===//
752 // GCN Pass Setup
753 //===----------------------------------------------------------------------===//
754 
755 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
756  MachineSchedContext *C) const {
757  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
758  if (ST.enableSIScheduler())
759  return createSIMachineScheduler(C);
760  return createGCNMaxOccupancyMachineScheduler(C);
761 }
762 
763 bool GCNPassConfig::addPreISel() {
764  AMDGPUPassConfig::addPreISel();
765 
766  if (EnableAtomicOptimizations) {
767  addPass(createAMDGPUAtomicOptimizerPass());
768  }
769 
770  // FIXME: We need to run a pass to propagate the attributes when calls are
771  // supported.
772  addPass(createAMDGPUAnnotateKernelFeaturesPass());
773 
774  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
775  // regions formed by them.
776  addPass(&AMDGPUUnifyDivergentExitNodesID);
777  if (!LateCFGStructurize) {
778  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
779  }
780  addPass(createSinkingPass());
781  addPass(createAMDGPUAnnotateUniformValues());
782  if (!LateCFGStructurize) {
783  addPass(createSIAnnotateControlFlowPass());
784  }
785 
786  return false;
787 }
788 
789 void GCNPassConfig::addMachineSSAOptimization() {
790  TargetPassConfig::addMachineSSAOptimization();
791 
792  // We want to fold operands after PeepholeOptimizer has run (or as part of
793  // it), because it will eliminate extra copies making it easier to fold the
794  // real source operand. We want to eliminate dead instructions after, so that
795  // we see fewer uses of the copies. We then need to clean up the dead
796  // instructions leftover after the operands are folded as well.
797  //
798  // XXX - Can we get away without running DeadMachineInstructionElim again?
799  addPass(&SIFoldOperandsID);
800  if (EnableDPPCombine)
801  addPass(&GCNDPPCombineID);
803  addPass(&SILoadStoreOptimizerID);
804  if (EnableSDWAPeephole) {
805  addPass(&SIPeepholeSDWAID);
806  addPass(&EarlyMachineLICMID);
807  addPass(&MachineCSEID);
808  addPass(&SIFoldOperandsID);
809  addPass(&DeadMachineInstructionElimID);
810  }
811  addPass(createSIShrinkInstructionsPass());
812 }
813 
814 bool GCNPassConfig::addILPOpts() {
815  if (EnableEarlyIfConversion)
816  addPass(&EarlyIfConverterID);
817 
818  TargetPassConfig::addILPOpts();
819  return false;
820 }
821 
822 bool GCNPassConfig::addInstSelector() {
823  AMDGPUPassConfig::addInstSelector();
824  addPass(&SIFixSGPRCopiesID);
825  addPass(createSILowerI1CopiesPass());
826  addPass(createSIFixupVectorISelPass());
827  addPass(createSIAddIMGInitPass());
828  return false;
829 }
830 
831 bool GCNPassConfig::addIRTranslator() {
832  addPass(new IRTranslator());
833  return false;
834 }
835 
836 bool GCNPassConfig::addLegalizeMachineIR() {
837  addPass(new Legalizer());
838  return false;
839 }
840 
841 bool GCNPassConfig::addRegBankSelect() {
842  addPass(new RegBankSelect());
843  return false;
844 }
845 
846 bool GCNPassConfig::addGlobalInstructionSelect() {
847  addPass(new InstructionSelect());
848  return false;
849 }
850 
851 void GCNPassConfig::addPreRegAlloc() {
852  if (LateCFGStructurize) {
853  addPass(createAMDGPUMachineCFGStructurizerPass());
854  }
855  addPass(createSIWholeQuadModePass());
856 }
857 
858 void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
859  // FIXME: We have to disable the verifier here because of PHIElimination +
860  // TwoAddressInstructions disabling it.
861 
862  // This must be run immediately after phi elimination and before
863  // TwoAddressInstructions, otherwise the processing of the tied operand of
864  // SI_ELSE will introduce a copy of the tied operand source after the else.
865  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
866 
867  // This must be run after SILowerControlFlow, since it needs to use the
868  // machine-level CFG, but before register allocation.
869  insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);
870 
871  TargetPassConfig::addFastRegAlloc(RegAllocPass);
872 }
873 
874 void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
875  insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
876 
877  insertPass(&SIOptimizeExecMaskingPreRAID, &SIFormMemoryClausesID);
878 
879  // This must be run immediately after phi elimination and before
880  // TwoAddressInstructions, otherwise the processing of the tied operand of
881  // SI_ELSE will introduce a copy of the tied operand source after the else.
882  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
883 
884  // This must be run after SILowerControlFlow, since it needs to use the
885  // machine-level CFG, but before register allocation.
886  insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);
887 
888  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
889 }
890 
891 void GCNPassConfig::addPostRegAlloc() {
892  addPass(&SIFixVGPRCopiesID);
893  if (getOptLevel() > CodeGenOpt::None)
894  addPass(&SIOptimizeExecMaskingID);
895  TargetPassConfig::addPostRegAlloc();
896 }
897 
898 void GCNPassConfig::addPreSched2() {
899 }
900 
901 void GCNPassConfig::addPreEmitPass() {
902  addPass(createSIMemoryLegalizerPass());
903  addPass(createSIInsertWaitcntsPass());
904  addPass(createSIShrinkInstructionsPass());
905  addPass(createSIModeRegisterPass());
906 
907  // The hazard recognizer that runs as part of the post-ra scheduler does not
908  // guarantee to be able to handle all hazards correctly. This is because if there
909  // are multiple scheduling regions in a basic block, the regions are scheduled
910  // bottom up, so when we begin to schedule a region we don't know what
911  // instructions were emitted directly before it.
912  //
913  // Here we add a stand-alone hazard recognizer pass which can handle all
914  // cases.
915  //
916  // FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
917  // be better for it to emit S_NOP <N> when possible.
918  addPass(&PostRAHazardRecognizerID);
919 
920  addPass(&SIInsertSkipsPassID);
921  addPass(createSIDebuggerInsertNopsPass());
922  addPass(&BranchRelaxationPassID);
923 }
924 
925 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
926  return new GCNPassConfig(*this, PM);
927 }