LLVM  9.0.0svn
AMDGPUTargetMachine.cpp
Go to the documentation of this file.
1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The AMDGPU target machine contains all of the hardware specific
11 /// information needed to emit code for R600 and SI GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUAliasAnalysis.h"
18 #include "AMDGPUCallLowering.h"
20 #include "AMDGPULegalizerInfo.h"
21 #include "AMDGPUMacroFusion.h"
22 #include "AMDGPUTargetObjectFile.h"
24 #include "GCNIterativeScheduler.h"
25 #include "GCNSchedStrategy.h"
26 #include "R600MachineScheduler.h"
27 #include "SIMachineFunctionInfo.h"
28 #include "SIMachineScheduler.h"
35 #include "llvm/CodeGen/Passes.h"
37 #include "llvm/IR/Attributes.h"
38 #include "llvm/IR/Function.h"
40 #include "llvm/Pass.h"
42 #include "llvm/Support/Compiler.h"
45 #include "llvm/Transforms/IPO.h"
48 #include "llvm/Transforms/Scalar.h"
50 #include "llvm/Transforms/Utils.h"
52 #include <memory>
53 
54 using namespace llvm;
55 
// Command-line options controlling the AMDGPU codegen pipeline.
// NOTE(review): several cl::opt declarations below are missing their opening
// "static cl::opt<...> Name(" line in this extraction (e.g. before
// "r600-ir-structurize", "amdgpu-sroa", "r600-if-convert"); restore the
// dropped lines from the upstream file before compiling.
57  "r600-ir-structurize",
58  cl::desc("Use StructurizeCFG IR pass"),
59  cl::init(true));
60 
62  "amdgpu-sroa",
63  cl::desc("Run SROA after promote alloca pass"),
65  cl::init(true));
66 
67 static cl::opt<bool>
68 EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
69  cl::desc("Run early if-conversion"),
70  cl::init(false));
71 
72 static cl::opt<bool>
73 OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
74  cl::desc("Run pre-RA exec mask optimizations"),
75  cl::init(true));
76 
78  "r600-if-convert",
79  cl::desc("Use if conversion pass"),
81  cl::init(true));
82 
83 // Option to disable vectorizer for tests.
85  "amdgpu-load-store-vectorizer",
86  cl::desc("Enable load store vectorizer"),
87  cl::init(true),
88  cl::Hidden);
89 
90 // Option to control global loads scalarization
92  "amdgpu-scalarize-global-loads",
93  cl::desc("Enable global load scalarization"),
94  cl::init(true),
95  cl::Hidden);
96 
97 // Option to run internalize pass.
99  "amdgpu-internalize-symbols",
100  cl::desc("Enable elimination of non-kernel functions and unused globals"),
101  cl::init(false),
102  cl::Hidden);
103 
104 // Option to inline all early.
106  "amdgpu-early-inline-all",
107  cl::desc("Inline all functions early"),
108  cl::init(false),
109  cl::Hidden);
110 
112  "amdgpu-sdwa-peephole",
113  cl::desc("Enable SDWA peepholer"),
114  cl::init(true));
115 
117  "amdgpu-dpp-combine",
118  cl::desc("Enable DPP combiner"),
119  cl::init(true));
120 
121 // Enable address space based alias analysis
122 static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
123  cl::desc("Enable AMDGPU Alias Analysis"),
124  cl::init(true));
125 
126 // Option to run late CFG structurizer
128  "amdgpu-late-structurize",
129  cl::desc("Enable late CFG structurization"),
131  cl::Hidden);
132 
134  "amdgpu-function-calls",
135  cl::desc("Enable AMDGPU function call support"),
137  cl::init(true),
138  cl::Hidden);
139 
140 // Enable lib calls simplifications
142  "amdgpu-simplify-libcall",
143  cl::desc("Enable amdgpu library simplifications"),
144  cl::init(true),
145  cl::Hidden);
146 
148  "amdgpu-ir-lower-kernel-arguments",
149  cl::desc("Lower kernel argument loads in IR pass"),
150  cl::init(true),
151  cl::Hidden);
152 
154  "amdgpu-reassign-regs",
155  cl::desc("Enable register reassign optimizations on gfx10+"),
156  cl::init(true),
157  cl::Hidden);
158 
159 // Enable atomic optimization
161  "amdgpu-atomic-optimizations",
162  cl::desc("Enable atomic optimizations"),
163  cl::init(false),
164  cl::Hidden);
165 
166 // Enable Mode register optimization
168  "amdgpu-mode-register",
169  cl::desc("Enable mode register pass"),
170  cl::init(true),
171  cl::Hidden);
172 
173 // Option is used in lit tests to prevent deadcoding of patterns inspected.
174 static cl::opt<bool>
175 EnableDCEInRA("amdgpu-dce-in-ra",
176  cl::init(true), cl::Hidden,
177  cl::desc("Enable machine DCE inside regalloc"));
178 
180  "amdgpu-scalar-ir-passes",
181  cl::desc("Enable scalar IR passes"),
182  cl::init(true),
183  cl::Hidden);
184 
// Target registration entry point, called by LLVM's target initialization
// machinery.
// NOTE(review): original body lines 187-239 (RegisterTargetMachine calls and
// the initialize*Pass registrations) are missing from this extraction.
185 extern "C" void LLVMInitializeAMDGPUTarget() {
186  // Register the target
189 
240 }
241 
242 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
243  return llvm::make_unique<AMDGPUTargetObjectFile>();
244 }
245 
// Factory for R600's custom machine scheduler (a ScheduleDAGMILive driven by
// R600SchedStrategy).
// NOTE(review): the signature line (original line 246) is missing from this
// extraction.
247  return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
248 }
249 
// Factory for the SI machine scheduler.
// NOTE(review): the signature line (original line 250) is missing from this
// extraction.
251  return new SIScheduleDAGMI(C);
252 }
253 
254 static ScheduleDAGInstrs *
// Builds a GCNScheduleDAGMILive using the max-occupancy strategy.
// NOTE(review): the function name line and the DAG->addMutation lines
// (original lines 255, 258-260) are missing from this extraction.
256  ScheduleDAGMILive *DAG =
257  new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
261  return DAG;
262 }
263 
264 static ScheduleDAGInstrs *
// Builds an iterative GCN scheduler and attaches load/store clustering
// mutations so memory operations scheduled adjacently can be combined later.
// NOTE(review): the name line and the strategy argument (original lines
// 265, 267) are missing from this extraction.
266  auto DAG = new GCNIterativeScheduler(C,
268  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
269  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
270  return DAG;
271 }
272 
274  return new GCNIterativeScheduler(C,
276 }
277 
278 static ScheduleDAGInstrs *
// Iterative GCN scheduler with load/store clustering plus AMDGPU macro
// fusion mutations (the ILP configuration, per the registry below).
// NOTE(review): the name line and strategy argument (original lines 279,
// 281) are missing from this extraction.
280  auto DAG = new GCNIterativeScheduler(C,
282  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
283  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
284  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
285  return DAG;
286 }
287 
// Registry entries exposing the schedulers above via -misched=<name>.
// NOTE(review): each registry's "static MachineSchedRegistry" opening line
// and its factory-function argument line are missing from this extraction.
289 R600SchedRegistry("r600", "Run R600's custom scheduler",
291 
293 SISchedRegistry("si", "Run SI's custom scheduler",
295 
297 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
298  "Run GCN scheduler to maximize occupancy",
300 
302 IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
303  "Run GCN scheduler to maximize occupancy (experimental)",
305 
307 GCNMinRegSchedRegistry("gcn-minreg",
308  "Run GCN iterative scheduler for minimal register usage (experimental)",
310 
312 GCNILPSchedRegistry("gcn-ilp",
313  "Run GCN iterative scheduler for ILP scheduling (experimental)",
315 
316 static StringRef computeDataLayout(const Triple &TT) {
317  if (TT.getArch() == Triple::r600) {
318  // 32-bit pointers.
319  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
320  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
321  }
322 
323  // 32-bit private, local, and region pointers. 64-bit global, constant and
324  // flat, non-integral buffer fat pointers.
325  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
326  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
327  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
328  "-ni:7";
329 }
330 
332 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
333  if (!GPU.empty())
334  return GPU;
335 
336  // Need to default to a target with flat support for HSA.
337  if (TT.getArch() == Triple::amdgcn)
338  return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
339 
340  return "r600";
341 }
342 
// NOTE(review): the signature line (original line 343, taking the optional
// requested Reloc::Model) is missing from this extraction.
344  // The AMDGPU toolchain only supports generating shared objects, so we
345  // must always use PIC.
346  return Reloc::PIC_;
347 }
348 
// Base-class constructor shared by the R600 and GCN target machines: forwards
// the computed data layout, default GPU, relocation and code models to
// LLVMTargetMachine, then installs the TLOF and asm info.
// NOTE(review): the opening "AMDGPUTargetMachine::AMDGPUTargetMachine(..."
// line and the Optional<Reloc::Model>/Optional<CodeModel::Model> parameter
// lines (original lines 349, 352-353) are missing from this extraction.
350  StringRef CPU, StringRef FS,
351  TargetOptions Options,
354  CodeGenOpt::Level OptLevel)
355  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
356  FS, Options, getEffectiveRelocModel(RM),
357  getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
358  TLOF(createTLOF(getTargetTriple())) {
359  initAsmInfo();
360 }
361 
364 
366 
// Returns the per-function "target-cpu" attribute if present, otherwise the
// module-level target CPU.
// NOTE(review): the signature line (original line 367) is missing from this
// extraction.
368  Attribute GPUAttr = F.getFnAttribute("target-cpu");
369  return GPUAttr.hasAttribute(Attribute::None) ?
370  getTargetCPU() : GPUAttr.getValueAsString();
371 }
372 
// Returns the per-function "target-features" attribute if present, otherwise
// the module-level feature string.
// NOTE(review): the signature line and the true-branch expression (original
// lines 373 and 377, the latter presumably getTargetFeatureString()) are
// missing from this extraction — verify against upstream.
374  Attribute FSAttr = F.getFnAttribute("target-features");
375 
376  return FSAttr.hasAttribute(Attribute::None) ?
378  FSAttr.getValueAsString();
379 }
380 
381 /// Predicate for Internalize pass.
382 static bool mustPreserveGV(const GlobalValue &GV) {
383  if (const Function *F = dyn_cast<Function>(&GV))
384  return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
385 
386  return !GV.use_empty();
387 }
388 
// Hooks AMDGPU-specific IR passes into the PassManagerBuilder pipeline
// (internalization, early inlining, AMDGPU AA, libcall simplification).
// NOTE(review): the signature line and many addPass/extension-point lines
// are missing from this extraction — restore from upstream before relying
// on the exact pipeline shown here.
390  Builder.DivergentTarget = true;
391 
392  bool EnableOpt = getOptLevel() > CodeGenOpt::None;
393  bool Internalize = InternalizeSymbols;
394  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
395  bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
396  bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
397 
398  if (EnableFunctionCalls) {
// Replace the default inliner with the AMDGPU-specific one.
399  delete Builder.Inliner;
401  }
402 
403  Builder.addExtension(
405  [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
407  if (AMDGPUAA) {
410  }
412  if (Internalize) {
// Internalization is followed by GlobalDCE to delete what it made dead.
414  PM.add(createGlobalDCEPass());
415  }
416  if (EarlyInline)
418  });
419 
420  const auto &Opt = Options;
421  Builder.addExtension(
423  [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
425  if (AMDGPUAA) {
428  }
430  if (LibCallSimplify)
432  });
433 
434  Builder.addExtension(
437  // Add infer address spaces pass to the opt pipeline after inlining
438  // but before SROA to increase SROA opportunities.
440 
441  // This should run after inlining to have any chance of doing anything,
442  // and before other cleanup optimizations.
444  });
445 }
446 
447 //===----------------------------------------------------------------------===//
448 // R600 Target Machine (R600 -> Cayman)
449 //===----------------------------------------------------------------------===//
450 
// R600 target machine constructor.
// NOTE(review): the opening "R600TargetMachine::R600TargetMachine(..." line
// and the Optional<Reloc/CodeModel> parameter lines (original lines 451,
// 453-455, 458) are missing from this extraction.
452  StringRef CPU, StringRef FS,
456  CodeGenOpt::Level OL, bool JIT)
457  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
459 
460  // Override the default since calls aren't supported for r600.
461  if (EnableFunctionCalls &&
462  EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
463  EnableFunctionCalls = false;
464 }
465 
// Returns (creating and caching on first use) the R600 subtarget matching
// the function's GPU name + feature string.
// NOTE(review): the method's opening line and the resetTargetOptions(F) call
// (original lines 466, 479) are missing from this extraction.
467  const Function &F) const {
468  StringRef GPU = getGPUName(F);
469  StringRef FS = getFeatureString(F);
470 
// Cache key is the concatenation of GPU name and feature string.
471  SmallString<128> SubtargetKey(GPU);
472  SubtargetKey.append(FS);
473 
474  auto &I = SubtargetMap[SubtargetKey];
475  if (!I) {
476  // This needs to be done before we create a new subtarget since any
477  // creation will depend on the TM and the code generation flags on the
478  // function that reside in TargetOptions.
480  I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
481  }
482 
483  return I.get();
484 }
485 
// Builds a TargetTransformInfo backed by the R600 TTI implementation.
// NOTE(review): the signature lines (original lines 486-487) are missing
// from this extraction.
488  return TargetTransformInfo(R600TTIImpl(this, F));
489 }
490 
491 //===----------------------------------------------------------------------===//
492 // GCN Target Machine (SI+)
493 //===----------------------------------------------------------------------===//
494 
496  StringRef CPU, StringRef FS,
500  CodeGenOpt::Level OL, bool JIT)
501  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
502 
// Returns (creating and caching on first use) the GCN subtarget matching
// the function's GPU name + feature string, and applies the global-load
// scalarization setting to it.
// NOTE(review): the method's opening line and the resetTargetOptions(F)
// call (original lines 503, 515) are missing from this extraction.
504  StringRef GPU = getGPUName(F);
505  StringRef FS = getFeatureString(F);
506 
// Cache key is the concatenation of GPU name and feature string.
507  SmallString<128> SubtargetKey(GPU);
508  SubtargetKey.append(FS);
509 
510  auto &I = SubtargetMap[SubtargetKey];
511  if (!I) {
512  // This needs to be done before we create a new subtarget since any
513  // creation will depend on the TM and the code generation flags on the
514  // function that reside in TargetOptions.
516  I = llvm::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
517  }
518 
// Applied even to cached subtargets so the cl::opt takes effect uniformly.
519  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
520 
521  return I.get();
522 }
523 
// Builds a TargetTransformInfo backed by the GCN TTI implementation.
// NOTE(review): the signature lines (original lines 524-525) are missing
// from this extraction.
526  return TargetTransformInfo(GCNTTIImpl(this, F));
527 }
528 
529 //===----------------------------------------------------------------------===//
530 // AMDGPU Pass Setup
531 //===----------------------------------------------------------------------===//
532 
533 namespace {
534 
// Common pass-pipeline configuration shared by the R600 and GCN targets.
// NOTE(review): the createMachineScheduler return type and body lines
// (original lines 549, 551-553) are missing from this extraction.
535 class AMDGPUPassConfig : public TargetPassConfig {
536 public:
537  AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
538  : TargetPassConfig(TM, PM) {
539  // Exceptions and StackMaps are not supported, so these passes will never do
540  // anything.
541  disablePass(&StackMapLivenessID);
542  disablePass(&FuncletLayoutID);
543  }
544 
545  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
546  return getTM<AMDGPUTargetMachine>();
547  }
548 
550  createMachineScheduler(MachineSchedContext *C) const override {
554  return DAG;
555  }
556 
// Pipeline hooks overridden by this target; implementations are below.
557  void addEarlyCSEOrGVNPass();
558  void addStraightLineScalarOptimizationPasses();
559  void addIRPasses() override;
560  void addCodeGenPrepare() override;
561  bool addPreISel() override;
562  bool addInstSelector() override;
563  bool addGCPasses() override;
564 
565  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
566 };
567 
568 std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig() const {
569  return getStandardCSEConfigForOpt(TM->getOptLevel());
570 }
571 
// Pass-pipeline configuration for the R600 family; substitutes R600's custom
// machine scheduler and declares the R600-specific pipeline hooks.
572 class R600PassConfig final : public AMDGPUPassConfig {
573 public:
574  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
575  : AMDGPUPassConfig(TM, PM) {}
576 
577  ScheduleDAGInstrs *createMachineScheduler(
578  MachineSchedContext *C) const override {
579  return createR600MachineScheduler(C);
580  }
581 
// Hook implementations are below in the "R600 Pass Setup" section.
582  bool addPreISel() override;
583  bool addInstSelector() override;
584  void addPreRegAlloc() override;
585  void addPreSched2() override;
586  void addPreEmitPass() override;
587 };
588 
// Pass-pipeline configuration for GCN (SI+), including the GlobalISel hooks.
// NOTE(review): the createMachineScheduler return-type line (original line
// 603, ScheduleDAGInstrs *) is missing from this extraction.
589 class GCNPassConfig final : public AMDGPUPassConfig {
590 public:
591  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
592  : AMDGPUPassConfig(TM, PM) {
593  // It is necessary to know the register usage of the entire call graph. We
594  // allow calls without EnableAMDGPUFunctionCalls if they are marked
595  // noinline, so this is always required.
596  setRequiresCodeGenSCCOrder(true);
597  }
598 
599  GCNTargetMachine &getGCNTargetMachine() const {
600  return getTM<GCNTargetMachine>();
601  }
602 
604  createMachineScheduler(MachineSchedContext *C) const override;
605 
// Hook implementations are below in the "GCN Pass Setup" section.
606  bool addPreISel() override;
607  void addMachineSSAOptimization() override;
608  bool addILPOpts() override;
609  bool addInstSelector() override;
610  bool addIRTranslator() override;
611  bool addLegalizeMachineIR() override;
612  bool addRegBankSelect() override;
613  bool addGlobalInstructionSelect() override;
614  void addFastRegAlloc() override;
615  void addOptimizedRegAlloc() override;
616  void addPreRegAlloc() override;
617  bool addPreRewrite() override;
618  void addPostRegAlloc() override;
619  void addPreSched2() override;
620  void addPreEmitPass() override;
621 };
622 
623 } // end anonymous namespace
624 
// Adds GVN or the cheaper EarlyCSE depending on a condition whose line is
// missing here.
// NOTE(review): the "if (...)" line (original line 626) is absent from this
// extraction — presumably an optimization-level check; verify upstream.
625 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
627  addPass(createGVNPass());
628  else
629  addPass(createEarlyCSEPass());
630 }
631 
// Straight-line scalar optimizations: LICM, then (per the comments) GEP
// reassociation and SLSR, cleaned up by CSE/GVN and NaryReassociate.
// NOTE(review): several addPass lines (original lines 634-635, 638) are
// missing from this extraction.
632 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
633  addPass(createLICMPass());
636  // ReassociateGEPs exposes more opportunities for SLSR. See
637  // the example in reassociate-geps-and-slsr.ll.
639  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
640  // EarlyCSE can reuse.
641  addEarlyCSEOrGVNPass();
642  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
643  addPass(createNaryReassociatePass());
644  // NaryReassociate on GEPs creates redundant common expressions, so run
645  // EarlyCSE after it.
646  addPass(createEarlyCSEPass());
647 }
648 
// IR-level pipeline shared by R600 and GCN: atomic expansion, forced
// inlining, promote-alloca/SROA and scalar opts at -O1+, then the common
// target IR passes.
// NOTE(review): a number of addPass lines are missing from this extraction
// (e.g. original lines 661-663, 678, 683, 686, 695, 698, 700, 708, 722) —
// the pipeline below is incomplete; verify against upstream.
649 void AMDGPUPassConfig::addIRPasses() {
650  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
651 
652  // There is no reason to run these.
653  disablePass(&StackMapLivenessID);
654  disablePass(&FuncletLayoutID);
655  disablePass(&PatchableFunctionID);
656 
657  addPass(createAtomicExpandPass());
658 
659  // This must occur before inlining, as the inliner will not look through
660  // bitcast calls.
662 
664 
665  // Function calls are not supported, so make sure we inline everything.
666  addPass(createAMDGPUAlwaysInlinePass());
668  // We need to add the barrier noop pass, otherwise adding the function
669  // inlining pass will cause all of the PassConfigs passes to be run
670  // one function at a time, which means if we have a module with two
671  // functions, then we will generate code for the first function
672  // without ever running any passes on the second.
673  addPass(createBarrierNoopPass());
674 
675  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
676  // TODO: May want to move later or split into an early and late one.
677 
679  }
680 
681  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
682  if (TM.getTargetTriple().getArch() == Triple::r600)
684 
685  // Replace OpenCL enqueued block function pointers with global variables.
687 
688  if (TM.getOptLevel() > CodeGenOpt::None) {
689  addPass(createInferAddressSpacesPass());
690  addPass(createAMDGPUPromoteAlloca());
691 
692  if (EnableSROA)
693  addPass(createSROAPass());
694 
696  addStraightLineScalarOptimizationPasses();
697 
699  addPass(createAMDGPUAAWrapperPass());
701  AAResults &AAR) {
702  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
703  AAR.addAAResult(WrapperPass->getResult());
704  }));
705  }
706  }
707 
709 
710  // EarlyCSE is not always strong enough to clean up what LSR produces. For
711  // example, GVN can combine
712  //
713  // %0 = add %a, %b
714  // %1 = add %b, %a
715  //
716  // and
717  //
718  // %0 = shl nsw %a, 2
719  // %1 = shl %a, 2
720  //
721  // but EarlyCSE can do neither of them.
723  addEarlyCSEOrGVNPass();
724 }
725 
// CodeGenPrepare stage: amdgcn-only passes guarded by triple checks.
// NOTE(review): the addPass lines for both branches and the base-class
// call (original lines 728, 731-732, 734, 736-737) are missing from this
// extraction — verify against upstream.
726 void AMDGPUPassConfig::addCodeGenPrepare() {
727  if (TM->getTargetTriple().getArch() == Triple::amdgcn)
729 
730  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
733 
735 
738 }
739 
// Pre-instruction-selection IR passes common to both targets. Pass order is
// significant: switches are lowered before the CFG is flattened.
740 bool AMDGPUPassConfig::addPreISel() {
741  addPass(createLowerSwitchPass());
742  addPass(createFlattenCFGPass());
// Returning false: no instruction selector was installed here.
743  return false;
744 }
745 
// Install the AMDGPU SelectionDAG instruction selector.
746 bool AMDGPUPassConfig::addInstSelector() {
747  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
748  return false;
749 }
750 
// Garbage-collection lowering is intentionally disabled for this target.
751 bool AMDGPUPassConfig::addGCPasses() {
752  // Do nothing. GC is not supported.
753  return false;
754 }
755 
756 //===----------------------------------------------------------------------===//
757 // R600 Pass Setup
758 //===----------------------------------------------------------------------===//
759 
// R600 pre-ISel: common passes, then (conditionally) CFG structurization.
// NOTE(review): the guarding "if (...)" line (original line 763, presumably
// testing the r600-ir-structurize option) is missing from this extraction.
760 bool R600PassConfig::addPreISel() {
761  AMDGPUPassConfig::addPreISel();
762 
764  addPass(createStructurizeCFGPass());
765  return false;
766 }
767 
// Install the R600 SelectionDAG instruction selector.
768 bool R600PassConfig::addInstSelector() {
769  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
770  return false;
771 }
772 
// Merge vector registers before register allocation on R600.
773 void R600PassConfig::addPreRegAlloc() {
774  addPass(createR600VectorRegMerger());
775 }
776 
// R600 pre-sched2: clause markers, optional if-conversion, clause merging.
// NOTE(review): the guarding "if (...)" line for the if-converter (original
// line 779, presumably testing the r600-if-convert option) is missing from
// this extraction.
777 void R600PassConfig::addPreSched2() {
778  addPass(createR600EmitClauseMarkers(), false);
780  addPass(&IfConverterID, false);
781  addPass(createR600ClauseMergePass(), false);
782 }
783 
// R600 pre-emit pipeline; pass order is significant (structurize, expand,
// bundle, packetize, finalize control flow).
784 void R600PassConfig::addPreEmitPass() {
785  addPass(createAMDGPUCFGStructurizerPass(), false);
786  addPass(createR600ExpandSpecialInstrsPass(), false);
787  addPass(&FinalizeMachineBundlesID, false);
788  addPass(createR600Packetizer(), false);
789  addPass(createR600ControlFlowFinalizer(), false);
790 }
791 
// Factory for the R600 pass-pipeline configuration.
// NOTE(review): the signature line (original line 792) is missing from this
// extraction.
793  return new R600PassConfig(*this, PM);
794 }
795 
796 //===----------------------------------------------------------------------===//
797 // GCN Pass Setup
798 //===----------------------------------------------------------------------===//
799 
// Choose the machine scheduler for GCN: the SI scheduler when the subtarget
// enables it, otherwise the default (whose return line is missing here).
// NOTE(review): the fallback return (original line 805, presumably
// createGCNMaxOccupancyMachineScheduler(C)) is missing from this extraction.
800 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
801  MachineSchedContext *C) const {
802  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
803  if (ST.enableSIScheduler())
804  return createSIMachineScheduler(C);
806 }
807 
// GCN pre-ISel: common passes, then (unless late structurization is
// requested) StructurizeCFG skipping uniform regions, followed by sinking.
// NOTE(review): several addPass lines (original lines 811-812, 820, 825,
// 827) are missing from this extraction.
808 bool GCNPassConfig::addPreISel() {
809  AMDGPUPassConfig::addPreISel();
810 
813  }
814 
815  // FIXME: We need to run a pass to propagate the attributes when calls are
816  // supported.
817 
818  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
819  // regions formed by them.
821  if (!LateCFGStructurize) {
822  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
823  }
824  addPass(createSinkingPass());
826  if (!LateCFGStructurize) {
828  }
829 
830  return false;
831 }
832 
// Machine-SSA optimizations specific to GCN: operand folding, DPP combining,
// load/store optimization, and the SDWA peephole with its cleanup passes.
// NOTE(review): the base-class call and trailing addPass lines (original
// lines 834, 846, 853, 855) are missing from this extraction.
833 void GCNPassConfig::addMachineSSAOptimization() {
835 
836  // We want to fold operands after PeepholeOptimizer has run (or as part of
837  // it), because it will eliminate extra copies making it easier to fold the
838  // real source operand. We want to eliminate dead instructions after, so that
839  // we see fewer uses of the copies. We then need to clean up the dead
840  // instructions leftover after the operands are folded as well.
841  //
842  // XXX - Can we get away without running DeadMachineInstructionElim again?
843  addPass(&SIFoldOperandsID);
844  if (EnableDPPCombine)
845  addPass(&GCNDPPCombineID);
847  addPass(&SILoadStoreOptimizerID);
848  if (EnableSDWAPeephole) {
849  addPass(&SIPeepholeSDWAID);
850  addPass(&EarlyMachineLICMID);
851  addPass(&MachineCSEID);
// Re-run operand folding to exploit the copies CSE/LICM exposed.
852  addPass(&SIFoldOperandsID);
854  }
856 }
857 
// ILP optimizations: early if-conversion (guard line missing) plus whatever
// the base class adds.
// NOTE(review): the guarding "if (...)" and base-class call (original lines
// 859, 862) are missing from this extraction.
858 bool GCNPassConfig::addILPOpts() {
860  addPass(&EarlyIfConverterID);
861 
863  return false;
864 }
865 
// GCN instruction selection: common ISel followed by SI-specific fixups.
// Pass order is significant.
866 bool GCNPassConfig::addInstSelector() {
867  AMDGPUPassConfig::addInstSelector();
868  addPass(&SIFixSGPRCopiesID);
869  addPass(createSILowerI1CopiesPass());
870  addPass(createSIFixupVectorISelPass());
871  addPass(createSIAddIMGInitPass());
872  return false;
873 }
874 
// GlobalISel stage 1: IR -> generic MIR translation.
875 bool GCNPassConfig::addIRTranslator() {
876  addPass(new IRTranslator());
877  return false;
878 }
879 
// GlobalISel stage 2: legalize generic MIR.
880 bool GCNPassConfig::addLegalizeMachineIR() {
881  addPass(new Legalizer());
882  return false;
883 }
884 
// GlobalISel stage 3: assign register banks.
885 bool GCNPassConfig::addRegBankSelect() {
886  addPass(new RegBankSelect());
887  return false;
888 }
889 
// GlobalISel stage 4: select target instructions.
890 bool GCNPassConfig::addGlobalInstructionSelect() {
891  addPass(new InstructionSelect());
892  return false;
893 }
894 
// Pre-RA passes: optional late CFG structurization (its addPass line is
// missing here), then whole-quad-mode insertion.
// NOTE(review): the pass added inside the LateCFGStructurize branch
// (original line 897) is missing from this extraction.
895 void GCNPassConfig::addPreRegAlloc() {
896  if (LateCFGStructurize) {
898  }
899  addPass(createSIWholeQuadModePass());
900 }
901 
// Fast (-O0) register allocation setup: inserts SI control-flow lowering and
// WWM pre-allocation at fixed points in the RA pipeline.
// NOTE(review): the trailing base-class call (original line 914, presumably
// TargetPassConfig::addFastRegAlloc()) is missing from this extraction.
902 void GCNPassConfig::addFastRegAlloc() {
903  // FIXME: We have to disable the verifier here because of PHIElimination +
904  // TwoAddressInstructions disabling it.
905 
906  // This must be run immediately after phi elimination and before
907  // TwoAddressInstructions, otherwise the processing of the tied operand of
908  // SI_ELSE will introduce a copy of the tied operand source after the else.
909  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
910 
911  // This must be run just after RegisterCoalescing.
912  insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
913 
915 }
916 
// Optimized register allocation setup, mirroring addFastRegAlloc plus
// exec-mask pre-RA optimization and optional in-RA DCE.
// NOTE(review): the insertPass lines inside the OptExecMaskPreRA branches,
// the DCE insertPass, and the base-class call (original lines 919-920, 922,
// 934, 936) are missing from this extraction.
917 void GCNPassConfig::addOptimizedRegAlloc() {
918  if (OptExecMaskPreRA) {
921  } else {
923  }
924 
925  // This must be run immediately after phi elimination and before
926  // TwoAddressInstructions, otherwise the processing of the tied operand of
927  // SI_ELSE will introduce a copy of the tied operand source after the else.
928  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
929 
930  // This must be run just after RegisterCoalescing.
931  insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
932 
933  if (EnableDCEInRA)
935 
937 }
938 
939 bool GCNPassConfig::addPreRewrite() {
940  if (EnableRegReassign) {
941  addPass(&GCNNSAReassignID);
942  addPass(&GCNRegBankReassignID);
943  }
944  return true;
945 }
946 
// Post-RA passes: fix VGPR copies, then exec-mask optimization (whose guard
// line is missing here).
// NOTE(review): the guarding "if (...)" and base-class call (original lines
// 949, 951) are missing from this extraction.
947 void GCNPassConfig::addPostRegAlloc() {
948  addPass(&SIFixVGPRCopiesID);
950  addPass(&SIOptimizeExecMaskingID);
952 }
953 
// Intentionally empty: GCN adds nothing before the second scheduling pass.
954 void GCNPassConfig::addPreSched2() {
955 }
956 
// Final pre-emit pipeline: memory legalization, waitcnt insertion, mode
// register setup, stand-alone hazard handling, skip insertion, and branch
// relaxation. Pass order is significant.
// NOTE(review): the guarding "if (...)" before the mode-register pass
// (original line 960) is missing from this extraction.
957 void GCNPassConfig::addPreEmitPass() {
958  addPass(createSIMemoryLegalizerPass());
959  addPass(createSIInsertWaitcntsPass());
961  addPass(createSIModeRegisterPass());
962 
963  // The hazard recognizer that runs as part of the post-ra scheduler does not
964  // guarantee to be able handle all hazards correctly. This is because if there
965  // are multiple scheduling regions in a basic block, the regions are scheduled
966  // bottom up, so when we begin to schedule a region we don't know what
967  // instructions were emitted directly before it.
968  //
969  // Here we add a stand-alone hazard recognizer pass which can handle all
970  // cases.
971  //
972  // FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
973  // be better for it to emit S_NOP <N> when possible.
974  addPass(&PostRAHazardRecognizerID);
975 
976  addPass(&SIInsertSkipsPassID);
977  addPass(&BranchRelaxationPassID);
978 }
979 
// Factory for the GCN pass-pipeline configuration.
// NOTE(review): the signature line (original line 980) is missing from this
// extraction.
981  return new GCNPassConfig(*this, PM);
982 }
983 
// Allocate a default-initialized YAML representation of SIMachineFunctionInfo.
// NOTE(review): the signature line (original line 984) is missing from this
// extraction.
985  return new yaml::SIMachineFunctionInfo();
986 }
987 
// Convert the in-memory SIMachineFunctionInfo into its YAML form.
// NOTE(review): the signature lines (original lines 988-990) are missing
// from this extraction.
991  return new yaml::SIMachineFunctionInfo(*MFI,
992  *MF.getSubtarget().getRegisterInfo());
993 }
994 
// Parse SIMachineFunctionInfo back out of MIR YAML: copies the base fields,
// resolves the four named special registers, and validates their register
// classes, reporting failures through Error/SourceRange.
// NOTE(review): the signature lines and the MFI initialization (original
// lines 995-996, 1001) are missing from this extraction.
997  SMDiagnostic &Error, SMRange &SourceRange) const {
998  const yaml::SIMachineFunctionInfo &YamlMFI =
999  reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
1000  MachineFunction &MF = PFS.MF;
1002 
1003  MFI->initializeBaseYamlFields(YamlMFI);
1004 
// Resolve a named register reference; on failure record its source range
// and report true.
1005  auto parseRegister = [&](const yaml::StringValue &RegName, unsigned &RegVal) {
1006  if (parseNamedRegisterReference(PFS, RegVal, RegName.Value, Error)) {
1007  SourceRange = RegName.SourceRange;
1008  return true;
1009  }
1010 
1011  return false;
1012  };
1013 
1014  auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
1015  // Create a diagnostic for the register string literal.
1016  const MemoryBuffer &Buffer =
1017  *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID())
1018  Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
1019  RegName.Value.size(), SourceMgr::DK_Error,
1020  "incorrect register class for field", RegName.Value,
1021  None, None);
1022  SourceRange = RegName.SourceRange;
1023  return true;
1024  };
1025 
1026  if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1027  parseRegister(YamlMFI.ScratchWaveOffsetReg, MFI->ScratchWaveOffsetReg) ||
1028  parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
1029  parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
1030  return true;
1031 
// Each special register must be in the expected class unless it still holds
// its sentinel/default value.
1032  if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1033  !AMDGPU::SReg_128RegClass.contains(MFI->ScratchRSrcReg)) {
1034  return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
1035  }
1036 
1037  if (MFI->ScratchWaveOffsetReg != AMDGPU::SCRATCH_WAVE_OFFSET_REG &&
1038  !AMDGPU::SGPR_32RegClass.contains(MFI->ScratchWaveOffsetReg)) {
1039  return diagnoseRegisterClass(YamlMFI.ScratchWaveOffsetReg);
1040  }
1041 
1042  if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1043  !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1044  return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
1045  }
1046 
1047  if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1048  !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1049  return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
1050  }
1051 
1052  return false;
1053 }
FunctionPass * createSpeculativeExecutionPass()
char & SIFormMemoryClausesID
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:80
FunctionPass * createStraightLineStrengthReducePass()
uint64_t CallInst * C
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition: MemoryBuffer.h:69
Represents a range in source code.
Definition: SMLoc.h:48
FunctionPass * createGVNPass(bool NoLoads=false)
Create a legacy GVN pass.
Definition: GVN.cpp:2591
StringRef getTargetFeatureString() const
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
Target & getTheGCNTarget()
The target for GCN GPUs.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
This file declares a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine.
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value...
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
char & RenameIndependentSubregsID
This pass detects subregister lanes in a virtual register that are used independently of other lanes ...
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void initializeSIFixVGPRCopiesPass(PassRegistry &)
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
FunctionPass * createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a R600-specific.
void initializeSIInsertWaitcntsPass(PassRegistry &)
char & GCNNSAReassignID
void initializeSIFormMemoryClausesPass(PassRegistry &)
ModulePass * createR600OpenCLImageTypeLoweringPass()
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with...
Definition: TargetMachine.h:84
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:303
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
char & SILoadStoreOptimizerID
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
char & SIPeepholeSDWAID
void initializeSIModeRegisterPass(PassRegistry &)
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
This file describes how to lower LLVM calls to machine code calls.
char & FuncletLayoutID
This pass lays out funclets contiguously.
FunctionPass * createLowerSwitchPass()
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
char & RegisterCoalescerID
RegisterCoalescer - This pass merges live ranges to eliminate copies.
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions...
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
FunctionPass * createAMDGPUPromoteAlloca()
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
ModulePass * createAMDGPULowerKernelAttributesPass()
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target's MachineFunctionInfo from the YAML representation.
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
FunctionPass * createAMDGPUCodeGenPreparePass()
F(f)
R600 Machine Scheduler interface.
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
FunctionPass * createAMDGPUCFGStructurizerPass()
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static cl::opt< bool, true > EnableAMDGPUFunctionCallsOpt("amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true), cl::Hidden)
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form...
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
FunctionPass * createSIAddIMGInitPass()
FunctionPass * createSIMemoryLegalizerPass()
Pass * Inliner
Inliner - Specifies the inliner to use.
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
FunctionPass * createSIInsertWaitcntsPass()
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
StringRef getFeatureString(const Function &F) const
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
void resetTargetOptions(const Function &F) const
Reset the target options based on the function&#39;s attributes.
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
This file declares the targeting of the InstructionSelector class for AMDGPU.
Pass * createAMDGPUFunctionInliningPass()
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
This file declares the AMDGPU-specific subclass of TargetLoweringObjectFile.
Pass * createAMDGPUAnnotateKernelFeaturesPass()
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
This file contains the simple types necessary to represent the attributes associated with functions a...
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
No attributes have been set.
Definition: Attributes.h:71
void initializeGCNNSAReassignPass(PassRegistry &)
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
void initializeAMDGPUInlinerPass(PassRegistry &)
FunctionPass * createSinkingPass()
Definition: Sink.cpp:303
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
char & SIOptimizeExecMaskingPreRAID
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
char & FinalizeMachineBundlesID
FinalizeMachineBundles - This pass finalize machine instruction bundles (created earlier, e.g.
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
Target-Independent Code Generator Pass Configuration Options.
static StringRef computeDataLayout(const Triple &TT)
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition: SourceMgr.h:130
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
FunctionPass * createR600ExpandSpecialInstrsPass()
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
RegisterTargetMachine - Helper template for registering a target machine implementation, for use in the target machine initialization function.
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:141
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI)
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:294
FunctionPass * createSIFixupVectorISelPass()
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
FunctionPass * createSILowerI1CopiesPass()
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
StringRef getTargetCPU() const
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
void initializeSIFixSGPRCopiesPass(PassRegistry &)
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
FunctionPass * createR600VectorRegMerger()
void initializeSIFixupVectorISelPass(PassRegistry &)
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
SI Machine Scheduler interface.
StringRef getGPUName(const Function &F) const
unsigned getMainFileID() const
Definition: SourceMgr.h:139
void append(in_iter S, in_iter E)
Append from an iterator pair.
Definition: SmallString.h:74
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions...
FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
#define P(N)
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
Definition: CSEInfo.cpp:65
char & GCNDPPCombineID
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
FunctionPass * createAMDGPULowerKernelArgumentsPass()
bool hasAttribute(AttrKind Val) const
Return true if the attribute is present.
Definition: Attributes.cpp:201
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
Definition: RegBankSelect.h:90
FunctionPass * createFlattenCFGPass()
static cl::opt< bool > EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
FunctionPass * createSIWholeQuadModePass()
This file provides the interface for LLVM&#39;s Global Value Numbering pass which eliminates fully redund...
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
char & SIInsertSkipsPassID
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g.
bool isEntryFunctionCC(CallingConv::ID CC)
void LLVMInitializeAMDGPUTarget()
void initializeSIPeepholeSDWAPass(PassRegistry &)
Pass * createLICMPass()
Definition: LICM.cpp:311
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg, StringRef Src, SMDiagnostic &Error)
Definition: MIParser.cpp:3017
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
FunctionPass * createR600ControlFlowFinalizer()
Legacy wrapper pass to provide the AMDGPUAAResult object.
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
const Triple & getTargetTriple() const
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
void initializeSILowerControlFlowPass(PassRegistry &)
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
ModulePass * createAMDGPULowerIntrinsicsPass()
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
FunctionPass * createSIModeRegisterPass()
const TargetSubtargetInfo * getSubtargetImpl() const
FunctionPass * createR600ClauseMergePass()
The AMDGPU TargetMachine interface definition for hw codgen targets.
static cl::opt< bool > EnableR600IfConvert("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static ScheduleDAGInstrs * createR600MachineScheduler(MachineSchedContext *C)
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...
void initializeSIShrinkInstructionsPass(PassRegistry &)
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
Analysis pass providing a never-invalidated alias analysis result.
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations, allowing them to see the code as it is coming out of the frontend.
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
void initializeSIInsertSkipsPass(PassRegistry &)
void initializeR600PacketizerPass(PassRegistry &)
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
FunctionPass * createAMDGPUAnnotateUniformValues()
This is the AMGPU address space based alias analysis pass.
Provides passes to inlining "always_inline" functions.
char & SIOptimizeExecMaskingID
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
void initializeGCNRegBankReassignPass(PassRegistry &)
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &)
char & AMDGPUUnifyDivergentExitNodesID
bool enableSIScheduler() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
void initializeSIMemoryLegalizerPass(PassRegistry &)
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module, internalizing all globals (functions and variables) it can.
char & SIPreAllocateWWMRegsID
void initializeSIWholeQuadModePass(PassRegistry &)
void setRequiresStructuredCFG(bool Value)
FunctionPass * createAMDGPUAtomicOptimizerPass()
void initializeR600VectorRegMergerPass(PassRegistry &)
char & SIFixVGPRCopiesID
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
void initializeGCNDPPCombinePass(PassRegistry &)
ImmutablePass * createAMDGPUAAWrapperPass()
FunctionPass * createR600EmitClauseMarkers()
void initializeR600ClauseMergePassPass(PassRegistry &)
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition: MemoryBuffer.h:41
This pass is responsible for selecting generic machine instructions to target-specific instructions...
ModulePass * createAMDGPUFixFunctionBitcastsPass()
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Target - Wrapper for Target specific information.
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
This file declares the targeting of the Machinelegalizer class for AMDGPU.
A wrapper around std::string which contains a source range that&#39;s being set during parsing...
FunctionPass * createR600Packetizer()
void initializeSILoadStoreOptimizerPass(PassRegistry &)
char & SILowerControlFlowID
ModulePass * createAMDGPUUnifyMetadataPass()
void initializeSIAnnotateControlFlowPass(PassRegistry &)
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
A ScheduleDAG for scheduling lists of MachineInstr.
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
void initializeSIFoldOperandsPass(PassRegistry &)
char & SIFoldOperandsID
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
FunctionPass * createSIShrinkInstructionsPass()
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:194
TargetOptions Options
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
#define LLVM_READNONE
Definition: Compiler.h:176
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
char & SIFixSGPRCopiesID
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetOptions &)
FunctionPass * createSROAPass()
Definition: SROA.cpp:4594
static MachineSchedRegistry R600SchedRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler)
ImmutablePass * createAMDGPUExternalAAWrapperPass()
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
char & GCNRegBankReassignID
This file declares the IRTranslator pass.
FunctionPass * createAMDGPUUseNativeCallsPass()
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
char & PostRAHazardRecognizerID
createPostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:333
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1358
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
void initializeSILowerI1CopiesPass(PassRegistry &)
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
void addExtension(ExtensionPointTy Ty, ExtensionFn Fn)
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
Represents a location in source code.
Definition: SMLoc.h:23
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableR600StructurizeCFG("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
FunctionPass * createAtomicExpandPass()
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
Definition: GlobalISel.cpp:18
bool use_empty() const
Definition: Value.h:322
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:448
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
FunctionPass * createNaryReassociatePass()
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:259