LLVM  9.0.0svn
AMDGPUTargetMachine.cpp
Go to the documentation of this file.
1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The AMDGPU target machine contains all of the hardware specific
11 /// information needed to emit code for R600 and SI GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUAliasAnalysis.h"
18 #include "AMDGPUCallLowering.h"
20 #include "AMDGPULegalizerInfo.h"
21 #include "AMDGPUMacroFusion.h"
22 #include "AMDGPUTargetObjectFile.h"
24 #include "GCNIterativeScheduler.h"
25 #include "GCNSchedStrategy.h"
26 #include "R600MachineScheduler.h"
27 #include "SIMachineFunctionInfo.h"
28 #include "SIMachineScheduler.h"
35 #include "llvm/CodeGen/Passes.h"
37 #include "llvm/IR/Attributes.h"
38 #include "llvm/IR/Function.h"
40 #include "llvm/Pass.h"
42 #include "llvm/Support/Compiler.h"
45 #include "llvm/Transforms/IPO.h"
48 #include "llvm/Transforms/Scalar.h"
50 #include "llvm/Transforms/Utils.h"
52 #include <memory>
53 
54 using namespace llvm;
55 
57  "r600-ir-structurize",
58  cl::desc("Use StructurizeCFG IR pass"),
59  cl::init(true));
60 
62  "amdgpu-sroa",
63  cl::desc("Run SROA after promote alloca pass"),
65  cl::init(true));
66 
67 static cl::opt<bool>
68 EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
69  cl::desc("Run early if-conversion"),
70  cl::init(false));
71 
72 static cl::opt<bool>
73 OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
74  cl::desc("Run pre-RA exec mask optimizations"),
75  cl::init(true));
76 
78  "r600-if-convert",
79  cl::desc("Use if conversion pass"),
81  cl::init(true));
82 
83 // Option to disable vectorizer for tests.
85  "amdgpu-load-store-vectorizer",
86  cl::desc("Enable load store vectorizer"),
87  cl::init(true),
88  cl::Hidden);
89 
90 // Option to control global loads scalarization
92  "amdgpu-scalarize-global-loads",
93  cl::desc("Enable global load scalarization"),
94  cl::init(true),
95  cl::Hidden);
96 
97 // Option to run internalize pass.
99  "amdgpu-internalize-symbols",
100  cl::desc("Enable elimination of non-kernel functions and unused globals"),
101  cl::init(false),
102  cl::Hidden);
103 
104 // Option to inline all early.
106  "amdgpu-early-inline-all",
107  cl::desc("Inline all functions early"),
108  cl::init(false),
109  cl::Hidden);
110 
112  "amdgpu-sdwa-peephole",
113  cl::desc("Enable SDWA peepholer"),
114  cl::init(true));
115 
117  "amdgpu-dpp-combine",
118  cl::desc("Enable DPP combiner"),
119  cl::init(true));
120 
121 // Enable address space based alias analysis
122 static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
123  cl::desc("Enable AMDGPU Alias Analysis"),
124  cl::init(true));
125 
126 // Option to run late CFG structurizer
128  "amdgpu-late-structurize",
129  cl::desc("Enable late CFG structurization"),
131  cl::Hidden);
132 
134  "amdgpu-function-calls",
135  cl::desc("Enable AMDGPU function call support"),
137  cl::init(true),
138  cl::Hidden);
139 
140 // Enable lib calls simplifications
142  "amdgpu-simplify-libcall",
143  cl::desc("Enable amdgpu library simplifications"),
144  cl::init(true),
145  cl::Hidden);
146 
148  "amdgpu-ir-lower-kernel-arguments",
149  cl::desc("Lower kernel argument loads in IR pass"),
150  cl::init(true),
151  cl::Hidden);
152 
154  "amdgpu-reassign-regs",
155  cl::desc("Enable register reassign optimizations on gfx10+"),
156  cl::init(true),
157  cl::Hidden);
158 
159 // Enable atomic optimization
161  "amdgpu-atomic-optimizations",
162  cl::desc("Enable atomic optimizations"),
163  cl::init(false),
164  cl::Hidden);
165 
166 // Enable Mode register optimization
168  "amdgpu-mode-register",
169  cl::desc("Enable mode register pass"),
170  cl::init(true),
171  cl::Hidden);
172 
173 // Option is used in lit tests to prevent deadcoding of patterns inspected.
174 static cl::opt<bool>
175 EnableDCEInRA("amdgpu-dce-in-ra",
176  cl::init(true), cl::Hidden,
177  cl::desc("Enable machine DCE inside regalloc"));
178 
180  "amdgpu-scalar-ir-passes",
181  cl::desc("Enable scalar IR passes"),
182  cl::init(true),
183  cl::Hidden);
184 
185 extern "C" void LLVMInitializeAMDGPUTarget() {
186  // Register the target
189 
242 }
243 
244 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
245  return llvm::make_unique<AMDGPUTargetObjectFile>();
246 }
247 
249  return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
250 }
251 
253  return new SIScheduleDAGMI(C);
254 }
255 
256 static ScheduleDAGInstrs *
258  ScheduleDAGMILive *DAG =
259  new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
263  return DAG;
264 }
265 
266 static ScheduleDAGInstrs *
268  auto DAG = new GCNIterativeScheduler(C,
270  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
271  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
272  return DAG;
273 }
274 
276  return new GCNIterativeScheduler(C,
278 }
279 
280 static ScheduleDAGInstrs *
282  auto DAG = new GCNIterativeScheduler(C,
284  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
285  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
286  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
287  return DAG;
288 }
289 
291 R600SchedRegistry("r600", "Run R600's custom scheduler",
293 
295 SISchedRegistry("si", "Run SI's custom scheduler",
297 
299 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
300  "Run GCN scheduler to maximize occupancy",
302 
304 IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
305  "Run GCN scheduler to maximize occupancy (experimental)",
307 
309 GCNMinRegSchedRegistry("gcn-minreg",
310  "Run GCN iterative scheduler for minimal register usage (experimental)",
312 
314 GCNILPSchedRegistry("gcn-ilp",
315  "Run GCN iterative scheduler for ILP scheduling (experimental)",
317 
319  if (TT.getArch() == Triple::r600) {
320  // 32-bit pointers.
321  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
322  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
323  }
324 
325  // 32-bit private, local, and region pointers. 64-bit global, constant and
326  // flat, non-integral buffer fat pointers.
327  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
328  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
329  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
330  "-ni:7";
331 }
332 
335  if (!GPU.empty())
336  return GPU;
337 
338  // Need to default to a target with flat support for HSA.
339  if (TT.getArch() == Triple::amdgcn)
340  return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
341 
342  return "r600";
343 }
344 
346  // The AMDGPU toolchain only supports generating shared objects, so we
347  // must always use PIC.
348  return Reloc::PIC_;
349 }
350 
352  StringRef CPU, StringRef FS,
353  TargetOptions Options,
356  CodeGenOpt::Level OptLevel)
357  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
358  FS, Options, getEffectiveRelocModel(RM),
359  getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
360  TLOF(createTLOF(getTargetTriple())) {
361  initAsmInfo();
362 }
363 
366 
368 
370  Attribute GPUAttr = F.getFnAttribute("target-cpu");
371  return GPUAttr.hasAttribute(Attribute::None) ?
372  getTargetCPU() : GPUAttr.getValueAsString();
373 }
374 
376  Attribute FSAttr = F.getFnAttribute("target-features");
377 
378  return FSAttr.hasAttribute(Attribute::None) ?
380  FSAttr.getValueAsString();
381 }
382 
383 /// Predicate for Internalize pass.
384 static bool mustPreserveGV(const GlobalValue &GV) {
385  if (const Function *F = dyn_cast<Function>(&GV))
386  return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
387 
388  return !GV.use_empty();
389 }
390 
392  Builder.DivergentTarget = true;
393 
394  bool EnableOpt = getOptLevel() > CodeGenOpt::None;
395  bool Internalize = InternalizeSymbols;
396  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
397  bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
398  bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
399 
400  if (EnableFunctionCalls) {
401  delete Builder.Inliner;
403  }
404 
405  Builder.addExtension(
407  [Internalize, EarlyInline, AMDGPUAA, this](const PassManagerBuilder &,
409  if (AMDGPUAA) {
412  }
415  if (Internalize) {
417  PM.add(createGlobalDCEPass());
418  }
419  if (EarlyInline)
421  });
422 
423  const auto &Opt = Options;
424  Builder.addExtension(
426  [AMDGPUAA, LibCallSimplify, &Opt, this](const PassManagerBuilder &,
428  if (AMDGPUAA) {
431  }
434  if (LibCallSimplify)
436  });
437 
438  Builder.addExtension(
441  // Add infer address spaces pass to the opt pipeline after inlining
442  // but before SROA to increase SROA opportunities.
444 
445  // This should run after inlining to have any chance of doing anything,
446  // and before other cleanup optimizations.
448  });
449 }
450 
451 //===----------------------------------------------------------------------===//
452 // R600 Target Machine (R600 -> Cayman)
453 //===----------------------------------------------------------------------===//
454 
456  StringRef CPU, StringRef FS,
460  CodeGenOpt::Level OL, bool JIT)
461  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
463 
464  // Override the default since calls aren't supported for r600.
465  if (EnableFunctionCalls &&
466  EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
467  EnableFunctionCalls = false;
468 }
469 
471  const Function &F) const {
472  StringRef GPU = getGPUName(F);
473  StringRef FS = getFeatureString(F);
474 
475  SmallString<128> SubtargetKey(GPU);
476  SubtargetKey.append(FS);
477 
478  auto &I = SubtargetMap[SubtargetKey];
479  if (!I) {
480  // This needs to be done before we create a new subtarget since any
481  // creation will depend on the TM and the code generation flags on the
482  // function that reside in TargetOptions.
484  I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
485  }
486 
487  return I.get();
488 }
489 
492  return TargetTransformInfo(R600TTIImpl(this, F));
493 }
494 
495 //===----------------------------------------------------------------------===//
496 // GCN Target Machine (SI+)
497 //===----------------------------------------------------------------------===//
498 
500  StringRef CPU, StringRef FS,
504  CodeGenOpt::Level OL, bool JIT)
505  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
506 
508  StringRef GPU = getGPUName(F);
509  StringRef FS = getFeatureString(F);
510 
511  SmallString<128> SubtargetKey(GPU);
512  SubtargetKey.append(FS);
513 
514  auto &I = SubtargetMap[SubtargetKey];
515  if (!I) {
516  // This needs to be done before we create a new subtarget since any
517  // creation will depend on the TM and the code generation flags on the
518  // function that reside in TargetOptions.
520  I = llvm::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
521  }
522 
523  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
524 
525  return I.get();
526 }
527 
530  return TargetTransformInfo(GCNTTIImpl(this, F));
531 }
532 
533 //===----------------------------------------------------------------------===//
534 // AMDGPU Pass Setup
535 //===----------------------------------------------------------------------===//
536 
537 namespace {
538 
539 class AMDGPUPassConfig : public TargetPassConfig {
540 public:
541  AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
542  : TargetPassConfig(TM, PM) {
543  // Exceptions and StackMaps are not supported, so these passes will never do
544  // anything.
545  disablePass(&StackMapLivenessID);
546  disablePass(&FuncletLayoutID);
547  }
548 
549  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
550  return getTM<AMDGPUTargetMachine>();
551  }
552 
554  createMachineScheduler(MachineSchedContext *C) const override {
558  return DAG;
559  }
560 
561  void addEarlyCSEOrGVNPass();
562  void addStraightLineScalarOptimizationPasses();
563  void addIRPasses() override;
564  void addCodeGenPrepare() override;
565  bool addPreISel() override;
566  bool addInstSelector() override;
567  bool addGCPasses() override;
568 
569  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
570 };
571 
572 std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig() const {
573  return getStandardCSEConfigForOpt(TM->getOptLevel());
574 }
575 
576 class R600PassConfig final : public AMDGPUPassConfig {
577 public:
578  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
579  : AMDGPUPassConfig(TM, PM) {}
580 
581  ScheduleDAGInstrs *createMachineScheduler(
582  MachineSchedContext *C) const override {
583  return createR600MachineScheduler(C);
584  }
585 
586  bool addPreISel() override;
587  bool addInstSelector() override;
588  void addPreRegAlloc() override;
589  void addPreSched2() override;
590  void addPreEmitPass() override;
591 };
592 
593 class GCNPassConfig final : public AMDGPUPassConfig {
594 public:
595  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
596  : AMDGPUPassConfig(TM, PM) {
597  // It is necessary to know the register usage of the entire call graph. We
598  // allow calls without EnableAMDGPUFunctionCalls if they are marked
599  // noinline, so this is always required.
600  setRequiresCodeGenSCCOrder(true);
601  }
602 
603  GCNTargetMachine &getGCNTargetMachine() const {
604  return getTM<GCNTargetMachine>();
605  }
606 
608  createMachineScheduler(MachineSchedContext *C) const override;
609 
610  bool addPreISel() override;
611  void addMachineSSAOptimization() override;
612  bool addILPOpts() override;
613  bool addInstSelector() override;
614  bool addIRTranslator() override;
615  bool addLegalizeMachineIR() override;
616  bool addRegBankSelect() override;
617  bool addGlobalInstructionSelect() override;
618  void addFastRegAlloc() override;
619  void addOptimizedRegAlloc() override;
620  void addPreRegAlloc() override;
621  bool addPreRewrite() override;
622  void addPostRegAlloc() override;
623  void addPreSched2() override;
624  void addPreEmitPass() override;
625 };
626 
627 } // end anonymous namespace
628 
629 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
631  addPass(createGVNPass());
632  else
633  addPass(createEarlyCSEPass());
634 }
635 
636 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
637  addPass(createLICMPass());
640  // ReassociateGEPs exposes more opportunites for SLSR. See
641  // the example in reassociate-geps-and-slsr.ll.
643  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
644  // EarlyCSE can reuse.
645  addEarlyCSEOrGVNPass();
646  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
647  addPass(createNaryReassociatePass());
648  // NaryReassociate on GEPs creates redundant common expressions, so run
649  // EarlyCSE after it.
650  addPass(createEarlyCSEPass());
651 }
652 
653 void AMDGPUPassConfig::addIRPasses() {
654  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
655 
656  // There is no reason to run these.
657  disablePass(&StackMapLivenessID);
658  disablePass(&FuncletLayoutID);
659  disablePass(&PatchableFunctionID);
660 
661  // This must occur before inlining, as the inliner will not look through
662  // bitcast calls.
664 
665  // A call to propagate attributes pass in the backend in case opt was not run.
667 
668  addPass(createAtomicExpandPass());
669 
670 
672 
673  // Function calls are not supported, so make sure we inline everything.
674  addPass(createAMDGPUAlwaysInlinePass());
676  // We need to add the barrier noop pass, otherwise adding the function
677  // inlining pass will cause all of the PassConfigs passes to be run
678  // one function at a time, which means if we have a nodule with two
679  // functions, then we will generate code for the first function
680  // without ever running any passes on the second.
681  addPass(createBarrierNoopPass());
682 
683  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
684  // TODO: May want to move later or split into an early and late one.
685 
687  }
688 
689  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
690  if (TM.getTargetTriple().getArch() == Triple::r600)
692 
693  // Replace OpenCL enqueued block function pointers with global variables.
695 
696  if (TM.getOptLevel() > CodeGenOpt::None) {
697  addPass(createInferAddressSpacesPass());
698  addPass(createAMDGPUPromoteAlloca());
699 
700  if (EnableSROA)
701  addPass(createSROAPass());
702 
704  addStraightLineScalarOptimizationPasses();
705 
707  addPass(createAMDGPUAAWrapperPass());
709  AAResults &AAR) {
710  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
711  AAR.addAAResult(WrapperPass->getResult());
712  }));
713  }
714  }
715 
717 
718  // EarlyCSE is not always strong enough to clean up what LSR produces. For
719  // example, GVN can combine
720  //
721  // %0 = add %a, %b
722  // %1 = add %b, %a
723  //
724  // and
725  //
726  // %0 = shl nsw %a, 2
727  // %1 = shl %a, 2
728  //
729  // but EarlyCSE can do neither of them.
731  addEarlyCSEOrGVNPass();
732 }
733 
734 void AMDGPUPassConfig::addCodeGenPrepare() {
735  if (TM->getTargetTriple().getArch() == Triple::amdgcn)
737 
738  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
741 
743 
746 }
747 
748 bool AMDGPUPassConfig::addPreISel() {
749  addPass(createLowerSwitchPass());
750  addPass(createFlattenCFGPass());
751  return false;
752 }
753 
754 bool AMDGPUPassConfig::addInstSelector() {
755  // Defer the verifier until FinalizeISel.
756  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()), false);
757  return false;
758 }
759 
760 bool AMDGPUPassConfig::addGCPasses() {
761  // Do nothing. GC is not supported.
762  return false;
763 }
764 
765 //===----------------------------------------------------------------------===//
766 // R600 Pass Setup
767 //===----------------------------------------------------------------------===//
768 
769 bool R600PassConfig::addPreISel() {
770  AMDGPUPassConfig::addPreISel();
771 
773  addPass(createStructurizeCFGPass());
774  return false;
775 }
776 
777 bool R600PassConfig::addInstSelector() {
778  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
779  return false;
780 }
781 
782 void R600PassConfig::addPreRegAlloc() {
783  addPass(createR600VectorRegMerger());
784 }
785 
786 void R600PassConfig::addPreSched2() {
787  addPass(createR600EmitClauseMarkers(), false);
789  addPass(&IfConverterID, false);
790  addPass(createR600ClauseMergePass(), false);
791 }
792 
793 void R600PassConfig::addPreEmitPass() {
794  addPass(createAMDGPUCFGStructurizerPass(), false);
795  addPass(createR600ExpandSpecialInstrsPass(), false);
796  addPass(&FinalizeMachineBundlesID, false);
797  addPass(createR600Packetizer(), false);
798  addPass(createR600ControlFlowFinalizer(), false);
799 }
800 
802  return new R600PassConfig(*this, PM);
803 }
804 
805 //===----------------------------------------------------------------------===//
806 // GCN Pass Setup
807 //===----------------------------------------------------------------------===//
808 
809 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
810  MachineSchedContext *C) const {
811  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
812  if (ST.enableSIScheduler())
813  return createSIMachineScheduler(C);
815 }
816 
817 bool GCNPassConfig::addPreISel() {
818  AMDGPUPassConfig::addPreISel();
819 
822  }
823 
824  // FIXME: We need to run a pass to propagate the attributes when calls are
825  // supported.
826 
827  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
828  // regions formed by them.
830  if (!LateCFGStructurize) {
831  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
832  }
833  addPass(createSinkingPass());
835  if (!LateCFGStructurize) {
837  }
838 
839  return false;
840 }
841 
842 void GCNPassConfig::addMachineSSAOptimization() {
844 
845  // We want to fold operands after PeepholeOptimizer has run (or as part of
846  // it), because it will eliminate extra copies making it easier to fold the
847  // real source operand. We want to eliminate dead instructions after, so that
848  // we see fewer uses of the copies. We then need to clean up the dead
849  // instructions leftover after the operands are folded as well.
850  //
851  // XXX - Can we get away without running DeadMachineInstructionElim again?
852  addPass(&SIFoldOperandsID);
853  if (EnableDPPCombine)
854  addPass(&GCNDPPCombineID);
856  addPass(&SILoadStoreOptimizerID);
857  if (EnableSDWAPeephole) {
858  addPass(&SIPeepholeSDWAID);
859  addPass(&EarlyMachineLICMID);
860  addPass(&MachineCSEID);
861  addPass(&SIFoldOperandsID);
863  }
865 }
866 
867 bool GCNPassConfig::addILPOpts() {
869  addPass(&EarlyIfConverterID);
870 
872  return false;
873 }
874 
875 bool GCNPassConfig::addInstSelector() {
876  AMDGPUPassConfig::addInstSelector();
877  addPass(&SIFixSGPRCopiesID);
878  addPass(createSILowerI1CopiesPass());
879  addPass(createSIFixupVectorISelPass());
880  addPass(createSIAddIMGInitPass());
881  return false;
882 }
883 
884 bool GCNPassConfig::addIRTranslator() {
885  addPass(new IRTranslator());
886  return false;
887 }
888 
889 bool GCNPassConfig::addLegalizeMachineIR() {
890  addPass(new Legalizer());
891  return false;
892 }
893 
894 bool GCNPassConfig::addRegBankSelect() {
895  addPass(new RegBankSelect());
896  return false;
897 }
898 
899 bool GCNPassConfig::addGlobalInstructionSelect() {
900  addPass(new InstructionSelect());
901  return false;
902 }
903 
904 void GCNPassConfig::addPreRegAlloc() {
905  if (LateCFGStructurize) {
907  }
908  addPass(createSIWholeQuadModePass());
909 }
910 
911 void GCNPassConfig::addFastRegAlloc() {
912  // FIXME: We have to disable the verifier here because of PHIElimination +
913  // TwoAddressInstructions disabling it.
914 
915  // This must be run immediately after phi elimination and before
916  // TwoAddressInstructions, otherwise the processing of the tied operand of
917  // SI_ELSE will introduce a copy of the tied operand source after the else.
918  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
919 
920  // This must be run just after RegisterCoalescing.
921  insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
922 
924 }
925 
926 void GCNPassConfig::addOptimizedRegAlloc() {
927  if (OptExecMaskPreRA) {
930  } else {
932  }
933 
934  // This must be run immediately after phi elimination and before
935  // TwoAddressInstructions, otherwise the processing of the tied operand of
936  // SI_ELSE will introduce a copy of the tied operand source after the else.
937  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
938 
939  // This must be run just after RegisterCoalescing.
940  insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
941 
942  if (EnableDCEInRA)
944 
946 }
947 
948 bool GCNPassConfig::addPreRewrite() {
949  if (EnableRegReassign) {
950  addPass(&GCNNSAReassignID);
951  addPass(&GCNRegBankReassignID);
952  }
953  return true;
954 }
955 
956 void GCNPassConfig::addPostRegAlloc() {
957  addPass(&SIFixVGPRCopiesID);
959  addPass(&SIOptimizeExecMaskingID);
961 }
962 
963 void GCNPassConfig::addPreSched2() {
964 }
965 
966 void GCNPassConfig::addPreEmitPass() {
967  addPass(createSIMemoryLegalizerPass());
968  addPass(createSIInsertWaitcntsPass());
970  addPass(createSIModeRegisterPass());
971 
972  // The hazard recognizer that runs as part of the post-ra scheduler does not
973  // guarantee to be able handle all hazards correctly. This is because if there
974  // are multiple scheduling regions in a basic block, the regions are scheduled
975  // bottom up, so when we begin to schedule a region we don't know what
976  // instructions were emitted directly before it.
977  //
978  // Here we add a stand-alone hazard recognizer pass which can handle all
979  // cases.
980  //
981  // FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
982  // be better for it to emit S_NOP <N> when possible.
983  addPass(&PostRAHazardRecognizerID);
984 
985  addPass(&SIInsertSkipsPassID);
986  addPass(&BranchRelaxationPassID);
987 }
988 
990  return new GCNPassConfig(*this, PM);
991 }
992 
994  return new yaml::SIMachineFunctionInfo();
995 }
996 
1000  return new yaml::SIMachineFunctionInfo(*MFI,
1001  *MF.getSubtarget().getRegisterInfo());
1002 }
1003 
1006  SMDiagnostic &Error, SMRange &SourceRange) const {
1007  const yaml::SIMachineFunctionInfo &YamlMFI =
1008  reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
1009  MachineFunction &MF = PFS.MF;
1011 
1012  MFI->initializeBaseYamlFields(YamlMFI);
1013 
1014  auto parseRegister = [&](const yaml::StringValue &RegName, unsigned &RegVal) {
1015  if (parseNamedRegisterReference(PFS, RegVal, RegName.Value, Error)) {
1016  SourceRange = RegName.SourceRange;
1017  return true;
1018  }
1019 
1020  return false;
1021  };
1022 
1023  auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
1024  // Create a diagnostic for a the register string literal.
1025  const MemoryBuffer &Buffer =
1026  *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
1027  Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
1028  RegName.Value.size(), SourceMgr::DK_Error,
1029  "incorrect register class for field", RegName.Value,
1030  None, None);
1031  SourceRange = RegName.SourceRange;
1032  return true;
1033  };
1034 
1035  if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1036  parseRegister(YamlMFI.ScratchWaveOffsetReg, MFI->ScratchWaveOffsetReg) ||
1037  parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
1038  parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
1039  return true;
1040 
1041  if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1042  !AMDGPU::SReg_128RegClass.contains(MFI->ScratchRSrcReg)) {
1043  return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
1044  }
1045 
1046  if (MFI->ScratchWaveOffsetReg != AMDGPU::SCRATCH_WAVE_OFFSET_REG &&
1047  !AMDGPU::SGPR_32RegClass.contains(MFI->ScratchWaveOffsetReg)) {
1048  return diagnoseRegisterClass(YamlMFI.ScratchWaveOffsetReg);
1049  }
1050 
1051  if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1052  !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1053  return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
1054  }
1055 
1056  if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1057  !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1058  return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
1059  }
1060 
1061  return false;
1062 }
FunctionPass * createSpeculativeExecutionPass()
char & SIFormMemoryClausesID
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
FunctionPass * createStraightLineStrengthReducePass()
uint64_t CallInst * C
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition: MemoryBuffer.h:69
Represents a range in source code.
Definition: SMLoc.h:48
FunctionPass * createGVNPass(bool NoLoads=false)
Create a legacy GVN pass.
Definition: GVN.cpp:2586
StringRef getTargetFeatureString() const
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
Target & getTheGCNTarget()
The target for GCN GPUs.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
This file a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine...
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value...
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
char & RenameIndependentSubregsID
This pass detects subregister lanes in a virtual register that are used independently of other lanes ...
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void initializeSIFixVGPRCopiesPass(PassRegistry &)
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
FunctionPass * createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a R600-specific.
void initializeSIInsertWaitcntsPass(PassRegistry &)
char & GCNNSAReassignID
void initializeSIFormMemoryClausesPass(PassRegistry &)
ModulePass * createR600OpenCLImageTypeLoweringPass()
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with...
Definition: TargetMachine.h:84
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:304
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
char & SILoadStoreOptimizerID
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
char & SIPeepholeSDWAID
void initializeSIModeRegisterPass(PassRegistry &)
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
This file describes how to lower LLVM calls to machine code calls.
char & FuncletLayoutID
This pass lays out funclets contiguously.
FunctionPass * createLowerSwitchPass()
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
char & RegisterCoalescerID
RegisterCoalescer - This pass merges live ranges to eliminate copies.
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions...
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
FunctionPass * createAMDGPUPromoteAlloca()
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
ModulePass * createAMDGPULowerKernelAttributesPass()
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target&#39;s MachineFunctionInfo from the YAML reprsentation.
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
FunctionPass * createAMDGPUCodeGenPreparePass()
F(f)
R600 Machine Scheduler interface.
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
FunctionPass * createAMDGPUCFGStructurizerPass()
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static cl::opt< bool, true > EnableAMDGPUFunctionCallsOpt("amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true), cl::Hidden)
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form...
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
FunctionPass * createSIAddIMGInitPass()
FunctionPass * createSIMemoryLegalizerPass()
Pass * Inliner
Inliner - Specifies the inliner to use.
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
FunctionPass * createSIInsertWaitcntsPass()
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
StringRef getFeatureString(const Function &F) const
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
This file declares the targeting of the InstructionSelector class for AMDGPU.
Pass * createAMDGPUFunctionInliningPass()
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
This file declares the AMDGPU-specific subclass of TargetLoweringObjectFile.
Pass * createAMDGPUAnnotateKernelFeaturesPass()
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
This file contains the simple types necessary to represent the attributes associated with functions a...
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
No attributes have been set.
Definition: Attributes.h:71
void initializeGCNNSAReassignPass(PassRegistry &)
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
void initializeAMDGPUInlinerPass(PassRegistry &)
FunctionPass * createSinkingPass()
Definition: Sink.cpp:303
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
char & SIOptimizeExecMaskingPreRAID
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
char & FinalizeMachineBundlesID
FinalizeMachineBundles - This pass finalize machine instruction bundles (created earlier, e.g.
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
Target-Independent Code Generator Pass Configuration Options.
static StringRef computeDataLayout(const Triple &TT)
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition: SourceMgr.h:130
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
FunctionPass * createR600ExpandSpecialInstrsPass()
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
RegisterTargetMachine - Helper template for registering a target machine implementation, for use in the target machine initialization function.
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:142
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI)
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:295
FunctionPass * createSIFixupVectorISelPass()
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
FunctionPass * createSILowerI1CopiesPass()
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
StringRef getTargetCPU() const
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
void initializeSIFixSGPRCopiesPass(PassRegistry &)
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
FunctionPass * createR600VectorRegMerger()
void initializeSIFixupVectorISelPass(PassRegistry &)
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
SI Machine Scheduler interface.
StringRef getGPUName(const Function &F) const
unsigned getMainFileID() const
Definition: SourceMgr.h:139
void append(in_iter S, in_iter E)
Append from an iterator pair.
Definition: SmallString.h:74
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions...
FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into an AMDGPU-specific.
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
#define P(N)
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
Definition: CSEInfo.cpp:65
char & GCNDPPCombineID
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
FunctionPass * createAMDGPULowerKernelArgumentsPass()
bool hasAttribute(AttrKind Val) const
Return true if the attribute is present.
Definition: Attributes.cpp:238
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
Definition: RegBankSelect.h:90
FunctionPass * createFlattenCFGPass()
static cl::opt< bool > EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
FunctionPass * createSIWholeQuadModePass()
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
char & SIInsertSkipsPassID
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g.
bool isEntryFunctionCC(CallingConv::ID CC)
void LLVMInitializeAMDGPUTarget()
void initializeSIPeepholeSDWAPass(PassRegistry &)
Pass * createLICMPass()
Definition: LICM.cpp:313
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg, StringRef Src, SMDiagnostic &Error)
Definition: MIParser.cpp:3020
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
FunctionPass * createR600ControlFlowFinalizer()
Legacy wrapper pass to provide the AMDGPUAAResult object.
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
const Triple & getTargetTriple() const
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
void initializeSILowerControlFlowPass(PassRegistry &)
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
ModulePass * createAMDGPULowerIntrinsicsPass()
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
FunctionPass * createSIModeRegisterPass()
const TargetSubtargetInfo * getSubtargetImpl() const
FunctionPass * createR600ClauseMergePass()
The AMDGPU TargetMachine interface definition for hw codegen targets.
static cl::opt< bool > EnableR600IfConvert("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static ScheduleDAGInstrs * createR600MachineScheduler(MachineSchedContext *C)
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...
void initializeSIShrinkInstructionsPass(PassRegistry &)
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
Analysis pass providing a never-invalidated alias analysis result.
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations, allowing them to see the code as it is coming out of the frontend.
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
void initializeSIInsertSkipsPass(PassRegistry &)
void initializeR600PacketizerPass(PassRegistry &)
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
FunctionPass * createAMDGPUAnnotateUniformValues()
This is the AMDGPU address space based alias analysis pass.
Provides passes to inlining "always_inline" functions.
char & SIOptimizeExecMaskingID
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
void initializeGCNRegBankReassignPass(PassRegistry &)
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &)
char & AMDGPUUnifyDivergentExitNodesID
bool enableSIScheduler() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
void initializeSIMemoryLegalizerPass(PassRegistry &)
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module, internalizing all globals (functions and variables) it can.
char & SIPreAllocateWWMRegsID
void initializeSIWholeQuadModePass(PassRegistry &)
void setRequiresStructuredCFG(bool Value)
FunctionPass * createAMDGPUAtomicOptimizerPass()
void initializeR600VectorRegMergerPass(PassRegistry &)
char & SIFixVGPRCopiesID
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
void initializeGCNDPPCombinePass(PassRegistry &)
ImmutablePass * createAMDGPUAAWrapperPass()
FunctionPass * createR600EmitClauseMarkers()
void initializeR600ClauseMergePassPass(PassRegistry &)
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition: MemoryBuffer.h:41
This pass is responsible for selecting generic machine instructions to target-specific instructions...
ModulePass * createAMDGPUFixFunctionBitcastsPass()
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Target - Wrapper for Target specific information.
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
This file declares the targeting of the Machinelegalizer class for AMDGPU.
A wrapper around std::string which contains a source range that's being set during parsing...
FunctionPass * createR600Packetizer()
void initializeSILoadStoreOptimizerPass(PassRegistry &)
char & SILowerControlFlowID
ModulePass * createAMDGPUUnifyMetadataPass()
void initializeSIAnnotateControlFlowPass(PassRegistry &)
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
A ScheduleDAG for scheduling lists of MachineInstr.
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
void initializeSIFoldOperandsPass(PassRegistry &)
char & SIFoldOperandsID
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
FunctionPass * createSIShrinkInstructionsPass()
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:223
TargetOptions Options
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
#define LLVM_READNONE
Definition: Compiler.h:176
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
char & SIFixSGPRCopiesID
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionPass * createSROAPass()
Definition: SROA.cpp:4635
static MachineSchedRegistry R600SchedRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler)
ImmutablePass * createAMDGPUExternalAAWrapperPass()
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
char & GCNRegBankReassignID
This file declares the IRTranslator pass.
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetOptions &, const TargetMachine *)
FunctionPass * createAMDGPUUseNativeCallsPass()
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
char & PostRAHazardRecognizerID
createPostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:333
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1409
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
void initializeSILowerI1CopiesPass(PassRegistry &)
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
void addExtension(ExtensionPointTy Ty, ExtensionFn Fn)
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
Represents a location in source code.
Definition: SMLoc.h:23
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableR600StructurizeCFG("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...
FunctionPass * createAtomicExpandPass()
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
Definition: GlobalISel.cpp:18
bool use_empty() const
Definition: Value.h:322
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:448
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
FunctionPass * createNaryReassociatePass()
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:259