LLVM  9.0.0svn
AMDGPUTargetMachine.cpp
Go to the documentation of this file.
1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The AMDGPU target machine contains all of the hardware specific
11 /// information needed to emit code for R600 and SI GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUAliasAnalysis.h"
18 #include "AMDGPUCallLowering.h"
20 #include "AMDGPULegalizerInfo.h"
21 #include "AMDGPUMacroFusion.h"
22 #include "AMDGPUTargetObjectFile.h"
24 #include "GCNIterativeScheduler.h"
25 #include "GCNSchedStrategy.h"
26 #include "R600MachineScheduler.h"
27 #include "SIMachineFunctionInfo.h"
28 #include "SIMachineScheduler.h"
34 #include "llvm/CodeGen/Passes.h"
36 #include "llvm/IR/Attributes.h"
37 #include "llvm/IR/Function.h"
39 #include "llvm/Pass.h"
41 #include "llvm/Support/Compiler.h"
44 #include "llvm/Transforms/IPO.h"
47 #include "llvm/Transforms/Scalar.h"
49 #include "llvm/Transforms/Utils.h"
51 #include <memory>
52 
53 using namespace llvm;
54 
56  "r600-ir-structurize",
57  cl::desc("Use StructurizeCFG IR pass"),
58  cl::init(true));
59 
61  "amdgpu-sroa",
62  cl::desc("Run SROA after promote alloca pass"),
64  cl::init(true));
65 
66 static cl::opt<bool>
67 EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
68  cl::desc("Run early if-conversion"),
69  cl::init(false));
70 
71 static cl::opt<bool>
72 OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
73  cl::desc("Run pre-RA exec mask optimizations"),
74  cl::init(true));
75 
77  "r600-if-convert",
78  cl::desc("Use if conversion pass"),
80  cl::init(true));
81 
82 // Option to disable vectorizer for tests.
84  "amdgpu-load-store-vectorizer",
85  cl::desc("Enable load store vectorizer"),
86  cl::init(true),
87  cl::Hidden);
88 
89 // Option to control global loads scalarization
91  "amdgpu-scalarize-global-loads",
92  cl::desc("Enable global load scalarization"),
93  cl::init(true),
94  cl::Hidden);
95 
96 // Option to run internalize pass.
98  "amdgpu-internalize-symbols",
99  cl::desc("Enable elimination of non-kernel functions and unused globals"),
100  cl::init(false),
101  cl::Hidden);
102 
103 // Option to inline all early.
105  "amdgpu-early-inline-all",
106  cl::desc("Inline all functions early"),
107  cl::init(false),
108  cl::Hidden);
109 
111  "amdgpu-sdwa-peephole",
112  cl::desc("Enable SDWA peepholer"),
113  cl::init(true));
114 
116  "amdgpu-dpp-combine",
117  cl::desc("Enable DPP combiner"),
118  cl::init(true));
119 
120 // Enable address space based alias analysis
121 static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
122  cl::desc("Enable AMDGPU Alias Analysis"),
123  cl::init(true));
124 
125 // Option to run late CFG structurizer
127  "amdgpu-late-structurize",
128  cl::desc("Enable late CFG structurization"),
130  cl::Hidden);
131 
133  "amdgpu-function-calls",
134  cl::desc("Enable AMDGPU function call support"),
136  cl::init(true),
137  cl::Hidden);
138 
139 // Enable lib calls simplifications
141  "amdgpu-simplify-libcall",
142  cl::desc("Enable amdgpu library simplifications"),
143  cl::init(true),
144  cl::Hidden);
145 
147  "amdgpu-ir-lower-kernel-arguments",
148  cl::desc("Lower kernel argument loads in IR pass"),
149  cl::init(true),
150  cl::Hidden);
151 
152 // Enable atomic optimization
154  "amdgpu-atomic-optimizations",
155  cl::desc("Enable atomic optimizations"),
156  cl::init(false),
157  cl::Hidden);
158 
159 // Enable Mode register optimization
161  "amdgpu-mode-register",
162  cl::desc("Enable mode register pass"),
163  cl::init(true),
164  cl::Hidden);
165 
166 // Option is used in lit tests to prevent deadcoding of patterns inspected.
167 static cl::opt<bool>
168 EnableDCEInRA("amdgpu-dce-in-ra",
169  cl::init(true), cl::Hidden,
170  cl::desc("Enable machine DCE inside regalloc"));
171 
173  "amdgpu-scalar-ir-passes",
174  cl::desc("Enable scalar IR passes"),
175  cl::init(true),
176  cl::Hidden);
177 
// Entry point invoked by LLVM's target registry machinery to register the
// AMDGPU targets and initialize their backend passes.
178 extern "C" void LLVMInitializeAMDGPUTarget() {
179  // Register the target
// NOTE(review): original lines 180-230 (the RegisterTargetMachine calls and
// the long list of initialize*Pass(PR) calls — see the initialize*
// declarations referenced at the end of this listing) were dropped by the
// extraction; restore them from the upstream file before building.
182 
231 }
232 
233 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
234  return llvm::make_unique<AMDGPUTargetObjectFile>();
235 }
236 
238  return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
239 }
240 
242  return new SIScheduleDAGMI(C);
243 }
244 
245 static ScheduleDAGInstrs *
247  ScheduleDAGMILive *DAG =
248  new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
252  return DAG;
253 }
254 
255 static ScheduleDAGInstrs *
257  auto DAG = new GCNIterativeScheduler(C,
259  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
260  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
261  return DAG;
262 }
263 
265  return new GCNIterativeScheduler(C,
267 }
268 
269 static ScheduleDAGInstrs *
271  auto DAG = new GCNIterativeScheduler(C,
273  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
274  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
275  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
276  return DAG;
277 }
278 
280 R600SchedRegistry("r600", "Run R600's custom scheduler",
282 
284 SISchedRegistry("si", "Run SI's custom scheduler",
286 
288 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
289  "Run GCN scheduler to maximize occupancy",
291 
293 IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
294  "Run GCN scheduler to maximize occupancy (experimental)",
296 
298 GCNMinRegSchedRegistry("gcn-minreg",
299  "Run GCN iterative scheduler for minimal register usage (experimental)",
301 
303 GCNILPSchedRegistry("gcn-ilp",
304  "Run GCN iterative scheduler for ILP scheduling (experimental)",
306 
307 static StringRef computeDataLayout(const Triple &TT) {
308  if (TT.getArch() == Triple::r600) {
309  // 32-bit pointers.
310  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
311  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
312  }
313 
314  // 32-bit private, local, and region pointers. 64-bit global, constant and
315  // flat, non-integral buffer fat pointers.
316  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
317  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
318  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
319  "-ni:7";
320 }
321 
323 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
324  if (!GPU.empty())
325  return GPU;
326 
327  // Need to default to a target with flat support for HSA.
328  if (TT.getArch() == Triple::amdgcn)
329  return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
330 
331  return "r600";
332 }
333 
335  // The AMDGPU toolchain only supports generating shared objects, so we
336  // must always use PIC.
337  return Reloc::PIC_;
338 }
339 
341  StringRef CPU, StringRef FS,
342  TargetOptions Options,
345  CodeGenOpt::Level OptLevel)
346  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
347  FS, Options, getEffectiveRelocModel(RM),
348  getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
349  TLOF(createTLOF(getTargetTriple())) {
350  initAsmInfo();
351 }
352 
355 
357 
359  Attribute GPUAttr = F.getFnAttribute("target-cpu");
360  return GPUAttr.hasAttribute(Attribute::None) ?
361  getTargetCPU() : GPUAttr.getValueAsString();
362 }
363 
365  Attribute FSAttr = F.getFnAttribute("target-features");
366 
367  return FSAttr.hasAttribute(Attribute::None) ?
369  FSAttr.getValueAsString();
370 }
371 
372 /// Predicate for Internalize pass.
373 static bool mustPreserveGV(const GlobalValue &GV) {
374  if (const Function *F = dyn_cast<Function>(&GV))
375  return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
376 
377  return !GV.use_empty();
378 }
379 
381  Builder.DivergentTarget = true;
382 
383  bool EnableOpt = getOptLevel() > CodeGenOpt::None;
384  bool Internalize = InternalizeSymbols;
385  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
386  bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
387  bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
388 
389  if (EnableFunctionCalls) {
390  delete Builder.Inliner;
392  }
393 
394  Builder.addExtension(
396  [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
398  if (AMDGPUAA) {
401  }
403  if (Internalize) {
405  PM.add(createGlobalDCEPass());
406  }
407  if (EarlyInline)
409  });
410 
411  const auto &Opt = Options;
412  Builder.addExtension(
414  [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
416  if (AMDGPUAA) {
419  }
421  if (LibCallSimplify)
423  });
424 
425  Builder.addExtension(
428  // Add infer address spaces pass to the opt pipeline after inlining
429  // but before SROA to increase SROA opportunities.
431 
432  // This should run after inlining to have any chance of doing anything,
433  // and before other cleanup optimizations.
435  });
436 }
437 
438 //===----------------------------------------------------------------------===//
439 // R600 Target Machine (R600 -> Cayman)
440 //===----------------------------------------------------------------------===//
441 
443  StringRef CPU, StringRef FS,
447  CodeGenOpt::Level OL, bool JIT)
448  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
450 
451  // Override the default since calls aren't supported for r600.
452  if (EnableFunctionCalls &&
453  EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
454  EnableFunctionCalls = false;
455 }
456 
458  const Function &F) const {
459  StringRef GPU = getGPUName(F);
460  StringRef FS = getFeatureString(F);
461 
462  SmallString<128> SubtargetKey(GPU);
463  SubtargetKey.append(FS);
464 
465  auto &I = SubtargetMap[SubtargetKey];
466  if (!I) {
467  // This needs to be done before we create a new subtarget since any
468  // creation will depend on the TM and the code generation flags on the
469  // function that reside in TargetOptions.
471  I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
472  }
473 
474  return I.get();
475 }
476 
479  return TargetTransformInfo(R600TTIImpl(this, F));
480 }
481 
482 //===----------------------------------------------------------------------===//
483 // GCN Target Machine (SI+)
484 //===----------------------------------------------------------------------===//
485 
487  StringRef CPU, StringRef FS,
491  CodeGenOpt::Level OL, bool JIT)
492  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
493 
495  StringRef GPU = getGPUName(F);
496  StringRef FS = getFeatureString(F);
497 
498  SmallString<128> SubtargetKey(GPU);
499  SubtargetKey.append(FS);
500 
501  auto &I = SubtargetMap[SubtargetKey];
502  if (!I) {
503  // This needs to be done before we create a new subtarget since any
504  // creation will depend on the TM and the code generation flags on the
505  // function that reside in TargetOptions.
507  I = llvm::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
508  }
509 
510  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
511 
512  return I.get();
513 }
514 
517  return TargetTransformInfo(GCNTTIImpl(this, F));
518 }
519 
520 //===----------------------------------------------------------------------===//
521 // AMDGPU Pass Setup
522 //===----------------------------------------------------------------------===//
523 
524 namespace {
525 
526 class AMDGPUPassConfig : public TargetPassConfig {
527 public:
528  AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
529  : TargetPassConfig(TM, PM) {
530  // Exceptions and StackMaps are not supported, so these passes will never do
531  // anything.
532  disablePass(&StackMapLivenessID);
533  disablePass(&FuncletLayoutID);
534  }
535 
536  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
537  return getTM<AMDGPUTargetMachine>();
538  }
539 
541  createMachineScheduler(MachineSchedContext *C) const override {
545  return DAG;
546  }
547 
548  void addEarlyCSEOrGVNPass();
549  void addStraightLineScalarOptimizationPasses();
550  void addIRPasses() override;
551  void addCodeGenPrepare() override;
552  bool addPreISel() override;
553  bool addInstSelector() override;
554  bool addGCPasses() override;
555 
556  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
557 };
558 
559 std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig() const {
560  return getStandardCSEConfigForOpt(TM->getOptLevel());
561 }
562 
// Pipeline configuration for the R600 (pre-GCN) backend: selects the R600
// custom machine scheduler and the R600-specific lowering hooks below.
563 class R600PassConfig final : public AMDGPUPassConfig {
564 public:
565  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
566  : AMDGPUPassConfig(TM, PM) {}
567 
568  ScheduleDAGInstrs *createMachineScheduler(
569  MachineSchedContext *C) const override {
570  return createR600MachineScheduler(C);
571  }
572 
573  bool addPreISel() override;
574  bool addInstSelector() override;
575  void addPreRegAlloc() override;
576  void addPreSched2() override;
577  void addPreEmitPass() override;
578 };
579 
580 class GCNPassConfig final : public AMDGPUPassConfig {
581 public:
582  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
583  : AMDGPUPassConfig(TM, PM) {
584  // It is necessary to know the register usage of the entire call graph. We
585  // allow calls without EnableAMDGPUFunctionCalls if they are marked
586  // noinline, so this is always required.
587  setRequiresCodeGenSCCOrder(true);
588  }
589 
590  GCNTargetMachine &getGCNTargetMachine() const {
591  return getTM<GCNTargetMachine>();
592  }
593 
595  createMachineScheduler(MachineSchedContext *C) const override;
596 
597  bool addPreISel() override;
598  void addMachineSSAOptimization() override;
599  bool addILPOpts() override;
600  bool addInstSelector() override;
601  bool addIRTranslator() override;
602  bool addLegalizeMachineIR() override;
603  bool addRegBankSelect() override;
604  bool addGlobalInstructionSelect() override;
605  void addFastRegAlloc() override;
606  void addOptimizedRegAlloc() override;
607  void addPreRegAlloc() override;
608  void addPostRegAlloc() override;
609  void addPreSched2() override;
610  void addPreEmitPass() override;
611 };
612 
613 } // end anonymous namespace
614 
615 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
617  addPass(createGVNPass());
618  else
619  addPass(createEarlyCSEPass());
620 }
621 
622 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
623  addPass(createLICMPass());
626  // ReassociateGEPs exposes more opportunites for SLSR. See
627  // the example in reassociate-geps-and-slsr.ll.
629  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
630  // EarlyCSE can reuse.
631  addEarlyCSEOrGVNPass();
632  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
633  addPass(createNaryReassociatePass());
634  // NaryReassociate on GEPs creates redundant common expressions, so run
635  // EarlyCSE after it.
636  addPass(createEarlyCSEPass());
637 }
638 
639 void AMDGPUPassConfig::addIRPasses() {
640  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
641 
642  // There is no reason to run these.
643  disablePass(&StackMapLivenessID);
644  disablePass(&FuncletLayoutID);
645  disablePass(&PatchableFunctionID);
646 
647  addPass(createAtomicExpandPass());
648 
649  // This must occur before inlining, as the inliner will not look through
650  // bitcast calls.
652 
654 
655  // Function calls are not supported, so make sure we inline everything.
656  addPass(createAMDGPUAlwaysInlinePass());
658  // We need to add the barrier noop pass, otherwise adding the function
659  // inlining pass will cause all of the PassConfigs passes to be run
660  // one function at a time, which means if we have a nodule with two
661  // functions, then we will generate code for the first function
662  // without ever running any passes on the second.
663  addPass(createBarrierNoopPass());
664 
665  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
666  // TODO: May want to move later or split into an early and late one.
667 
669  }
670 
671  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
672  if (TM.getTargetTriple().getArch() == Triple::r600)
674 
675  // Replace OpenCL enqueued block function pointers with global variables.
677 
678  if (TM.getOptLevel() > CodeGenOpt::None) {
679  addPass(createInferAddressSpacesPass());
680  addPass(createAMDGPUPromoteAlloca());
681 
682  if (EnableSROA)
683  addPass(createSROAPass());
684 
686  addStraightLineScalarOptimizationPasses();
687 
689  addPass(createAMDGPUAAWrapperPass());
691  AAResults &AAR) {
692  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
693  AAR.addAAResult(WrapperPass->getResult());
694  }));
695  }
696  }
697 
699 
700  // EarlyCSE is not always strong enough to clean up what LSR produces. For
701  // example, GVN can combine
702  //
703  // %0 = add %a, %b
704  // %1 = add %b, %a
705  //
706  // and
707  //
708  // %0 = shl nsw %a, 2
709  // %1 = shl %a, 2
710  //
711  // but EarlyCSE can do neither of them.
713  addEarlyCSEOrGVNPass();
714 }
715 
716 void AMDGPUPassConfig::addCodeGenPrepare() {
717  if (TM->getTargetTriple().getArch() == Triple::amdgcn)
719 
720  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
723 
725 
728 }
729 
// Shared pre-ISel canonicalization: lower switches and flatten the CFG.
730 bool AMDGPUPassConfig::addPreISel() {
731  addPass(createLowerSwitchPass());
732  addPass(createFlattenCFGPass());
733  return false;
734 }
735 
// Shared SelectionDAG instruction selector.
736 bool AMDGPUPassConfig::addInstSelector() {
737  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
738  return false;
739 }
740 
741 bool AMDGPUPassConfig::addGCPasses() {
742  // Do nothing. GC is not supported.
743  return false;
744 }
745 
746 //===----------------------------------------------------------------------===//
747 // R600 Pass Setup
748 //===----------------------------------------------------------------------===//
749 
750 bool R600PassConfig::addPreISel() {
751  AMDGPUPassConfig::addPreISel();
752 
754  addPass(createStructurizeCFGPass());
755  return false;
756 }
757 
// R600-specific SelectionDAG instruction selector.
758 bool R600PassConfig::addInstSelector() {
759  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
760  return false;
761 }
762 
// Merge vector registers before register allocation.
763 void R600PassConfig::addPreRegAlloc() {
764  addPass(createR600VectorRegMerger());
765 }
766 
767 void R600PassConfig::addPreSched2() {
768  addPass(createR600EmitClauseMarkers(), false);
770  addPass(&IfConverterID, false);
771  addPass(createR600ClauseMergePass(), false);
772 }
773 
// Final R600 lowering before emission: CFG structurization, special-instr
// expansion, bundle finalization, packetizing and control-flow finalization.
// The `false` argument suppresses machine verification after each pass.
774 void R600PassConfig::addPreEmitPass() {
775  addPass(createAMDGPUCFGStructurizerPass(), false);
776  addPass(createR600ExpandSpecialInstrsPass(), false);
777  addPass(&FinalizeMachineBundlesID, false);
778  addPass(createR600Packetizer(), false);
779  addPass(createR600ControlFlowFinalizer(), false);
780 }
781 
783  return new R600PassConfig(*this, PM);
784 }
785 
786 //===----------------------------------------------------------------------===//
787 // GCN Pass Setup
788 //===----------------------------------------------------------------------===//
789 
790 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
791  MachineSchedContext *C) const {
792  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
793  if (ST.enableSIScheduler())
794  return createSIMachineScheduler(C);
796 }
797 
798 bool GCNPassConfig::addPreISel() {
799  AMDGPUPassConfig::addPreISel();
800 
803  }
804 
805  // FIXME: We need to run a pass to propagate the attributes when calls are
806  // supported.
807 
808  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
809  // regions formed by them.
811  if (!LateCFGStructurize) {
812  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
813  }
814  addPass(createSinkingPass());
816  if (!LateCFGStructurize) {
818  }
819 
820  return false;
821 }
822 
823 void GCNPassConfig::addMachineSSAOptimization() {
825 
826  // We want to fold operands after PeepholeOptimizer has run (or as part of
827  // it), because it will eliminate extra copies making it easier to fold the
828  // real source operand. We want to eliminate dead instructions after, so that
829  // we see fewer uses of the copies. We then need to clean up the dead
830  // instructions leftover after the operands are folded as well.
831  //
832  // XXX - Can we get away without running DeadMachineInstructionElim again?
833  addPass(&SIFoldOperandsID);
834  if (EnableDPPCombine)
835  addPass(&GCNDPPCombineID);
837  addPass(&SILoadStoreOptimizerID);
838  if (EnableSDWAPeephole) {
839  addPass(&SIPeepholeSDWAID);
840  addPass(&EarlyMachineLICMID);
841  addPass(&MachineCSEID);
842  addPass(&SIFoldOperandsID);
844  }
846 }
847 
848 bool GCNPassConfig::addILPOpts() {
850  addPass(&EarlyIfConverterID);
851 
853  return false;
854 }
855 
// GCN SelectionDAG selector plus post-ISel fixups (SGPR copies, i1 copies,
// vector-ISel fixups, image init).
856 bool GCNPassConfig::addInstSelector() {
857  AMDGPUPassConfig::addInstSelector();
858  addPass(&SIFixSGPRCopiesID);
859  addPass(createSILowerI1CopiesPass());
860  addPass(createSIFixupVectorISelPass());
861  addPass(createSIAddIMGInitPass());
862  return false;
863 }
864 
// GlobalISel pipeline hooks: IR translation, legalization, register-bank
// selection and instruction selection.
865 bool GCNPassConfig::addIRTranslator() {
866  addPass(new IRTranslator());
867  return false;
868 }
869 
870 bool GCNPassConfig::addLegalizeMachineIR() {
871  addPass(new Legalizer());
872  return false;
873 }
874 
875 bool GCNPassConfig::addRegBankSelect() {
876  addPass(new RegBankSelect());
877  return false;
878 }
879 
880 bool GCNPassConfig::addGlobalInstructionSelect() {
881  addPass(new InstructionSelect());
882  return false;
883 }
884 
885 void GCNPassConfig::addPreRegAlloc() {
886  if (LateCFGStructurize) {
888  }
889  addPass(createSIWholeQuadModePass());
890 }
891 
892 void GCNPassConfig::addFastRegAlloc() {
893  // FIXME: We have to disable the verifier here because of PHIElimination +
894  // TwoAddressInstructions disabling it.
895 
896  // This must be run immediately after phi elimination and before
897  // TwoAddressInstructions, otherwise the processing of the tied operand of
898  // SI_ELSE will introduce a copy of the tied operand source after the else.
899  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
900 
901  // This must be run just after RegisterCoalescing.
902  insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
903 
905 }
906 
907 void GCNPassConfig::addOptimizedRegAlloc() {
908  if (OptExecMaskPreRA) {
911  } else {
913  }
914 
915  // This must be run immediately after phi elimination and before
916  // TwoAddressInstructions, otherwise the processing of the tied operand of
917  // SI_ELSE will introduce a copy of the tied operand source after the else.
918  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
919 
920  // This must be run just after RegisterCoalescing.
921  insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
922 
923  if (EnableDCEInRA)
925 
927 }
928 
929 void GCNPassConfig::addPostRegAlloc() {
930  addPass(&SIFixVGPRCopiesID);
932  addPass(&SIOptimizeExecMaskingID);
934 }
935 
936 void GCNPassConfig::addPreSched2() {
937 }
938 
939 void GCNPassConfig::addPreEmitPass() {
940  addPass(createSIMemoryLegalizerPass());
941  addPass(createSIInsertWaitcntsPass());
943  addPass(createSIModeRegisterPass());
944 
945  // The hazard recognizer that runs as part of the post-ra scheduler does not
946  // guarantee to be able handle all hazards correctly. This is because if there
947  // are multiple scheduling regions in a basic block, the regions are scheduled
948  // bottom up, so when we begin to schedule a region we don't know what
949  // instructions were emitted directly before it.
950  //
951  // Here we add a stand-alone hazard recognizer pass which can handle all
952  // cases.
953  //
954  // FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
955  // be better for it to emit S_NOP <N> when possible.
956  addPass(&PostRAHazardRecognizerID);
957 
958  addPass(&SIInsertSkipsPassID);
959  addPass(&BranchRelaxationPassID);
960 }
961 
963  return new GCNPassConfig(*this, PM);
964 }
965 
967  return new yaml::SIMachineFunctionInfo();
968 }
969 
973  return new yaml::SIMachineFunctionInfo(*MFI,
974  *MF.getSubtarget().getRegisterInfo());
975 }
976 
979  SMDiagnostic &Error, SMRange &SourceRange) const {
980  const yaml::SIMachineFunctionInfo &YamlMFI =
981  reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
982  MachineFunction &MF = PFS.MF;
984 
985  MFI->initializeBaseYamlFields(YamlMFI);
986 
987  auto parseRegister = [&](const yaml::StringValue &RegName, unsigned &RegVal) {
988  if (parseNamedRegisterReference(PFS, RegVal, RegName.Value, Error)) {
989  SourceRange = RegName.SourceRange;
990  return true;
991  }
992 
993  return false;
994  };
995 
996  auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
997  // Create a diagnostic for a the register string literal.
998  const MemoryBuffer &Buffer =
999  *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
1000  Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
1001  RegName.Value.size(), SourceMgr::DK_Error,
1002  "incorrect register class for field", RegName.Value,
1003  None, None);
1004  SourceRange = RegName.SourceRange;
1005  return true;
1006  };
1007 
1008  if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1009  parseRegister(YamlMFI.ScratchWaveOffsetReg, MFI->ScratchWaveOffsetReg) ||
1010  parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
1011  parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
1012  return true;
1013 
1014  if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1015  !AMDGPU::SReg_128RegClass.contains(MFI->ScratchRSrcReg)) {
1016  return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
1017  }
1018 
1019  if (MFI->ScratchWaveOffsetReg != AMDGPU::SCRATCH_WAVE_OFFSET_REG &&
1020  !AMDGPU::SGPR_32RegClass.contains(MFI->ScratchWaveOffsetReg)) {
1021  return diagnoseRegisterClass(YamlMFI.ScratchWaveOffsetReg);
1022  }
1023 
1024  if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1025  !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1026  return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
1027  }
1028 
1029  if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1030  !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1031  return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
1032  }
1033 
1034  return false;
1035 }
FunctionPass * createSpeculativeExecutionPass()
char & SIFormMemoryClausesID
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:80
FunctionPass * createStraightLineStrengthReducePass()
uint64_t CallInst * C
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition: MemoryBuffer.h:69
Represents a range in source code.
Definition: SMLoc.h:48
FunctionPass * createGVNPass(bool NoLoads=false)
Create a legacy GVN pass.
Definition: GVN.cpp:2590
StringRef getTargetFeatureString() const
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
Target & getTheGCNTarget()
The target for GCN GPUs.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
This file a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine...
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value...
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application startup.
char & RenameIndependentSubregsID
This pass detects subregister lanes in a virtual register that are used independently of other lanes ...
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void initializeSIFixVGPRCopiesPass(PassRegistry &)
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
FunctionPass * createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a R600-specific.
void initializeSIInsertWaitcntsPass(PassRegistry &)
void initializeSIFormMemoryClausesPass(PassRegistry &)
ModulePass * createR600OpenCLImageTypeLoweringPass()
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with...
Definition: TargetMachine.h:84
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:300
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
char & SILoadStoreOptimizerID
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
char & SIPeepholeSDWAID
void initializeSIModeRegisterPass(PassRegistry &)
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
This file describes how to lower LLVM calls to machine code calls.
char & FuncletLayoutID
This pass lays out funclets contiguously.
FunctionPass * createLowerSwitchPass()
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
char & RegisterCoalescerID
RegisterCoalescer - This pass merges live ranges to eliminate copies.
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions...
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
FunctionPass * createAMDGPUPromoteAlloca()
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
ModulePass * createAMDGPULowerKernelAttributesPass()
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target's MachineFunctionInfo from the YAML representation.
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
FunctionPass * createAMDGPUCodeGenPreparePass()
F(f)
R600 Machine Scheduler interface.
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
FunctionPass * createAMDGPUCFGStructurizerPass()
MachineSchedRegistry provides a selection of available machine instruction schedulers.
static cl::opt< bool, true > EnableAMDGPUFunctionCallsOpt("amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true), cl::Hidden)
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form...
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
FunctionPass * createSIAddIMGInitPass()
FunctionPass * createSIMemoryLegalizerPass()
Pass * Inliner
Inliner - Specifies the inliner to use.
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
FunctionPass * createSIInsertWaitcntsPass()
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
StringRef getFeatureString(const Function &F) const
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
void resetTargetOptions(const Function &F) const
Reset the target options based on the function&#39;s attributes.
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
This file declares the targeting of the InstructionSelector class for AMDGPU.
Pass * createAMDGPUFunctionInliningPass()
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
This file declares the AMDGPU-specific subclass of TargetLoweringObjectFile.
Pass * createAMDGPUAnnotateKernelFeaturesPass()
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
This file contains the simple types necessary to represent the attributes associated with functions a...
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
No attributes have been set.
Definition: Attributes.h:71
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
void initializeAMDGPUInlinerPass(PassRegistry &)
FunctionPass * createSinkingPass()
Definition: Sink.cpp:303
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
char & SIOptimizeExecMaskingPreRAID
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
char & FinalizeMachineBundlesID
FinalizeMachineBundles - This pass finalize machine instruction bundles (created earlier, e.g.
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
Target-Independent Code Generator Pass Configuration Options.
static StringRef computeDataLayout(const Triple &TT)
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition: SourceMgr.h:130
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
FunctionPass * createR600ExpandSpecialInstrsPass()
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
RegisterTargetMachine - Helper template for registering a target machine implementation, for use in the target machine initialization function.
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:132
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI)
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:291
FunctionPass * createSIFixupVectorISelPass()
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
FunctionPass * createSILowerI1CopiesPass()
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
StringRef getTargetCPU() const
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
void initializeSIFixSGPRCopiesPass(PassRegistry &)
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
FunctionPass * createR600VectorRegMerger()
void initializeSIFixupVectorISelPass(PassRegistry &)
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
SI Machine Scheduler interface.
StringRef getGPUName(const Function &F) const
unsigned getMainFileID() const
Definition: SourceMgr.h:139
void append(in_iter S, in_iter E)
Append from an iterator pair.
Definition: SmallString.h:74
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions...
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
#define P(N)
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
Definition: CSEInfo.cpp:65
char & GCNDPPCombineID
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:427
FunctionPass * createAMDGPULowerKernelArgumentsPass()
bool hasAttribute(AttrKind Val) const
Return true if the attribute is present.
Definition: Attributes.cpp:201
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
Definition: RegBankSelect.h:90
FunctionPass * createFlattenCFGPass()
static cl::opt< bool > EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
FunctionPass * createSIWholeQuadModePass()
This file provides the interface for LLVM&#39;s Global Value Numbering pass which eliminates fully redund...
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
char & SIInsertSkipsPassID
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g.
bool isEntryFunctionCC(CallingConv::ID CC)
void LLVMInitializeAMDGPUTarget()
void initializeSIPeepholeSDWAPass(PassRegistry &)
Pass * createLICMPass()
Definition: LICM.cpp:303
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg, StringRef Src, SMDiagnostic &Error)
Definition: MIParser.cpp:3012
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
FunctionPass * createR600ControlFlowFinalizer()
Legacy wrapper pass to provide the AMDGPUAAResult object.
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
const Triple & getTargetTriple() const
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
void initializeSILowerControlFlowPass(PassRegistry &)
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
ModulePass * createAMDGPULowerIntrinsicsPass()
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
FunctionPass * createSIModeRegisterPass()
const TargetSubtargetInfo * getSubtargetImpl() const
FunctionPass * createR600ClauseMergePass()
The AMDGPU TargetMachine interface definition for hw codgen targets.
static cl::opt< bool > EnableR600IfConvert("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static ScheduleDAGInstrs * createR600MachineScheduler(MachineSchedContext *C)
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...
void initializeSIShrinkInstructionsPass(PassRegistry &)
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
Analysis pass providing a never-invalidated alias analysis result.
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations, allowing them to see the code as it is coming out of the frontend.
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
void initializeSIInsertSkipsPass(PassRegistry &)
void initializeR600PacketizerPass(PassRegistry &)
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
FunctionPass * createAMDGPUAnnotateUniformValues()
This is the AMGPU address space based alias analysis pass.
Provides passes to inlining "always_inline" functions.
char & SIOptimizeExecMaskingID
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &)
char & AMDGPUUnifyDivergentExitNodesID
bool enableSIScheduler() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
void initializeSIMemoryLegalizerPass(PassRegistry &)
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module, internalizing all globals (functions and variables) it can.
char & SIPreAllocateWWMRegsID
void initializeSIWholeQuadModePass(PassRegistry &)
void setRequiresStructuredCFG(bool Value)
FunctionPass * createAMDGPUAtomicOptimizerPass()
void initializeR600VectorRegMergerPass(PassRegistry &)
char & SIFixVGPRCopiesID
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
void initializeGCNDPPCombinePass(PassRegistry &)
ImmutablePass * createAMDGPUAAWrapperPass()
FunctionPass * createR600EmitClauseMarkers()
void initializeR600ClauseMergePassPass(PassRegistry &)
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition: MemoryBuffer.h:41
This pass is responsible for selecting generic machine instructions to target-specific instructions...
ModulePass * createAMDGPUFixFunctionBitcastsPass()
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Target - Wrapper for Target specific information.
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
This file declares the targeting of the Machinelegalizer class for AMDGPU.
A wrapper around std::string which contains a source range that&#39;s being set during parsing...
FunctionPass * createR600Packetizer()
void initializeSILoadStoreOptimizerPass(PassRegistry &)
char & SILowerControlFlowID
ModulePass * createAMDGPUUnifyMetadataPass()
void initializeSIAnnotateControlFlowPass(PassRegistry &)
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
A ScheduleDAG for scheduling lists of MachineInstr.
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
FunctionPass * createInferAddressSpacesPass()
void initializeSIFoldOperandsPass(PassRegistry &)
char & SIFoldOperandsID
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
FunctionPass * createSIShrinkInstructionsPass()
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:194
TargetOptions Options
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
#define LLVM_READNONE
Definition: Compiler.h:176
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
char & SIFixSGPRCopiesID
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetOptions &)
FunctionPass * createSROAPass()
Definition: SROA.cpp:4596
static MachineSchedRegistry R600SchedRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler)
ImmutablePass * createAMDGPUExternalAAWrapperPass()
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
This file declares the IRTranslator pass.
FunctionPass * createAMDGPUUseNativeCallsPass()
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
char & PostRAHazardRecognizerID
createPostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:330
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1358
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
void initializeSILowerI1CopiesPass(PassRegistry &)
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
void addExtension(ExtensionPointTy Ty, ExtensionFn Fn)
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
Represents a location in source code.
Definition: SMLoc.h:23
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableR600StructurizeCFG("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
FunctionPass * createAtomicExpandPass()
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
Definition: GlobalISel.cpp:18
bool use_empty() const
Definition: Value.h:322
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:443
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
FunctionPass * createNaryReassociatePass()
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:259