LLVM  7.0.0svn
AMDGPUTargetMachine.cpp
Go to the documentation of this file.
1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// The AMDGPU target machine contains all of the hardware specific
12 /// information needed to emit code for R600 and SI GPUs.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUTargetMachine.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUAliasAnalysis.h"
19 #include "AMDGPUCallLowering.h"
21 #include "AMDGPULegalizerInfo.h"
22 #include "AMDGPUMacroFusion.h"
23 #include "AMDGPUTargetObjectFile.h"
25 #include "GCNIterativeScheduler.h"
26 #include "GCNSchedStrategy.h"
27 #include "R600MachineScheduler.h"
28 #include "SIMachineScheduler.h"
33 #include "llvm/CodeGen/Passes.h"
35 #include "llvm/IR/Attributes.h"
36 #include "llvm/IR/Function.h"
38 #include "llvm/Pass.h"
40 #include "llvm/Support/Compiler.h"
43 #include "llvm/Transforms/IPO.h"
46 #include "llvm/Transforms/Scalar.h"
49 #include <memory>
50 
51 using namespace llvm;
52 
54  "r600-ir-structurize",
55  cl::desc("Use StructurizeCFG IR pass"),
56  cl::init(true));
57 
59  "amdgpu-sroa",
60  cl::desc("Run SROA after promote alloca pass"),
62  cl::init(true));
63 
64 static cl::opt<bool>
65 EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
66  cl::desc("Run early if-conversion"),
67  cl::init(false));
68 
70  "r600-if-convert",
71  cl::desc("Use if conversion pass"),
73  cl::init(true));
74 
75 // Option to disable vectorizer for tests.
77  "amdgpu-load-store-vectorizer",
78  cl::desc("Enable load store vectorizer"),
79  cl::init(true),
80  cl::Hidden);
81 
82 // Option to control global loads scalarization
84  "amdgpu-scalarize-global-loads",
85  cl::desc("Enable global load scalarization"),
86  cl::init(true),
87  cl::Hidden);
88 
89 // Option to run internalize pass.
91  "amdgpu-internalize-symbols",
92  cl::desc("Enable elimination of non-kernel functions and unused globals"),
93  cl::init(false),
94  cl::Hidden);
95 
96 // Option to inline all early.
98  "amdgpu-early-inline-all",
99  cl::desc("Inline all functions early"),
100  cl::init(false),
101  cl::Hidden);
102 
104  "amdgpu-sdwa-peephole",
105  cl::desc("Enable SDWA peepholer"),
106  cl::init(true));
107 
108 // Enable address space based alias analysis
109 static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
110  cl::desc("Enable AMDGPU Alias Analysis"),
111  cl::init(true));
112 
113 // Option to run late CFG structurizer
115  "amdgpu-late-structurize",
116  cl::desc("Enable late CFG structurization"),
118  cl::Hidden);
119 
121  "amdgpu-function-calls",
122  cl::Hidden,
123  cl::desc("Enable AMDGPU function call support"),
124  cl::init(false));
125 
126 // Enable lib calls simplifications
128  "amdgpu-simplify-libcall",
129  cl::desc("Enable amdgpu library simplifications"),
130  cl::init(true),
131  cl::Hidden);
132 
133 extern "C" void LLVMInitializeAMDGPUTarget() {
134  // Register the target
137 
180 }
181 
182 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
183  return llvm::make_unique<AMDGPUTargetObjectFile>();
184 }
185 
187  return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
188 }
189 
191  return new SIScheduleDAGMI(C);
192 }
193 
194 static ScheduleDAGInstrs *
196  ScheduleDAGMILive *DAG =
197  new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
201  return DAG;
202 }
203 
204 static ScheduleDAGInstrs *
206  auto DAG = new GCNIterativeScheduler(C,
208  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
209  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
210  return DAG;
211 }
212 
214  return new GCNIterativeScheduler(C,
216 }
217 
218 static ScheduleDAGInstrs *
220  auto DAG = new GCNIterativeScheduler(C,
222  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
223  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
224  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
225  return DAG;
226 }
227 
229 R600SchedRegistry("r600", "Run R600's custom scheduler",
231 
233 SISchedRegistry("si", "Run SI's custom scheduler",
235 
237 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
238  "Run GCN scheduler to maximize occupancy",
240 
242 IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
243  "Run GCN scheduler to maximize occupancy (experimental)",
245 
247 GCNMinRegSchedRegistry("gcn-minreg",
248  "Run GCN iterative scheduler for minimal register usage (experimental)",
250 
252 GCNILPSchedRegistry("gcn-ilp",
253  "Run GCN iterative scheduler for ILP scheduling (experimental)",
255 
256 static StringRef computeDataLayout(const Triple &TT) {
257  if (TT.getArch() == Triple::r600) {
258  // 32-bit pointers.
259  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
260  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
261  }
262 
263  // 32-bit private, local, and region pointers. 64-bit global, constant and
264  // flat.
265  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
266  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
267  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
268 }
269 
271 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
272  if (!GPU.empty())
273  return GPU;
274 
275  if (TT.getArch() == Triple::amdgcn)
276  return "generic";
277 
278  return "r600";
279 }
280 
282  // The AMDGPU toolchain only supports generating shared objects, so we
283  // must always use PIC.
284  return Reloc::PIC_;
285 }
286 
288  if (CM)
289  return *CM;
290  return CodeModel::Small;
291 }
292 
294  StringRef CPU, StringRef FS,
295  TargetOptions Options,
298  CodeGenOpt::Level OptLevel)
299  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
300  FS, Options, getEffectiveRelocModel(RM),
301  getEffectiveCodeModel(CM), OptLevel),
302  TLOF(createTLOF(getTargetTriple())) {
303  AS = AMDGPU::getAMDGPUAS(TT);
304  initAsmInfo();
305 }
306 
308 
310 
312  Attribute GPUAttr = F.getFnAttribute("target-cpu");
313  return GPUAttr.hasAttribute(Attribute::None) ?
314  getTargetCPU() : GPUAttr.getValueAsString();
315 }
316 
318  Attribute FSAttr = F.getFnAttribute("target-features");
319 
320  return FSAttr.hasAttribute(Attribute::None) ?
322  FSAttr.getValueAsString();
323 }
324 
326  return createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
327  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
328  AAR.addAAResult(WrapperPass->getResult());
329  });
330 }
331 
332 /// Predicate for Internalize pass.
333 static bool mustPreserveGV(const GlobalValue &GV) {
334  if (const Function *F = dyn_cast<Function>(&GV))
335  return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
336 
337  return !GV.use_empty();
338 }
339 
341  Builder.DivergentTarget = true;
342 
343  bool EnableOpt = getOptLevel() > CodeGenOpt::None;
344  bool Internalize = InternalizeSymbols;
345  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableAMDGPUFunctionCalls;
346  bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
347  bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
348 
350  delete Builder.Inliner;
352  }
353 
354  if (Internalize) {
355  // If we're generating code, we always have the whole program available. The
356  // relocations expected for externally visible functions aren't supported,
357  // so make sure every non-entry function is hidden.
358  Builder.addExtension(
362  });
363  }
364 
365  Builder.addExtension(
367  [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
369  if (AMDGPUAA) {
372  }
374  if (Internalize) {
376  PM.add(createGlobalDCEPass());
377  }
378  if (EarlyInline)
380  });
381 
382  const auto &Opt = Options;
383  Builder.addExtension(
385  [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
387  if (AMDGPUAA) {
390  }
392  if (LibCallSimplify)
394  });
395 
396  Builder.addExtension(
399  // Add infer address spaces pass to the opt pipeline after inlining
400  // but before SROA to increase SROA opportunities.
402 
403  // This should run after inlining to have any chance of doing anything,
404  // and before other cleanup optimizations.
406  });
407 }
408 
409 //===----------------------------------------------------------------------===//
410 // R600 Target Machine (R600 -> Cayman)
411 //===----------------------------------------------------------------------===//
412 
414  StringRef CPU, StringRef FS,
418  CodeGenOpt::Level OL, bool JIT)
419  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
421 }
422 
424  const Function &F) const {
425  StringRef GPU = getGPUName(F);
426  StringRef FS = getFeatureString(F);
427 
428  SmallString<128> SubtargetKey(GPU);
429  SubtargetKey.append(FS);
430 
431  auto &I = SubtargetMap[SubtargetKey];
432  if (!I) {
433  // This needs to be done before we create a new subtarget since any
434  // creation will depend on the TM and the code generation flags on the
435  // function that reside in TargetOptions.
437  I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
438  }
439 
440  return I.get();
441 }
442 
445  return TargetTransformInfo(R600TTIImpl(this, F));
446 }
447 
448 //===----------------------------------------------------------------------===//
449 // GCN Target Machine (SI+)
450 //===----------------------------------------------------------------------===//
451 
453  StringRef CPU, StringRef FS,
457  CodeGenOpt::Level OL, bool JIT)
458  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
459 
461  StringRef GPU = getGPUName(F);
462  StringRef FS = getFeatureString(F);
463 
464  SmallString<128> SubtargetKey(GPU);
465  SubtargetKey.append(FS);
466 
467  auto &I = SubtargetMap[SubtargetKey];
468  if (!I) {
469  // This needs to be done before we create a new subtarget since any
470  // creation will depend on the TM and the code generation flags on the
471  // function that reside in TargetOptions.
473  I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);
474  }
475 
476  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
477 
478  return I.get();
479 }
480 
483  return TargetTransformInfo(GCNTTIImpl(this, F));
484 }
485 
486 //===----------------------------------------------------------------------===//
487 // AMDGPU Pass Setup
488 //===----------------------------------------------------------------------===//
489 
490 namespace {
491 
492 class AMDGPUPassConfig : public TargetPassConfig {
493 public:
494  AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
495  : TargetPassConfig(TM, PM) {
496  // Exceptions and StackMaps are not supported, so these passes will never do
497  // anything.
498  disablePass(&StackMapLivenessID);
499  disablePass(&FuncletLayoutID);
500  }
501 
502  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
503  return getTM<AMDGPUTargetMachine>();
504  }
505 
507  createMachineScheduler(MachineSchedContext *C) const override {
511  return DAG;
512  }
513 
514  void addEarlyCSEOrGVNPass();
515  void addStraightLineScalarOptimizationPasses();
516  void addIRPasses() override;
517  void addCodeGenPrepare() override;
518  bool addPreISel() override;
519  bool addInstSelector() override;
520  bool addGCPasses() override;
521 };
522 
523 class R600PassConfig final : public AMDGPUPassConfig {
524 public:
525  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
526  : AMDGPUPassConfig(TM, PM) {}
527 
528  ScheduleDAGInstrs *createMachineScheduler(
529  MachineSchedContext *C) const override {
530  return createR600MachineScheduler(C);
531  }
532 
533  bool addPreISel() override;
534  bool addInstSelector() override;
535  void addPreRegAlloc() override;
536  void addPreSched2() override;
537  void addPreEmitPass() override;
538 };
539 
540 class GCNPassConfig final : public AMDGPUPassConfig {
541 public:
542  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
543  : AMDGPUPassConfig(TM, PM) {
544  // It is necessary to know the register usage of the entire call graph. We
545  // allow calls without EnableAMDGPUFunctionCalls if they are marked
546  // noinline, so this is always required.
547  setRequiresCodeGenSCCOrder(true);
548  }
549 
550  GCNTargetMachine &getGCNTargetMachine() const {
551  return getTM<GCNTargetMachine>();
552  }
553 
555  createMachineScheduler(MachineSchedContext *C) const override;
556 
557  bool addPreISel() override;
558  void addMachineSSAOptimization() override;
559  bool addILPOpts() override;
560  bool addInstSelector() override;
561  bool addIRTranslator() override;
562  bool addLegalizeMachineIR() override;
563  bool addRegBankSelect() override;
564  bool addGlobalInstructionSelect() override;
565  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
566  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
567  void addPreRegAlloc() override;
568  void addPostRegAlloc() override;
569  void addPreSched2() override;
570  void addPreEmitPass() override;
571 };
572 
573 } // end anonymous namespace
574 
575 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
577  addPass(createGVNPass());
578  else
579  addPass(createEarlyCSEPass());
580 }
581 
// Adds the straight-line scalar optimization passes.
// NOTE(review): some statements of this function are absent from this listing
// (the embedded line numbers skip), so the comments below may refer to passes
// whose addPass calls are not visible here.
582 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
585  // ReassociateGEPs exposes more opportunities for SLSR. See
586  // the example in reassociate-geps-and-slsr.ll.
588  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
589  // EarlyCSE can reuse.
590  addEarlyCSEOrGVNPass();
591  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
592  addPass(createNaryReassociatePass());
593  // NaryReassociate on GEPs creates redundant common expressions, so run
594  // EarlyCSE after it.
595  addPass(createEarlyCSEPass());
596 }
597 
// Adds the IR-level passes shared by the AMDGPU pass configurations.
// NOTE(review): several statements of this function are absent from this
// listing (the embedded line numbers skip), so some conditions and addPass
// calls referred to by the comments are not visible here.
598 void AMDGPUPassConfig::addIRPasses() {
599  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
600 
601  // There is no reason to run these.
602  disablePass(&StackMapLivenessID);
603  disablePass(&FuncletLayoutID);
604  disablePass(&PatchableFunctionID);
605 
607 
608  if (TM.getTargetTriple().getArch() == Triple::r600 ||
610  // Function calls are not supported, so make sure we inline everything.
611  addPass(createAMDGPUAlwaysInlinePass());
613  // We need to add the barrier noop pass, otherwise adding the function
614  // inlining pass will cause all of the PassConfigs passes to be run
615  // one function at a time, which means if we have a module with two
616  // functions, then we will generate code for the first function
617  // without ever running any passes on the second.
618  addPass(createBarrierNoopPass());
619  }
620 
621  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
622  // TODO: May want to move later or split into an early and late one.
623 
625  }
626 
627  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
628  if (TM.getTargetTriple().getArch() == Triple::r600)
630 
631  // Replace OpenCL enqueued block function pointers with global variables.
633 
634  if (TM.getOptLevel() > CodeGenOpt::None) {
635  addPass(createInferAddressSpacesPass());
636  addPass(createAMDGPUPromoteAlloca());
637 
638  if (EnableSROA)
639  addPass(createSROAPass());
640 
641  addStraightLineScalarOptimizationPasses();
642 
644  addPass(createAMDGPUAAWrapperPass());
646  AAResults &AAR) {
647  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
648  AAR.addAAResult(WrapperPass->getResult());
649  }));
650  }
651  }
652 
654 
655  // EarlyCSE is not always strong enough to clean up what LSR produces. For
656  // example, GVN can combine
657  //
658  // %0 = add %a, %b
659  // %1 = add %b, %a
660  //
661  // and
662  //
663  // %0 = shl nsw %a, 2
664  // %1 = shl %a, 2
665  //
666  // but EarlyCSE can do neither of them.
667  if (getOptLevel() != CodeGenOpt::None)
668  addEarlyCSEOrGVNPass();
669 }
670 
671 void AMDGPUPassConfig::addCodeGenPrepare() {
673 
676 }
677 
678 bool AMDGPUPassConfig::addPreISel() {
679  addPass(createFlattenCFGPass());
680  return false;
681 }
682 
683 bool AMDGPUPassConfig::addInstSelector() {
684  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
685  return false;
686 }
687 
688 bool AMDGPUPassConfig::addGCPasses() {
689  // Do nothing. GC is not supported.
690  return false;
691 }
692 
693 //===----------------------------------------------------------------------===//
694 // R600 Pass Setup
695 //===----------------------------------------------------------------------===//
696 
697 bool R600PassConfig::addPreISel() {
698  AMDGPUPassConfig::addPreISel();
699 
701  addPass(createStructurizeCFGPass());
702  return false;
703 }
704 
705 bool R600PassConfig::addInstSelector() {
706  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
707  return false;
708 }
709 
710 void R600PassConfig::addPreRegAlloc() {
711  addPass(createR600VectorRegMerger());
712 }
713 
714 void R600PassConfig::addPreSched2() {
715  addPass(createR600EmitClauseMarkers(), false);
717  addPass(&IfConverterID, false);
718  addPass(createR600ClauseMergePass(), false);
719 }
720 
721 void R600PassConfig::addPreEmitPass() {
722  addPass(createAMDGPUCFGStructurizerPass(), false);
723  addPass(createR600ExpandSpecialInstrsPass(), false);
724  addPass(&FinalizeMachineBundlesID, false);
725  addPass(createR600Packetizer(), false);
726  addPass(createR600ControlFlowFinalizer(), false);
727 }
728 
730  return new R600PassConfig(*this, PM);
731 }
732 
733 //===----------------------------------------------------------------------===//
734 // GCN Pass Setup
735 //===----------------------------------------------------------------------===//
736 
737 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
738  MachineSchedContext *C) const {
739  const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
740  if (ST.enableSIScheduler())
741  return createSIMachineScheduler(C);
743 }
744 
745 bool GCNPassConfig::addPreISel() {
746  AMDGPUPassConfig::addPreISel();
747 
748  // FIXME: We need to run a pass to propagate the attributes when calls are
749  // supported.
751 
752  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
753  // regions formed by them.
755  if (!LateCFGStructurize) {
756  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
757  }
758  addPass(createSinkingPass());
760  if (!LateCFGStructurize) {
762  }
763 
764  return false;
765 }
766 
767 void GCNPassConfig::addMachineSSAOptimization() {
769 
770  // We want to fold operands after PeepholeOptimizer has run (or as part of
771  // it), because it will eliminate extra copies making it easier to fold the
772  // real source operand. We want to eliminate dead instructions after, so that
773  // we see fewer uses of the copies. We then need to clean up the dead
774  // instructions leftover after the operands are folded as well.
775  //
776  // XXX - Can we get away without running DeadMachineInstructionElim again?
777  addPass(&SIFoldOperandsID);
779  addPass(&SILoadStoreOptimizerID);
780  if (EnableSDWAPeephole) {
781  addPass(&SIPeepholeSDWAID);
782  addPass(&EarlyMachineLICMID);
783  addPass(&MachineCSEID);
784  addPass(&SIFoldOperandsID);
786  }
788 }
789 
790 bool GCNPassConfig::addILPOpts() {
792  addPass(&EarlyIfConverterID);
793 
795  return false;
796 }
797 
798 bool GCNPassConfig::addInstSelector() {
799  AMDGPUPassConfig::addInstSelector();
800  addPass(createSILowerI1CopiesPass());
801  addPass(&SIFixSGPRCopiesID);
802  return false;
803 }
804 
805 bool GCNPassConfig::addIRTranslator() {
806  addPass(new IRTranslator());
807  return false;
808 }
809 
810 bool GCNPassConfig::addLegalizeMachineIR() {
811  addPass(new Legalizer());
812  return false;
813 }
814 
815 bool GCNPassConfig::addRegBankSelect() {
816  addPass(new RegBankSelect());
817  return false;
818 }
819 
820 bool GCNPassConfig::addGlobalInstructionSelect() {
821  addPass(new InstructionSelect());
822  return false;
823 }
824 
825 void GCNPassConfig::addPreRegAlloc() {
826  if (LateCFGStructurize) {
828  }
829  addPass(createSIWholeQuadModePass());
830 }
831 
832 void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
833  // FIXME: We have to disable the verifier here because of PHIElimination +
834  // TwoAddressInstructions disabling it.
835 
836  // This must be run immediately after phi elimination and before
837  // TwoAddressInstructions, otherwise the processing of the tied operand of
838  // SI_ELSE will introduce a copy of the tied operand source after the else.
839  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
840 
841  // This must be run after SILowerControlFlow, since it needs to use the
842  // machine-level CFG, but before register allocation.
843  insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);
844 
845  TargetPassConfig::addFastRegAlloc(RegAllocPass);
846 }
847 
848 void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
850 
852 
853  // This must be run immediately after phi elimination and before
854  // TwoAddressInstructions, otherwise the processing of the tied operand of
855  // SI_ELSE will introduce a copy of the tied operand source after the else.
856  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
857 
858  // This must be run after SILowerControlFlow, since it needs to use the
859  // machine-level CFG, but before register allocation.
860  insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);
861 
863 }
864 
865 void GCNPassConfig::addPostRegAlloc() {
866  addPass(&SIFixVGPRCopiesID);
867  addPass(&SIOptimizeExecMaskingID);
869 }
870 
871 void GCNPassConfig::addPreSched2() {
872 }
873 
// Adds the GCN passes that run just before emission.
// NOTE(review): some statements of this function are absent from this listing
// (the embedded line numbers skip), so only the visible addPass calls are
// shown.
874 void GCNPassConfig::addPreEmitPass() {
875  // The hazard recognizer that runs as part of the post-ra scheduler does not
876  // guarantee to be able to handle all hazards correctly. This is because if there
877  // are multiple scheduling regions in a basic block, the regions are scheduled
878  // bottom up, so when we begin to schedule a region we don't know what
879  // instructions were emitted directly before it.
880  //
881  // Here we add a stand-alone hazard recognizer pass which can handle all
882  // cases.
883  addPass(&PostRAHazardRecognizerID);
884 
885  addPass(createSIMemoryLegalizerPass());
886  addPass(createSIInsertWaitcntsPass());
888  addPass(&SIInsertSkipsPassID);
890  addPass(&BranchRelaxationPassID);
891 }
892 
894  return new GCNPassConfig(*this, PM);
895 }
FunctionPass * createSpeculativeExecutionPass()
char & SIFormMemoryClausesID
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
FunctionPass * createStraightLineStrengthReducePass()
uint64_t CallInst * C
FunctionPass * createGVNPass(bool NoLoads=false)
Create a legacy GVN pass.
Definition: GVN.cpp:2661
StringRef getTargetFeatureString() const
Target & getTheGCNTarget()
The target for GCN GPUs.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
This file provides a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine...
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void initializeSIFixVGPRCopiesPass(PassRegistry &)
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
FunctionPass * createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a R600-specific.
void initializeSIInsertWaitcntsPass(PassRegistry &)
void initializeSIFormMemoryClausesPass(PassRegistry &)
ModulePass * createR600OpenCLImageTypeLoweringPass()
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with...
Definition: TargetMachine.h:78
char & SILoadStoreOptimizerID
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
char & SIPeepholeSDWAID
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
This file describes how to lower LLVM calls to machine code calls.
char & FuncletLayoutID
This pass lays out funclets contiguously.
AMDGPUAS getAMDGPUAS(const Module &M)
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions...
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
FunctionPass * createAMDGPUPromoteAlloca()
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
ModulePass * createAMDGPULowerKernelAttributesPass()
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
FunctionPass * createAMDGPUCodeGenPreparePass()
F(f)
R600 Machine Scheduler interface.
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
FunctionPass * createAMDGPUCFGStructurizerPass()
MachineSchedRegistry provides a selection of available machine instruction schedulers.
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form...
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
FunctionPass * createSIMemoryLegalizerPass()
Pass * Inliner
Inliner - Specifies the inliner to use.
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
FunctionPass * createSIInsertWaitcntsPass()
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
StringRef getFeatureString(const Function &F) const
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
This file declares the targeting of the InstructionSelector class for AMDGPU.
Pass * createAMDGPUFunctionInliningPass()
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
const AMDGPUSubtarget * getSubtargetImpl() const
This file declares the AMDGPU-specific subclass of TargetLoweringObjectFile.
Pass * createAMDGPUAnnotateKernelFeaturesPass()
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
This file contains the simple types necessary to represent the attributes associated with functions a...
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
No attributes have been set.
Definition: Attributes.h:72
void initializeAMDGPUInlinerPass(PassRegistry &)
FunctionPass * createSinkingPass()
Definition: Sink.cpp:304
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
char & SIOptimizeExecMaskingPreRAID
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
char & FinalizeMachineBundlesID
FinalizeMachineBundles - This pass finalizes machine instruction bundles (created earlier, e.g.
Target-Independent Code Generator Pass Configuration Options.
static StringRef computeDataLayout(const Triple &TT)
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
EP_EnabledOnOptLevel0 - This extension point allows adding passes that should not be disabled by O0 o...
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
FunctionPass * createR600ExpandSpecialInstrsPass()
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
RegisterTargetMachine - Helper template for registering a target machine implementation, for use in the target machine initialization function.
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:134
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:283
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:133
FunctionPass * createSILowerI1CopiesPass()
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
Pass * createLoadStoreVectorizerPass()
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
StringRef getTargetCPU() const
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
void initializeSIFixSGPRCopiesPass(PassRegistry &)
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
FunctionPass * createR600VectorRegMerger()
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
static CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM)
SI Machine Scheduler interface.
StringRef getGPUName(const Function &F) const
void append(in_iter S, in_iter E)
Append from an iterator pair.
Definition: SmallString.h:75
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions...
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
bool hasAttribute(AttrKind Val) const
Return true if the attribute is present.
Definition: Attributes.cpp:202
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
FunctionPass * createSIDebuggerInsertNopsPass()
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
Definition: RegBankSelect.h:91
FunctionPass * createFlattenCFGPass()
FunctionPass * createSIWholeQuadModePass()
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
char & SIInsertSkipsPassID
virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass)
addOptimizedRegAlloc - Add passes related to register allocation.
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g.
bool isEntryFunctionCC(CallingConv::ID CC)
void LLVMInitializeAMDGPUTarget()
void initializeSIPeepholeSDWAPass(PassRegistry &)
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
FunctionPass * createR600ControlFlowFinalizer()
char & SIFixWWMLivenessID
static cl::opt< bool > EnableAMDGPUFunctionCalls("amdgpu-function-calls", cl::Hidden, cl::desc("Enable AMDGPU function call support"), cl::init(false))
Legacy wrapper pass to provide the AMDGPUAAResult object.
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
This class describes a target machine that is implemented with the LLVM target-independent code gener...
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
const Triple & getTargetTriple() const
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
void initializeSILowerControlFlowPass(PassRegistry &)
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
ModulePass * createAMDGPULowerIntrinsicsPass()
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
FunctionPass * createR600ClauseMergePass()
The AMDGPU TargetMachine interface definition for hw codegen targets.
static cl::opt< bool > EnableR600IfConvert("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static ScheduleDAGInstrs * createR600MachineScheduler(MachineSchedContext *C)
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into an AMDGPU-specific DAG.
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...
void initializeSIShrinkInstructionsPass(PassRegistry &)
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
Analysis pass providing a never-invalidated alias analysis result.
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations, allowing them to see the code as it is coming out of the frontend.
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
void initializeSIInsertSkipsPass(PassRegistry &)
void initializeR600PacketizerPass(PassRegistry &)
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
FunctionPass * createAMDGPUAnnotateUniformValues()
This is the AMDGPU address space based alias analysis pass.
Provides passes for inlining "always_inline" functions.
char & SIOptimizeExecMaskingID
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
ImmutablePass class - This class is used to provide information that does not need to be run...
Definition: Pass.h:256
char & AMDGPUUnifyDivergentExitNodesID
void initializeSIFixWWMLivenessPass(PassRegistry &)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
void initializeSIMemoryLegalizerPass(PassRegistry &)
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module, internalizing all globals (functions and variables) it can.
void initializeSIWholeQuadModePass(PassRegistry &)
void setRequiresStructuredCFG(bool Value)
void initializeR600VectorRegMergerPass(PassRegistry &)
ImmutablePass * createAMDGPUAAWrapperPass()
char & SIFixVGPRCopiesID
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
FunctionPass * createR600EmitClauseMarkers()
void initializeR600ClauseMergePassPass(PassRegistry &)
This pass is responsible for selecting generic machine instructions to target-specific instructions...
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Target - Wrapper for Target specific information.
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
This file declares the targeting of the Machinelegalizer class for AMDGPU.
virtual void addFastRegAlloc(FunctionPass *RegAllocPass)
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
FunctionPass * createR600Packetizer()
void initializeSILoadStoreOptimizerPass(PassRegistry &)
char & SILowerControlFlowID
ModulePass * createAMDGPUUnifyMetadataPass()
void initializeSIAnnotateControlFlowPass(PassRegistry &)
A ScheduleDAG for scheduling lists of MachineInstr.
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
FunctionPass * createInferAddressSpacesPass()
void initializeSIFoldOperandsPass(PassRegistry &)
char & SIFoldOperandsID
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:569
FunctionPass * createSIShrinkInstructionsPass()
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
void initializeSIDebuggerInsertNopsPass(PassRegistry &)
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:195
TargetOptions Options
Definition: TargetMachine.h:98
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
#define LLVM_READNONE
Definition: Compiler.h:161
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
char & SIFixSGPRCopiesID
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetOptions &)
FunctionPass * createSROAPass()
Definition: SROA.cpp:4511
bool enableSIScheduler() const
static MachineSchedRegistry R600SchedRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler)
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:568
This file declares the IRTranslator pass.
FunctionPass * createAMDGPUUseNativeCallsPass()
char & PostRAHazardRecognizerID
createPostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:317
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1315
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
void initializeSILowerI1CopiesPass(PassRegistry &)
void addExtension(ExtensionPointTy Ty, ExtensionFn Fn)
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableR600StructurizeCFG("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
static ImmutablePass * createAMDGPUExternalAAWrapperPass()
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
Definition: GlobalISel.cpp:19
bool use_empty() const
Definition: Value.h:322
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:426
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
FunctionPass * createNaryReassociatePass()