1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The AMDGPU target machine contains all of the hardware specific
11 /// information needed to emit code for SI+ GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUAliasAnalysis.h"
18 #include "AMDGPUExportClustering.h"
19 #include "AMDGPUMacroFusion.h"
20 #include "AMDGPUTargetObjectFile.h"
21 #include "AMDGPUTargetTransformInfo.h"
22 #include "GCNIterativeScheduler.h"
23 #include "GCNSchedStrategy.h"
24 #include "R600.h"
25 #include "R600TargetMachine.h"
26 #include "SIMachineFunctionInfo.h"
27 #include "SIMachineScheduler.h"
28 #include "TargetInfo/AMDGPUTargetInfo.h"
29 #include "llvm/Analysis/CGSCCPassManager.h"
30 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
31 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
32 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
33 #include "llvm/CodeGen/GlobalISel/Localizer.h"
34 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
35 #include "llvm/CodeGen/MIRParser/MIParser.h"
36 #include "llvm/CodeGen/Passes.h"
37 #include "llvm/CodeGen/RegAllocRegistry.h"
38 #include "llvm/CodeGen/TargetPassConfig.h"
39 #include "llvm/IR/IntrinsicsAMDGPU.h"
40 #include "llvm/IR/LegacyPassManager.h"
41 #include "llvm/IR/PassManager.h"
42 #include "llvm/IR/PatternMatch.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/MC/TargetRegistry.h"
45 #include "llvm/Passes/PassBuilder.h"
46 #include "llvm/Transforms/IPO.h"
47 #include "llvm/Transforms/IPO/AlwaysInliner.h"
48 #include "llvm/Transforms/IPO/GlobalDCE.h"
49 #include "llvm/Transforms/IPO/Internalize.h"
50 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
51 #include "llvm/Transforms/Scalar.h"
52 #include "llvm/Transforms/Scalar/GVN.h"
53 #include "llvm/Transforms/Scalar/InferAddressSpaces.h"
54 #include "llvm/Transforms/Utils.h"
55 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
56 #include "llvm/Transforms/Vectorize.h"
57 
58 using namespace llvm;
59 
60 namespace {
61 class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
62 public:
63  SGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
64  : RegisterRegAllocBase(N, D, C) {}
65 };
66 
67 class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
68 public:
69  VGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
70  : RegisterRegAllocBase(N, D, C) {}
71 };
72 
73 static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
74  const TargetRegisterClass &RC) {
75  return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
76 }
77 
78 static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
79  const TargetRegisterClass &RC) {
80  return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
81 }
82 
83 
84 /// -{sgpr|vgpr}-regalloc=... command line option.
85 static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
86 
87 /// A dummy default pass factory indicates whether the register allocator is
88 /// overridden on the command line.
89 static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
90 static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
91 
92 static SGPRRegisterRegAlloc
93 defaultSGPRRegAlloc("default",
94  "pick SGPR register allocator based on -O option",
95  useDefaultRegisterAllocator);
96 
97 static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,
98  RegisterPassParser<SGPRRegisterRegAlloc>>
99 SGPRRegAlloc("sgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
100  cl::desc("Register allocator to use for SGPRs"));
101 
102 static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
103  RegisterPassParser<VGPRRegisterRegAlloc>>
104 VGPRRegAlloc("vgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
105  cl::desc("Register allocator to use for VGPRs"));
106 
107 
108 static void initializeDefaultSGPRRegisterAllocatorOnce() {
109  RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
110 
111  if (!Ctor) {
112  Ctor = SGPRRegAlloc;
113  SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);
114  }
115 }
116 
117 static void initializeDefaultVGPRRegisterAllocatorOnce() {
118  RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
119 
120  if (!Ctor) {
121  Ctor = VGPRRegAlloc;
122  VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);
123  }
124 }
125 
126 static FunctionPass *createBasicSGPRRegisterAllocator() {
127  return createBasicRegisterAllocator(onlyAllocateSGPRs);
128 }
129 
130 static FunctionPass *createGreedySGPRRegisterAllocator() {
131  return createGreedyRegisterAllocator(onlyAllocateSGPRs);
132 }
133 
134 static FunctionPass *createFastSGPRRegisterAllocator() {
135  return createFastRegisterAllocator(onlyAllocateSGPRs, false);
136 }
137 
138 static FunctionPass *createBasicVGPRRegisterAllocator() {
139  return createBasicRegisterAllocator(onlyAllocateVGPRs);
140 }
141 
142 static FunctionPass *createGreedyVGPRRegisterAllocator() {
143  return createGreedyRegisterAllocator(onlyAllocateVGPRs);
144 }
145 
146 static FunctionPass *createFastVGPRRegisterAllocator() {
147  return createFastRegisterAllocator(onlyAllocateVGPRs, true);
148 }
149 
150 static SGPRRegisterRegAlloc basicRegAllocSGPR(
151  "basic", "basic register allocator", createBasicSGPRRegisterAllocator);
152 static SGPRRegisterRegAlloc greedyRegAllocSGPR(
153  "greedy", "greedy register allocator", createGreedySGPRRegisterAllocator);
154 
155 static SGPRRegisterRegAlloc fastRegAllocSGPR(
156  "fast", "fast register allocator", createFastSGPRRegisterAllocator);
157 
158 
159 static VGPRRegisterRegAlloc basicRegAllocVGPR(
160  "basic", "basic register allocator", createBasicVGPRRegisterAllocator);
161 static VGPRRegisterRegAlloc greedyRegAllocVGPR(
162  "greedy", "greedy register allocator", createGreedyVGPRRegisterAllocator);
163 
164 static VGPRRegisterRegAlloc fastRegAllocVGPR(
165  "fast", "fast register allocator", createFastVGPRRegisterAllocator);
166 }
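// Illustrative usage (assuming the standard llc driver, not part of the listing
// above): the registries let the SGPR and VGPR allocators be chosen
// independently, e.g.
//   llc -march=amdgcn -sgpr-regalloc=greedy -vgpr-regalloc=fast ...
// "default" defers to the -O level, mirroring the target-independent -regalloc
// flag, which is rejected for amdgcn (see RegAllocOptNotSupportedMessage below).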
167 
168 static cl::opt<bool> EnableSROA(
169  "amdgpu-sroa",
170  cl::desc("Run SROA after promote alloca pass"),
171  cl::ReallyHidden,
172  cl::init(true));
173 
174 static cl::opt<bool>
175 EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
176  cl::desc("Run early if-conversion"),
177  cl::init(false));
178 
179 static cl::opt<bool>
180 OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
181  cl::desc("Run pre-RA exec mask optimizations"),
182  cl::init(true));
183 
184 // Option to disable vectorizer for tests.
185 static cl::opt<bool> EnableLoadStoreVectorizer(
186  "amdgpu-load-store-vectorizer",
187  cl::desc("Enable load store vectorizer"),
188  cl::init(true),
189  cl::Hidden);
190 
191 // Option to control global loads scalarization
192 static cl::opt<bool> ScalarizeGlobal(
193  "amdgpu-scalarize-global-loads",
194  cl::desc("Enable global load scalarization"),
195  cl::init(true),
196  cl::Hidden);
197 
198 // Option to run internalize pass.
199 static cl::opt<bool> InternalizeSymbols(
200  "amdgpu-internalize-symbols",
201  cl::desc("Enable elimination of non-kernel functions and unused globals"),
202  cl::init(false),
203  cl::Hidden);
204 
205 // Option to inline all early.
206 static cl::opt<bool> EarlyInlineAll(
207  "amdgpu-early-inline-all",
208  cl::desc("Inline all functions early"),
209  cl::init(false),
210  cl::Hidden);
211 
212 static cl::opt<bool> EnableSDWAPeephole(
213  "amdgpu-sdwa-peephole",
214  cl::desc("Enable SDWA peepholer"),
215  cl::init(true));
216 
217 static cl::opt<bool> EnableDPPCombine(
218  "amdgpu-dpp-combine",
219  cl::desc("Enable DPP combiner"),
220  cl::init(true));
221 
222 // Enable address space based alias analysis
223 static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
224  cl::desc("Enable AMDGPU Alias Analysis"),
225  cl::init(true));
226 
227 // Option to run late CFG structurizer
228 static cl::opt<bool, true> LateCFGStructurize(
229  "amdgpu-late-structurize",
230  cl::desc("Enable late CFG structurization"),
231  cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
232  cl::Hidden);
233 
234 // Enable lib calls simplifications
235 static cl::opt<bool> EnableLibCallSimplify(
236  "amdgpu-simplify-libcall",
237  cl::desc("Enable amdgpu library simplifications"),
238  cl::init(true),
239  cl::Hidden);
240 
241 static cl::opt<bool> EnableLowerKernelArguments(
242  "amdgpu-ir-lower-kernel-arguments",
243  cl::desc("Lower kernel argument loads in IR pass"),
244  cl::init(true),
245  cl::Hidden);
246 
247 static cl::opt<bool> EnableRegReassign(
248  "amdgpu-reassign-regs",
249  cl::desc("Enable register reassign optimizations on gfx10+"),
250  cl::init(true),
251  cl::Hidden);
252 
253 static cl::opt<bool> OptVGPRLiveRange(
254  "amdgpu-opt-vgpr-liverange",
255  cl::desc("Enable VGPR liverange optimizations for if-else structure"),
256  cl::init(true), cl::Hidden);
257 
258 // Enable atomic optimization
259 static cl::opt<bool> EnableAtomicOptimizations(
260  "amdgpu-atomic-optimizations",
261  cl::desc("Enable atomic optimizations"),
262  cl::init(false),
263  cl::Hidden);
264 
265 // Enable Mode register optimization
266 static cl::opt<bool> EnableSIModeRegisterPass(
267  "amdgpu-mode-register",
268  cl::desc("Enable mode register pass"),
269  cl::init(true),
270  cl::Hidden);
271 
272 // Option is used in lit tests to prevent deadcoding of patterns inspected.
273 static cl::opt<bool>
274 EnableDCEInRA("amdgpu-dce-in-ra",
275  cl::init(true), cl::Hidden,
276  cl::desc("Enable machine DCE inside regalloc"));
277 
278 static cl::opt<bool> EnableScalarIRPasses(
279  "amdgpu-scalar-ir-passes",
280  cl::desc("Enable scalar IR passes"),
281  cl::init(true),
282  cl::Hidden);
283 
284 static cl::opt<bool> EnableStructurizerWorkarounds(
285  "amdgpu-enable-structurizer-workarounds",
286  cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
287  cl::Hidden);
288 
289 static cl::opt<bool> EnableLDSReplaceWithPointer(
290  "amdgpu-enable-lds-replace-with-pointer",
291  cl::desc("Enable LDS replace with pointer pass"), cl::init(false),
292  cl::Hidden);
293 
294 static cl::opt<bool, true> EnableLowerModuleLDS(
295  "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
296  cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
297  cl::Hidden);
298 
299 static cl::opt<bool> EnablePreRAOptimizations(
300  "amdgpu-enable-pre-ra-optimizations",
301  cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
302  cl::Hidden);
303 
304 static cl::opt<bool> EnablePromoteKernelArguments(
305  "amdgpu-enable-promote-kernel-arguments",
306  cl::desc("Enable promotion of flat kernel pointer arguments to global"),
307  cl::Hidden, cl::init(true));
308 
309 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
310  // Register the target
311  RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
312  RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
313 
381 }
382 
383 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
384  return std::make_unique<AMDGPUTargetObjectFile>();
385 }
386 
387 static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
388  return new SIScheduleDAGMI(C);
389 }
390 
391 static ScheduleDAGInstrs *
392 createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
393  ScheduleDAGMILive *DAG =
394  new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
395  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
396  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
397  DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
398  return DAG;
399 }
400 
401 static ScheduleDAGInstrs *
402 createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
403  auto DAG = new GCNIterativeScheduler(C,
404  GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
405  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
406  return DAG;
407 }
408 
409 static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
410  return new GCNIterativeScheduler(C,
411  GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
412 }
413 
414 static ScheduleDAGInstrs *
415 createIterativeILPMachineScheduler(MachineSchedContext *C) {
416  auto DAG = new GCNIterativeScheduler(C,
417  GCNIterativeScheduler::SCHEDULE_ILP);
418  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
419  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
420  return DAG;
421 }
422 
423 static MachineSchedRegistry
424 SISchedRegistry("si", "Run SI's custom scheduler",
425  createSIMachineScheduler);
426 
427 static MachineSchedRegistry
428 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
429  "Run GCN scheduler to maximize occupancy",
430  createGCNMaxOccupancyMachineScheduler);
431 
432 static MachineSchedRegistry
433 IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
434  "Run GCN scheduler to maximize occupancy (experimental)",
435  createIterativeGCNMaxOccupancyMachineScheduler);
436 
437 static MachineSchedRegistry
438 GCNMinRegSchedRegistry("gcn-minreg",
439  "Run GCN iterative scheduler for minimal register usage (experimental)",
440  createMinRegScheduler);
441 
442 static MachineSchedRegistry
443 GCNILPSchedRegistry("gcn-ilp",
444  "Run GCN iterative scheduler for ILP scheduling (experimental)",
445  createIterativeILPMachineScheduler);
446 
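// Illustrative usage (assuming the stock llc driver): the MachineSchedRegistry
// entries above are selected through the common -misched flag, e.g.
//   llc -march=amdgcn -misched=gcn-max-occupancy ...
// while "si" picks the SIScheduleDAGMI scheduler created above.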
447 static StringRef computeDataLayout(const Triple &TT) {
448  if (TT.getArch() == Triple::r600) {
449  // 32-bit pointers.
450  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
451  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
452  }
453 
454  // 32-bit private, local, and region pointers. 64-bit global, constant and
455  // flat, non-integral buffer fat pointers.
456  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
457  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
458  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
459  "-ni:7";
460 }
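// Reading the layout string above (for illustration): p5:32:32 makes private
// (scratch) pointers 32-bit, A5 places allocas in address space 5, G1 is the
// default address space for globals, and ni:7 marks buffer fat pointers
// (address space 7) as non-integral.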
461 
462 LLVM_READNONE
463 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
464  if (!GPU.empty())
465  return GPU;
466 
467  // Need to default to a target with flat support for HSA.
468  if (TT.getArch() == Triple::amdgcn)
469  return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
470 
471  return "r600";
472 }
473 
474 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
475  // The AMDGPU toolchain only supports generating shared objects, so we
476  // must always use PIC.
477  return Reloc::PIC_;
478 }
479 
480 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
481  StringRef CPU, StringRef FS,
482  TargetOptions Options,
483  Optional<Reloc::Model> RM,
484  Optional<CodeModel::Model> CM,
485  CodeGenOpt::Level OptLevel)
486  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
487  FS, Options, getEffectiveRelocModel(RM),
488  getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
489  TLOF(createTLOF(getTargetTriple())) {
490  initAsmInfo();
491  if (TT.getArch() == Triple::amdgcn) {
492  if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize64"))
493  MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave64));
494  else if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize32"))
495  MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave32));
496  }
497 }
498 
499 bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
500 bool AMDGPUTargetMachine::EnableFunctionCalls = false;
501 bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
502 
503 AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
504 
505 StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
506  Attribute GPUAttr = F.getFnAttribute("target-cpu");
507  return GPUAttr.isValid() ? GPUAttr.getValueAsString() : getTargetCPU();
508 }
509 
510 StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
511  Attribute FSAttr = F.getFnAttribute("target-features");
512 
513  return FSAttr.isValid() ? FSAttr.getValueAsString()
514  : getTargetFeatureString();
515 }
516 
517 /// Predicate for Internalize pass.
518 static bool mustPreserveGV(const GlobalValue &GV) {
519  if (const Function *F = dyn_cast<Function>(&GV))
520  return F->isDeclaration() || F->getName().startswith("__asan_") ||
521  F->getName().startswith("__sanitizer_") ||
522  AMDGPU::isEntryFunctionCC(F->getCallingConv());
523 
524  GV.removeDeadConstantUsers();
525  return !GV.use_empty();
526 }
527 
528 void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
529  Builder.DivergentTarget = true;
530 
531  bool EnableOpt = getOptLevel() > CodeGenOpt::None;
532  bool Internalize = InternalizeSymbols;
533  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
534  bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
535  bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
536  bool PromoteKernelArguments =
537  EnablePromoteKernelArguments && getOptLevel() > CodeGenOpt::Less;
538 
539  if (EnableFunctionCalls) {
540  delete Builder.Inliner;
541  Builder.Inliner = createFunctionInliningPass();
542  }
543 
544  Builder.addExtension(
545  PassManagerBuilder::EP_ModuleOptimizerEarly,
546  [Internalize, EarlyInline, AMDGPUAA, this](const PassManagerBuilder &,
547  legacy::PassManagerBase &PM) {
548  if (AMDGPUAA) {
549  PM.add(createAMDGPUAAWrapperPass());
550  PM.add(createAMDGPUExternalAAWrapperPass());
551  }
552  PM.add(createAMDGPUUnifyMetadataPass());
553  PM.add(createAMDGPUPrintfRuntimeBinding());
554  if (Internalize)
555  PM.add(createInternalizePass(mustPreserveGV));
556  PM.add(createAMDGPUPropagateAttributesLatePass(this));
557  if (Internalize)
558  PM.add(createGlobalDCEPass());
559  if (EarlyInline)
560  PM.add(createAMDGPUAlwaysInlinePass(false));
561  });
562 
563  Builder.addExtension(
564  PassManagerBuilder::EP_EarlyAsPossible,
565  [AMDGPUAA, LibCallSimplify, this](const PassManagerBuilder &,
566  legacy::PassManagerBase &PM) {
567  if (AMDGPUAA) {
568  PM.add(createAMDGPUAAWrapperPass());
569  PM.add(createAMDGPUExternalAAWrapperPass());
570  }
571  PM.add(llvm::createAMDGPUPropagateAttributesEarlyPass(this));
572  PM.add(llvm::createAMDGPUUseNativeCallsPass());
573  if (LibCallSimplify)
574  PM.add(llvm::createAMDGPUSimplifyLibCallsPass(this));
575  });
576 
577  Builder.addExtension(
578  PassManagerBuilder::EP_CGSCCOptimizerLate,
579  [EnableOpt, PromoteKernelArguments](const PassManagerBuilder &,
580  legacy::PassManagerBase &PM) {
581  // Add promote kernel arguments pass to the opt pipeline right before
582  // infer address spaces which is needed to do actual address space
583  // rewriting.
584  if (PromoteKernelArguments)
585  PM.add(createAMDGPUPromoteKernelArgumentsPass());
586 
587  // Add infer address spaces pass to the opt pipeline after inlining
588  // but before SROA to increase SROA opportunities.
589  PM.add(createInferAddressSpacesPass());
590 
591  // This should run after inlining to have any chance of doing anything,
592  // and before other cleanup optimizations.
593  PM.add(createAMDGPULowerKernelAttributesPass());
594 
595  // Promote alloca to vector before SROA and loop unroll. If we manage
596  // to eliminate allocas before unroll we may choose to unroll less.
597  if (EnableOpt)
598  PM.add(createAMDGPUPromoteAllocaToVector());
599  });
600 }
601 
602 void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
603  AAM.registerFunctionAnalysis<AMDGPUAA>();
604 }
605 
606 void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
607  PB.registerPipelineParsingCallback(
608  [this](StringRef PassName, ModulePassManager &PM,
609  ArrayRef<PassBuilder::PipelineElement>) {
610  if (PassName == "amdgpu-propagate-attributes-late") {
611  PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
612  return true;
613  }
614  if (PassName == "amdgpu-unify-metadata") {
615  PM.addPass(AMDGPUUnifyMetadataPass());
616  return true;
617  }
618  if (PassName == "amdgpu-printf-runtime-binding") {
619  PM.addPass(AMDGPUPrintfRuntimeBindingPass());
620  return true;
621  }
622  if (PassName == "amdgpu-always-inline") {
623  PM.addPass(AMDGPUAlwaysInlinePass());
624  return true;
625  }
626  if (PassName == "amdgpu-replace-lds-use-with-pointer") {
627  PM.addPass(AMDGPUReplaceLDSUseWithPointerPass());
628  return true;
629  }
630  if (PassName == "amdgpu-lower-module-lds") {
631  PM.addPass(AMDGPULowerModuleLDSPass());
632  return true;
633  }
634  return false;
635  });
636  PB.registerPipelineParsingCallback(
637  [this](StringRef PassName, FunctionPassManager &PM,
638  ArrayRef<PassBuilder::PipelineElement>) {
639  if (PassName == "amdgpu-simplifylib") {
640  PM.addPass(AMDGPUSimplifyLibCallsPass(*this));
641  return true;
642  }
643  if (PassName == "amdgpu-usenative") {
644  PM.addPass(AMDGPUUseNativeCallsPass());
645  return true;
646  }
647  if (PassName == "amdgpu-promote-alloca") {
648  PM.addPass(AMDGPUPromoteAllocaPass(*this));
649  return true;
650  }
651  if (PassName == "amdgpu-promote-alloca-to-vector") {
652  PM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
653  return true;
654  }
655  if (PassName == "amdgpu-lower-kernel-attributes") {
656  PM.addPass(AMDGPULowerKernelAttributesPass());
657  return true;
658  }
659  if (PassName == "amdgpu-propagate-attributes-early") {
660  PM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
661  return true;
662  }
663  if (PassName == "amdgpu-promote-kernel-arguments") {
664  PM.addPass(AMDGPUPromoteKernelArgumentsPass());
665  return true;
666  }
667  return false;
668  });
669 
670  PB.registerAnalysisRegistrationCallback([](FunctionAnalysisManager &FAM) {
671  FAM.registerPass([&] { return AMDGPUAA(); });
672  });
673 
674  PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) {
675  if (AAName == "amdgpu-aa") {
676  AAM.registerFunctionAnalysis<AMDGPUAA>();
677  return true;
678  }
679  return false;
680  });
681 
682  PB.registerPipelineStartEPCallback(
683  [this](ModulePassManager &PM, OptimizationLevel Level) {
684  FunctionPassManager FPM;
685  FPM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
686  FPM.addPass(AMDGPUUseNativeCallsPass());
687  if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
688  FPM.addPass(AMDGPUSimplifyLibCallsPass(*this));
689  PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
690  });
691 
692  PB.registerPipelineEarlySimplificationEPCallback(
693  [this](ModulePassManager &PM, OptimizationLevel Level) {
694  if (Level == OptimizationLevel::O0)
695  return;
696 
697  PM.addPass(AMDGPUUnifyMetadataPass());
698  PM.addPass(AMDGPUPrintfRuntimeBindingPass());
699 
700  if (InternalizeSymbols) {
701  PM.addPass(InternalizePass(mustPreserveGV));
702  }
703  PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
704  if (InternalizeSymbols) {
705  PM.addPass(GlobalDCEPass());
706  }
707  if (EarlyInlineAll && !EnableFunctionCalls)
708  PM.addPass(AMDGPUAlwaysInlinePass());
709  });
710 
711  PB.registerCGSCCOptimizerLateEPCallback(
712  [this](CGSCCPassManager &PM, OptimizationLevel Level) {
713  if (Level == OptimizationLevel::O0)
714  return;
715 
716  FunctionPassManager FPM;
717 
718  // Add promote kernel arguments pass to the opt pipeline right before
719  // infer address spaces which is needed to do actual address space
720  // rewriting.
721  if (Level.getSpeedupLevel() > OptimizationLevel::O1.getSpeedupLevel() &&
722  EnablePromoteKernelArguments)
723  FPM.addPass(AMDGPUPromoteKernelArgumentsPass());
724 
725  // Add infer address spaces pass to the opt pipeline after inlining
726  // but before SROA to increase SROA opportunities.
727  FPM.addPass(InferAddressSpacesPass());
728 
729  // This should run after inlining to have any chance of doing
730  // anything, and before other cleanup optimizations.
731  FPM.addPass(AMDGPULowerKernelAttributesPass());
732 
733  if (Level != OptimizationLevel::O0) {
734  // Promote alloca to vector before SROA and loop unroll. If we
735  // manage to eliminate allocas before unroll we may choose to unroll
736  // less.
737  FPM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
738  }
739 
740  PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
741  });
742 }
743 
744 int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
745  return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
746  AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
747  AddrSpace == AMDGPUAS::REGION_ADDRESS)
748  ? -1
749  : 0;
750 }
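// For illustration: a null flat pointer addrspacecast to LDS or scratch is
// therefore lowered to the all-ones pattern (e.g. 0xFFFFFFFF for a 32-bit
// local pointer), while null global/constant/flat pointers stay 0.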
751 
752 bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
753  unsigned DestAS) const {
754  return AMDGPU::isFlatGlobalAddrSpace(SrcAS) &&
755  AMDGPU::isFlatGlobalAddrSpace(DestAS);
756 }
757 
758 unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
759  const auto *LD = dyn_cast<LoadInst>(V);
760  if (!LD)
761  return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
762 
763  // It must be a generic pointer loaded.
764  assert(V->getType()->isPointerTy() &&
765  V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);
766 
767  const auto *Ptr = LD->getPointerOperand();
768  if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
769  return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
770  // For a generic pointer loaded from the constant memory, it could be assumed
771  // as a global pointer since the constant memory is only populated on the
772  // host side. As implied by the offload programming model, only global
773  // pointers could be referenced on the host side.
774  return AMDGPUAS::GLOBAL_ADDRESS;
775 }
776 
777 std::pair<const Value *, unsigned>
778 AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const {
779  if (auto *II = dyn_cast<IntrinsicInst>(V)) {
780  switch (II->getIntrinsicID()) {
781  case Intrinsic::amdgcn_is_shared:
782  return std::make_pair(II->getArgOperand(0), AMDGPUAS::LOCAL_ADDRESS);
783  case Intrinsic::amdgcn_is_private:
784  return std::make_pair(II->getArgOperand(0), AMDGPUAS::PRIVATE_ADDRESS);
785  default:
786  break;
787  }
788  return std::make_pair(nullptr, -1);
789  }
790  // Check the global pointer predication based on
791  // (!is_share(p) && !is_private(p)). Note that logic 'and' is commutative and
792  // the order of 'is_shared' and 'is_private' is not significant.
793  Value *Ptr;
794  if (match(
795  const_cast<Value *>(V),
796  m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
797  m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
798  m_Deferred(Ptr))))))
799  return std::make_pair(Ptr, AMDGPUAS::GLOBAL_ADDRESS);
800 
801  return std::make_pair(nullptr, -1);
802 }
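// For illustration, the predicate matched above corresponds roughly to this IR
// (hypothetical value %p):
//   %s  = call i1 @llvm.amdgcn.is.shared(i8* %p)
//   %pr = call i1 @llvm.amdgcn.is.private(i8* %p)
//   %ns = xor i1 %s, true
//   %np = xor i1 %pr, true
//   %g  = and i1 %ns, %np   ; when %g holds, %p may be treated as global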
803 
804 //===----------------------------------------------------------------------===//
805 // GCN Target Machine (SI+)
806 //===----------------------------------------------------------------------===//
807 
808 GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
809  StringRef CPU, StringRef FS,
810  TargetOptions Options,
811  Optional<Reloc::Model> RM,
812  Optional<CodeModel::Model> CM,
813  CodeGenOpt::Level OL, bool JIT)
814  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
815 
816 const TargetSubtargetInfo *
817 GCNTargetMachine::getSubtargetImpl(const Function &F) const {
818  StringRef GPU = getGPUName(F);
819  StringRef FS = getFeatureString(F);
820 
821  SmallString<128> SubtargetKey(GPU);
822  SubtargetKey.append(FS);
823 
824  auto &I = SubtargetMap[SubtargetKey];
825  if (!I) {
826  // This needs to be done before we create a new subtarget since any
827  // creation will depend on the TM and the code generation flags on the
828  // function that reside in TargetOptions.
829  resetTargetOptions(F);
830  I = std::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
831  }
832 
833  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
834 
835  return I.get();
836 }
837 
838 TargetTransformInfo
839 GCNTargetMachine::getTargetTransformInfo(const Function &F) {
840  return TargetTransformInfo(GCNTTIImpl(this, F));
841 }
842 
843 //===----------------------------------------------------------------------===//
844 // AMDGPU Pass Setup
845 //===----------------------------------------------------------------------===//
846 
847 std::unique_ptr<CSEConfigBase> llvm::AMDGPUPassConfig::getCSEConfig() const {
848  return getStandardCSEConfigForOpt(TM->getOptLevel());
849 }
850 
851 namespace {
852 
853 class GCNPassConfig final : public AMDGPUPassConfig {
854 public:
855  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
856  : AMDGPUPassConfig(TM, PM) {
857  // It is necessary to know the register usage of the entire call graph. We
858  // allow calls without EnableAMDGPUFunctionCalls if they are marked
859  // noinline, so this is always required.
860  setRequiresCodeGenSCCOrder(true);
861  substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
862  }
863 
864  GCNTargetMachine &getGCNTargetMachine() const {
865  return getTM<GCNTargetMachine>();
866  }
867 
868  ScheduleDAGInstrs *
869  createMachineScheduler(MachineSchedContext *C) const override;
870 
871  ScheduleDAGInstrs *
872  createPostMachineScheduler(MachineSchedContext *C) const override {
873  ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
874  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
875  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
876  DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
877  return DAG;
878  }
879 
880  bool addPreISel() override;
881  void addMachineSSAOptimization() override;
882  bool addILPOpts() override;
883  bool addInstSelector() override;
884  bool addIRTranslator() override;
885  void addPreLegalizeMachineIR() override;
886  bool addLegalizeMachineIR() override;
887  void addPreRegBankSelect() override;
888  bool addRegBankSelect() override;
889  void addPreGlobalInstructionSelect() override;
890  bool addGlobalInstructionSelect() override;
891  void addFastRegAlloc() override;
892  void addOptimizedRegAlloc() override;
893 
894  FunctionPass *createSGPRAllocPass(bool Optimized);
895  FunctionPass *createVGPRAllocPass(bool Optimized);
896  FunctionPass *createRegAllocPass(bool Optimized) override;
897 
898  bool addRegAssignAndRewriteFast() override;
899  bool addRegAssignAndRewriteOptimized() override;
900 
901  void addPreRegAlloc() override;
902  bool addPreRewrite() override;
903  void addPostRegAlloc() override;
904  void addPreSched2() override;
905  void addPreEmitPass() override;
906 };
907 
908 } // end anonymous namespace
909 
910 AMDGPUPassConfig::AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
911  : TargetPassConfig(TM, PM) {
912  // Exceptions and StackMaps are not supported, so these passes will never do
913  // anything.
914  disablePass(&StackMapLivenessID);
915  disablePass(&FuncletLayoutID);
916  // Garbage collection is not supported.
917  disablePass(&GCLoweringID);
918  disablePass(&ShadowStackGCLoweringID);
919 }
920 
921 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
922  if (getOptLevel() == CodeGenOpt::Aggressive)
923  addPass(createGVNPass());
924  else
925  addPass(createEarlyCSEPass());
926 }
927 
928 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
929  addPass(createLICMPass());
930  addPass(createSeparateConstOffsetFromGEPPass());
931  addPass(createSpeculativeExecutionPass());
932  // ReassociateGEPs exposes more opportunities for SLSR. See
933  // the example in reassociate-geps-and-slsr.ll.
934  addPass(createStraightLineStrengthReducePass());
935  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
936  // EarlyCSE can reuse.
937  addEarlyCSEOrGVNPass();
938  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
939  addPass(createNaryReassociatePass());
940  // NaryReassociate on GEPs creates redundant common expressions, so run
941  // EarlyCSE after it.
942  addPass(createEarlyCSEPass());
943 }
944 
945 void AMDGPUPassConfig::addIRPasses() {
946  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
947 
948  // There is no reason to run these.
949  disablePass(&StackMapLivenessID);
950  disablePass(&FuncletLayoutID);
951  disablePass(&PatchableFunctionID);
952 
953  addPass(createAMDGPUPrintfRuntimeBinding());
954  addPass(createAMDGPUCtorDtorLoweringPass());
955 
956  // This must occur before inlining, as the inliner will not look through
957  // bitcast calls.
958  addPass(createAMDGPUFixFunctionBitcastsPass());
959 
960  // A call to propagate attributes pass in the backend in case opt was not run.
961  addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));
962 
963  addPass(createAMDGPULowerIntrinsicsPass());
964 
965  // Function calls are not supported, so make sure we inline everything.
966  addPass(createAMDGPUAlwaysInlinePass());
967  addPass(createAlwaysInlinerLegacyPass());
968  // We need to add the barrier noop pass, otherwise adding the function
969  // inlining pass will cause all of the PassConfigs passes to be run
970  // one function at a time, which means if we have a nodule with two
971  // functions, then we will generate code for the first function
972  // without ever running any passes on the second.
973  addPass(createBarrierNoopPass());
974 
975  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
976  if (TM.getTargetTriple().getArch() == Triple::r600)
977  addPass(createR600OpenCLImageTypeLoweringPass());
978 
979  // Replace OpenCL enqueued block function pointers with global variables.
980  addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());
981 
982  // Can increase LDS used by kernel so runs before PromoteAlloca
983  if (EnableLowerModuleLDS) {
984  // The pass "amdgpu-replace-lds-use-with-pointer" need to be run before the
985  // pass "amdgpu-lower-module-lds", and also it required to be run only if
986  // "amdgpu-lower-module-lds" pass is enabled.
987  if (EnableLDSReplaceWithPointer)
988  addPass(createAMDGPUReplaceLDSUseWithPointerPass());
989 
990  addPass(createAMDGPULowerModuleLDSPass());
991  }
992 
993  if (TM.getOptLevel() > CodeGenOpt::None)
994  addPass(createInferAddressSpacesPass());
995 
996  addPass(createAtomicExpandPass());
997 
998  if (TM.getOptLevel() > CodeGenOpt::None) {
999  addPass(createAMDGPUPromoteAlloca());
1000 
1001  if (EnableSROA)
1002  addPass(createSROAPass());
1003  if (isPassEnabled(EnableScalarIRPasses))
1004  addStraightLineScalarOptimizationPasses();
1005 
1006  if (EnableAMDGPUAliasAnalysis) {
1007  addPass(createAMDGPUAAWrapperPass());
1008  addPass(createExternalAAWrapperPass([](Pass &P, Function &,
1009  AAResults &AAR) {
1010  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
1011  AAR.addAAResult(WrapperPass->getResult());
1012  }));
1013  }
1014 
1015  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
1016  // TODO: May want to move later or split into an early and late one.
1017  addPass(createAMDGPUCodeGenPreparePass());
1018  }
1019  }
1020 
1021  TargetPassConfig::addIRPasses();
1022 
1023  // EarlyCSE is not always strong enough to clean up what LSR produces. For
1024  // example, GVN can combine
1025  //
1026  // %0 = add %a, %b
1027  // %1 = add %b, %a
1028  //
1029  // and
1030  //
1031  // %0 = shl nsw %a, 2
1032  // %1 = shl %a, 2
1033  //
1034  // but EarlyCSE can do neither of them.
1035  if (isPassEnabled(EnableScalarIRPasses))
1036  addEarlyCSEOrGVNPass();
1037 }
1038 
1039 void AMDGPUPassConfig::addCodeGenPrepare() {
1040  if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
1041  addPass(createAMDGPUAttributorPass());
1042 
1043  // FIXME: This pass adds 2 hacky attributes that can be replaced with an
1044  // analysis, and should be removed.
1045  addPass(createAMDGPUAnnotateKernelFeaturesPass());
1046  }
1047 
1048  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
1049  EnableLowerKernelArguments)
1050  addPass(createAMDGPULowerKernelArgumentsPass());
1051 
1052  TargetPassConfig::addCodeGenPrepare();
1053 
1054  if (isPassEnabled(EnableLoadStoreVectorizer))
1055  addPass(createLoadStoreVectorizerPass());
1056 
1057  // LowerSwitch pass may introduce unreachable blocks that can
1058  // cause unexpected behavior for subsequent passes. Placing it
1059  // here seems better that these blocks would get cleaned up by
1060  // UnreachableBlockElim inserted next in the pass flow.
1061  addPass(createLowerSwitchPass());
1062 }
1063 
1064 bool AMDGPUPassConfig::addPreISel() {
1065  if (TM->getOptLevel() > CodeGenOpt::None)
1066  addPass(createFlattenCFGPass());
1067  return false;
1068 }
1069 
1070 bool AMDGPUPassConfig::addInstSelector() {
1071  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
1072  return false;
1073 }
1074 
1075 bool AMDGPUPassConfig::addGCPasses() {
1076  // Do nothing. GC is not supported.
1077  return false;
1078 }
1079 
1080 llvm::ScheduleDAGInstrs *
1081 AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const {
1082  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
1083  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
1084  return DAG;
1085 }
1086 
1087 //===----------------------------------------------------------------------===//
1088 // GCN Pass Setup
1089 //===----------------------------------------------------------------------===//
1090 
1091 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
1092  MachineSchedContext *C) const {
1093  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
1094  if (ST.enableSIScheduler())
1095  return createSIMachineScheduler(C);
1096  return createGCNMaxOccupancyMachineScheduler(C);
1097 }
1098 
1099 bool GCNPassConfig::addPreISel() {
1100  AMDGPUPassConfig::addPreISel();
1101 
1102  if (TM->getOptLevel() > CodeGenOpt::None)
1103  addPass(createAMDGPULateCodeGenPreparePass());
1104 
1105  if (isPassEnabled(EnableAtomicOptimizations, CodeGenOpt::Less)) {
1106  addPass(createAMDGPUAtomicOptimizerPass());
1107  }
1108 
1109  if (TM->getOptLevel() > CodeGenOpt::None)
1110  addPass(createSinkingPass());
1111 
1112  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
1113  // regions formed by them.
1114  addPass(&AMDGPUUnifyDivergentExitNodesID);
1115  if (!LateCFGStructurize) {
1116  if (EnableStructurizerWorkarounds) {
1117  addPass(createFixIrreduciblePass());
1118  addPass(createUnifyLoopExitsPass());
1119  }
1120  addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions
1121  }
1122  addPass(createAMDGPUAnnotateUniformValues());
1123  if (!LateCFGStructurize) {
1124  addPass(createSIAnnotateControlFlowPass());
1125  }
1126  addPass(createLCSSAPass());
1127 
1128  if (TM->getOptLevel() > CodeGenOpt::Less)
1129  addPass(&AMDGPUPerfHintAnalysisID);
1130 
1131  return false;
1132 }
1133 
1134 void GCNPassConfig::addMachineSSAOptimization() {
1135  TargetPassConfig::addMachineSSAOptimization();
1136 
1137  // We want to fold operands after PeepholeOptimizer has run (or as part of
1138  // it), because it will eliminate extra copies making it easier to fold the
1139  // real source operand. We want to eliminate dead instructions after, so that
1140  // we see fewer uses of the copies. We then need to clean up the dead
1141  // instructions leftover after the operands are folded as well.
1142  //
1143  // XXX - Can we get away without running DeadMachineInstructionElim again?
1144  addPass(&SIFoldOperandsID);
1145  if (EnableDPPCombine)
1146  addPass(&GCNDPPCombineID);
1147  addPass(&SILoadStoreOptimizerID);
1148  if (isPassEnabled(EnableSDWAPeephole)) {
1149  addPass(&SIPeepholeSDWAID);
1150  addPass(&EarlyMachineLICMID);
1151  addPass(&MachineCSEID);
1152  addPass(&SIFoldOperandsID);
1153  }
1154  addPass(&DeadMachineInstructionElimID);
1155  addPass(createSIShrinkInstructionsPass());
1156 }
1157 
1158 bool GCNPassConfig::addILPOpts() {
1159  if (EnableEarlyIfConversion)
1160  addPass(&EarlyIfConverterID);
1161 
1162  TargetPassConfig::addILPOpts();
1163  return false;
1164 }
1165 
1166 bool GCNPassConfig::addInstSelector() {
1167  AMDGPUPassConfig::addInstSelector();
1168  addPass(&SIFixSGPRCopiesID);
1169  addPass(createSILowerI1CopiesPass());
1170  return false;
1171 }
1172 
1173 bool GCNPassConfig::addIRTranslator() {
1174  addPass(new IRTranslator(getOptLevel()));
1175  return false;
1176 }
1177 
1178 void GCNPassConfig::addPreLegalizeMachineIR() {
1179  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1180  addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
1181  addPass(new Localizer());
1182 }
1183 
1184 bool GCNPassConfig::addLegalizeMachineIR() {
1185  addPass(new Legalizer());
1186  return false;
1187 }
1188 
1189 void GCNPassConfig::addPreRegBankSelect() {
1190  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1191  addPass(createAMDGPUPostLegalizeCombiner(IsOptNone));
1192 }
1193 
1194 bool GCNPassConfig::addRegBankSelect() {
1195  addPass(new RegBankSelect());
1196  return false;
1197 }
1198 
1199 void GCNPassConfig::addPreGlobalInstructionSelect() {
1200  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1201  addPass(createAMDGPURegBankCombiner(IsOptNone));
1202 }
1203 
1204 bool GCNPassConfig::addGlobalInstructionSelect() {
1205  addPass(new InstructionSelect(getOptLevel()));
1206  return false;
1207 }
1208 
1209 void GCNPassConfig::addPreRegAlloc() {
1210  if (LateCFGStructurize) {
1211  addPass(createAMDGPUMachineCFGStructurizerPass());
1212  }
1213 }
1214 
1215 void GCNPassConfig::addFastRegAlloc() {
1216  // FIXME: We have to disable the verifier here because of PHIElimination +
1217  // TwoAddressInstructions disabling it.
1218 
1219  // This must be run immediately after phi elimination and before
1220  // TwoAddressInstructions, otherwise the processing of the tied operand of
1221  // SI_ELSE will introduce a copy of the tied operand source after the else.
1222  insertPass(&PHIEliminationID, &SILowerControlFlowID);
1223 
1224  insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
1225  insertPass(&TwoAddressInstructionPassID, &SIPreAllocateWWMRegsID);
1226 
1227  TargetPassConfig::addFastRegAlloc();
1228 }
1229 
1230 void GCNPassConfig::addOptimizedRegAlloc() {
1231  // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
1232  // instructions that cause scheduling barriers.
1233  insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
1234  insertPass(&MachineSchedulerID, &SIPreAllocateWWMRegsID);
1235 
1236  if (OptExecMaskPreRA)
1237  insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
1238 
1239  if (isPassEnabled(EnablePreRAOptimizations))
1240  insertPass(&RenameIndependentSubregsID, &GCNPreRAOptimizationsID);
1241 
1242  // This is not an essential optimization and it has a noticeable impact on
1243  // compilation time, so we only enable it from O2.
1244  if (TM->getOptLevel() > CodeGenOpt::Less)
1245  insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
1246 
1247  // FIXME: when an instruction has a Killed operand, and the instruction is
1248  // inside a bundle, seems only the BUNDLE instruction appears as the Kills of
1249  // the register in LiveVariables, this would trigger a failure in verifier,
1250  // we should fix it and enable the verifier.
1251  if (OptVGPRLiveRange)
1252  insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeID);
1253  // This must be run immediately after phi elimination and before
1254  // TwoAddressInstructions, otherwise the processing of the tied operand of
1255  // SI_ELSE will introduce a copy of the tied operand source after the else.
1256  insertPass(&PHIEliminationID, &SILowerControlFlowID);
1257 
1258  if (EnableDCEInRA)
1259  insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID);
1260 
1261  TargetPassConfig::addOptimizedRegAlloc();
1262 }
1263 
1264 bool GCNPassConfig::addPreRewrite() {
1265  if (EnableRegReassign)
1266  addPass(&GCNNSAReassignID);
1267  return true;
1268 }
1269 
1270 FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {
1271  // Initialize the global default.
1272  llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,
1273  initializeDefaultSGPRRegisterAllocatorOnce);
1274 
1275  RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
1276  if (Ctor != useDefaultRegisterAllocator)
1277  return Ctor();
1278 
1279  if (Optimized)
1280  return createGreedyRegisterAllocator(onlyAllocateSGPRs);
1281 
1282  return createFastRegisterAllocator(onlyAllocateSGPRs, false);
1283 }
1284 
1285 FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1286  // Initialize the global default.
1287  llvm::call_once(InitializeDefaultVGPRRegisterAllocatorFlag,
1288  initializeDefaultVGPRRegisterAllocatorOnce);
1289 
1290  RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
1291  if (Ctor != useDefaultRegisterAllocator)
1292  return Ctor();
1293 
1294  if (Optimized)
1295  return createGreedyVGPRRegisterAllocator();
1296 
1297  return createFastVGPRRegisterAllocator();
1298 }
1299 
1300 FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
1301  llvm_unreachable("should not be used");
1302 }
1303 
1304 static const char RegAllocOptNotSupportedMessage[] =
1305  "-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
1306 
1307 bool GCNPassConfig::addRegAssignAndRewriteFast() {
1308  if (!usingDefaultRegAlloc())
1309  report_fatal_error(RegAllocOptNotSupportedMessage);
1310 
1311  addPass(createSGPRAllocPass(false));
1312 
1313  // Equivalent of PEI for SGPRs.
1314  addPass(&SILowerSGPRSpillsID);
1315 
1316  addPass(createVGPRAllocPass(false));
1317  return true;
1318 }
1319 
1320 bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1321  if (!usingDefaultRegAlloc())
1322  report_fatal_error(RegAllocOptNotSupportedMessage);
1323 
1324  addPass(createSGPRAllocPass(true));
1325 
1326  // Commit allocated register changes. This is mostly necessary because too
1327  // many things rely on the use lists of the physical registers, such as the
1328  // verifier. This is only necessary with allocators which use LiveIntervals,
1329  // since FastRegAlloc does the replacements itself.
1330  addPass(createVirtRegRewriter(false));
1331 
1332  // Equivalent of PEI for SGPRs.
1333  addPass(&SILowerSGPRSpillsID);
1334 
1335  addPass(createVGPRAllocPass(true));
1336 
1337  addPreRewrite();
1338  addPass(&VirtRegRewriterID);
1339 
1340  return true;
1341 }
1342 
1343 void GCNPassConfig::addPostRegAlloc() {
1344  addPass(&SIFixVGPRCopiesID);
1345  if (getOptLevel() > CodeGenOpt::None)
1346  addPass(&SIOptimizeExecMaskingID);
1347  TargetPassConfig::addPostRegAlloc();
1348 }
1349 
1350 void GCNPassConfig::addPreSched2() {
1351  if (TM->getOptLevel() > CodeGenOpt::None)
1352  addPass(createSIShrinkInstructionsPass());
1353  addPass(&SIPostRABundlerID);
1354 }
1355 
1356 void GCNPassConfig::addPreEmitPass() {
1357  addPass(createSIMemoryLegalizerPass());
1358  addPass(createSIInsertWaitcntsPass());
1359 
1360  addPass(createSIModeRegisterPass());
1361 
1362  if (getOptLevel() > CodeGenOpt::None)
1363  addPass(&SIInsertHardClausesID);
1364 
1365  addPass(&SILateBranchLoweringPassID);
1366  if (getOptLevel() > CodeGenOpt::None)
1367  addPass(&SIPreEmitPeepholeID);
1368  // The hazard recognizer that runs as part of the post-ra scheduler does not
1369  // guarantee to be able handle all hazards correctly. This is because if there
1370  // are multiple scheduling regions in a basic block, the regions are scheduled
1371  // bottom up, so when we begin to schedule a region we don't know what
1372  // instructions were emitted directly before it.
1373  //
1374  // Here we add a stand-alone hazard recognizer pass which can handle all
1375  // cases.
1376  addPass(&PostRAHazardRecognizerID);
1377  addPass(&BranchRelaxationPassID);
1378 }
1379 
1380 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
1381  return new GCNPassConfig(*this, PM);
1382 }
1383 
1384 yaml::MachineFunctionInfo *GCNTargetMachine::createDefaultFuncInfoYAML() const {
1385  return new yaml::SIMachineFunctionInfo();
1386 }
1387 
1388 yaml::MachineFunctionInfo *
1389 GCNTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
1390  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1391  return new yaml::SIMachineFunctionInfo(
1392  *MFI, *MF.getSubtarget().getRegisterInfo(), MF);
1393 }
1394 
1395 bool GCNTargetMachine::parseMachineFunctionInfo(
1396  const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,
1397  SMDiagnostic &Error, SMRange &SourceRange) const {
1398  const yaml::SIMachineFunctionInfo &YamlMFI =
1399  reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
1400  MachineFunction &MF = PFS.MF;
1401  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1402 
1403  if (MFI->initializeBaseYamlFields(YamlMFI, MF, PFS, Error, SourceRange))
1404  return true;
1405 
1406  if (MFI->Occupancy == 0) {
1407  // Fixup the subtarget dependent default value.
1408  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1409  MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
1410  }
1411 
1412  auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
1413  Register TempReg;
1414  if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
1415  SourceRange = RegName.SourceRange;
1416  return true;
1417  }
1418  RegVal = TempReg;
1419 
1420  return false;
1421  };
1422 
1423  auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
1424  // Create a diagnostic for a the register string literal.
1425  const MemoryBuffer &Buffer =
1426  *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
1427  Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
1428  RegName.Value.size(), SourceMgr::DK_Error,
1429  "incorrect register class for field", RegName.Value,
1430  None, None);
1431  SourceRange = RegName.SourceRange;
1432  return true;
1433  };
1434 
1435  if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1436  parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
1437  parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
1438  return true;
1439 
1440  if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1441  !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
1442  return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
1443  }
1444 
1445  if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1446  !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1447  return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
1448  }
1449 
1450  if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1451  !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1452  return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
1453  }
1454 
1455  auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
1456  const TargetRegisterClass &RC,
1457  ArgDescriptor &Arg, unsigned UserSGPRs,
1458  unsigned SystemSGPRs) {
1459  // Skip parsing if it's not present.
1460  if (!A)
1461  return false;
1462 
1463  if (A->IsRegister) {
1464  Register Reg;
1465  if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
1466  SourceRange = A->RegisterName.SourceRange;
1467  return true;
1468  }
1469  if (!RC.contains(Reg))
1470  return diagnoseRegisterClass(A->RegisterName);
1471  Arg = ArgDescriptor::createRegister(Reg);
1472  } else
1473  Arg = ArgDescriptor::createStack(A->StackOffset);
1474  // Check and apply the optional mask.
1475  if (A->Mask)
1476  Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
1477 
1478  MFI->NumUserSGPRs += UserSGPRs;
1479  MFI->NumSystemSGPRs += SystemSGPRs;
1480  return false;
1481  };
1482 
1483  if (YamlMFI.ArgInfo &&
1484  (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
1485  AMDGPU::SGPR_128RegClass,
1486  MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
1487  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
1488  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
1489  2, 0) ||
1490  parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
1491  MFI->ArgInfo.QueuePtr, 2, 0) ||
1492  parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
1493  AMDGPU::SReg_64RegClass,
1494  MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
1495  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
1496  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
1497  2, 0) ||
1498  parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
1499  AMDGPU::SReg_64RegClass,
1500  MFI->ArgInfo.FlatScratchInit, 2, 0) ||
1501  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
1502  AMDGPU::SGPR_32RegClass,
1503  MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
1504  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
1505  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
1506  0, 1) ||
1507  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
1508  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
1509  0, 1) ||
1510  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
1511  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
1512  0, 1) ||
1513  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
1514  AMDGPU::SGPR_32RegClass,
1515  MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
1516  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
1517  AMDGPU::SGPR_32RegClass,
1518  MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
1519  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
1520  AMDGPU::SReg_64RegClass,
1521  MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
1522  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
1523  AMDGPU::SReg_64RegClass,
1524  MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
1525  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
1526  AMDGPU::VGPR_32RegClass,
1527  MFI->ArgInfo.WorkItemIDX, 0, 0) ||
1528  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
1529  AMDGPU::VGPR_32RegClass,
1530  MFI->ArgInfo.WorkItemIDY, 0, 0) ||
1531  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
1532  AMDGPU::VGPR_32RegClass,
1533  MFI->ArgInfo.WorkItemIDZ, 0, 0)))
1534  return true;
1535 
1536  MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
1537  MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
1538  MFI->Mode.FP32InputDenormals = YamlMFI.Mode.FP32InputDenormals;
1539  MFI->Mode.FP32OutputDenormals = YamlMFI.Mode.FP32OutputDenormals;
1540  MFI->Mode.FP64FP16InputDenormals = YamlMFI.Mode.FP64FP16InputDenormals;
1541  MFI->Mode.FP64FP16OutputDenormals = YamlMFI.Mode.FP64FP16OutputDenormals;
1542 
1543  return false;
1544 }
llvm::AAResults::addAAResult
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
Definition: AliasAnalysis.h:516
llvm::initializeR600ControlFlowFinalizerPass
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
llvm::TargetPassConfig::addPostRegAlloc
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
Definition: TargetPassConfig.h:417
EnableDCEInRA
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
llvm::TargetMachine::getOptLevel
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.cpp:188
llvm::createFastRegisterAllocator
FunctionPass * createFastRegisterAllocator()
FastRegisterAllocation Pass - This pass register allocates as fast as possible.
Definition: RegAllocFast.cpp:1565
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1295
llvm::AMDGPUAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: AMDGPUAliasAnalysis.h:48
llvm::ArgDescriptor::createStack
static constexpr ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)
Definition: AMDGPUArgumentUsageInfo.h:49
llvm::AMDGPUFunctionArgInfo::QueuePtr
ArgDescriptor QueuePtr
Definition: AMDGPUArgumentUsageInfo.h:126
EnableLowerModuleLDS
static cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
llvm::initializeR600PacketizerPass
void initializeR600PacketizerPass(PassRegistry &)
LLVMInitializeAMDGPUTarget
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget()
Definition: AMDGPUTargetMachine.cpp:309
llvm::createAMDGPUCtorDtorLoweringPass
ModulePass * createAMDGPUCtorDtorLoweringPass()
RegAllocOptNotSupportedMessage
static const char RegAllocOptNotSupportedMessage[]
Definition: AMDGPUTargetMachine.cpp:1304
llvm::InferAddressSpacesPass
Definition: InferAddressSpaces.h:16
EnableSIModeRegisterPass
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
llvm::PerFunctionMIParsingState::SM
SourceMgr * SM
Definition: MIParser.h:165
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:22
PassBuilder.h
llvm::createGreedyRegisterAllocator
FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized buil...
Definition: RegAllocGreedy.cpp:177
llvm::Attribute::isValid
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:168
llvm::createAMDGPUAttributorPass
Pass * createAMDGPUAttributorPass()
Definition: AMDGPUAttributor.cpp:675
llvm::AMDGPUTargetMachine::registerDefaultAliasAnalyses
void registerDefaultAliasAnalyses(AAManager &) override
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Definition: AMDGPUTargetMachine.cpp:602
mustPreserveGV
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
Definition: AMDGPUTargetMachine.cpp:518
llvm::createSeparateConstOffsetFromGEPPass
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Definition: SeparateConstOffsetFromGEP.cpp:499
llvm::OptimizationLevel::O1
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
Definition: OptimizationLevel.h:57
llvm::GCNTargetMachine::convertFuncInfoToYAML
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
Definition: AMDGPUTargetMachine.cpp:1389
llvm::AMDGPULowerModuleLDSPass
Definition: AMDGPU.h:158
llvm::initializeR600ExpandSpecialInstrsPassPass
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
llvm::initializeAMDGPUPostLegalizerCombinerPass
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
llvm::initializeAMDGPUPromoteAllocaPass
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
llvm::createSIMemoryLegalizerPass
FunctionPass * createSIMemoryLegalizerPass()
Definition: SIMemoryLegalizer.cpp:1883
llvm::X86AS::FS
@ FS
Definition: X86.h:188
llvm::SILowerSGPRSpillsID
char & SILowerSGPRSpillsID
Definition: SILowerSGPRSpills.cpp:72
llvm::Wave32
@ Wave32
Definition: AMDGPUMCTargetDesc.h:31
llvm::PassBuilder::registerPipelineStartEPCallback
void registerPipelineStartEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:457
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:217
llvm::TargetOptions
Definition: TargetOptions.h:124
llvm::AMDGPUAlwaysInlinePass
Definition: AMDGPU.h:251
llvm::yaml::SIMachineFunctionInfo::ArgInfo
Optional< SIArgumentInfo > ArgInfo
Definition: SIMachineFunctionInfo.h:290
SIMachineFunctionInfo.h
Scalar.h
llvm::ArgDescriptor::createArg
static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
Definition: AMDGPUArgumentUsageInfo.h:54
createMinRegScheduler
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:409
llvm::initializeGCNPreRAOptimizationsPass
void initializeGCNPreRAOptimizationsPass(PassRegistry &)
T
llvm::ArgDescriptor
Definition: AMDGPUArgumentUsageInfo.h:23
llvm::Function
Definition: Function.h:62
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:457
llvm::Attribute
Definition: Attributes.h:52
llvm::AMDGPU::SIModeRegisterDefaults::FP32OutputDenormals
bool FP32OutputDenormals
Definition: AMDGPUBaseInfo.h:946
llvm::PassManager::addPass
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT &&Pass)
Definition: PassManager.h:553
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::initializeAMDGPUAlwaysInlinePass
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
llvm::yaml::MachineFunctionInfo
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
Definition: MIRYamlMapping.h:673
llvm::PHIEliminationID
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions.
Definition: PHIElimination.cpp:130
llvm::initializeSIInsertHardClausesPass
void initializeSIInsertHardClausesPass(PassRegistry &)
llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringPass
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
llvm::initializeSIPreAllocateWWMRegsPass
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::initializeAMDGPUPropagateAttributesLatePass
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
InferAddressSpaces.h
llvm::AMDGPU::SIModeRegisterDefaults::IEEE
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
Definition: AMDGPUBaseInfo.h:937
llvm::createAlwaysInlinerLegacyPass
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
Definition: AlwaysInliner.cpp:175
getGPUOrDefault
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
Definition: AMDGPUTargetMachine.cpp:463
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:137
llvm::AMDGPUPromoteAllocaToVectorPass
Definition: AMDGPU.h:236
llvm::initializeAMDGPULateCodeGenPreparePass
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &)
llvm::createFixIrreduciblePass
FunctionPass * createFixIrreduciblePass()
Definition: FixIrreducible.cpp:103
llvm::MachineSchedRegistry
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Definition: MachineScheduler.h:141
llvm::createVirtRegRewriter
FunctionPass * createVirtRegRewriter(bool ClearVirtRegs=true)
Definition: VirtRegMap.cpp:646
llvm::Triple::amdgcn
@ amdgcn
Definition: Triple.h:71
GCNSchedStrategy.h
llvm::GCNIterativeScheduler::SCHEDULE_ILP
@ SCHEDULE_ILP
Definition: GCNIterativeScheduler.h:37
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:736
llvm::createAMDGPULateCodeGenPreparePass
FunctionPass * createAMDGPULateCodeGenPreparePass()
Definition: AMDGPULateCodeGenPrepare.cpp:193
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::createSILowerI1CopiesPass
FunctionPass * createSILowerI1CopiesPass()
Definition: SILowerI1Copies.cpp:413
llvm::initializeR600ClauseMergePassPass
void initializeR600ClauseMergePassPass(PassRegistry &)
llvm::GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY
@ SCHEDULE_LEGACYMAXOCCUPANCY
Definition: GCNIterativeScheduler.h:36
llvm::createFlattenCFGPass
FunctionPass * createFlattenCFGPass()
Definition: FlattenCFGPass.cpp:83
llvm::InternalizePass
A pass that internalizes all functions and variables other than those that must be preserved accordin...
Definition: Internalize.h:36
llvm::initializeSIOptimizeExecMaskingPreRAPass
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
llvm::AMDGPUFunctionArgInfo::FlatScratchInit
ArgDescriptor FlatScratchInit
Definition: AMDGPUArgumentUsageInfo.h:129
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::createEarlyCSEPass
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1746
llvm::Wave64
@ Wave64
Definition: AMDGPUMCTargetDesc.h:31
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
llvm::initializeSILowerI1CopiesPass
void initializeSILowerI1CopiesPass(PassRegistry &)
llvm::SIPreEmitPeepholeID
char & SIPreEmitPeepholeID
llvm::createAMDGPUPostLegalizeCombiner
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPostLegalizerCombiner.cpp:448
llvm::initializeAMDGPUDAGToDAGISelPass
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
llvm::initializeSIPeepholeSDWAPass
void initializeSIPeepholeSDWAPass(PassRegistry &)
llvm::ShadowStackGCLoweringID
char & ShadowStackGCLoweringID
ShadowStackGCLowering - Implements the custom lowering mechanism used by the shadow stack GC.
Definition: ShadowStackGCLowering.cpp:92
llvm::SILowerControlFlowID
char & SILowerControlFlowID
Definition: SILowerControlFlow.cpp:174
llvm::yaml::SIMachineFunctionInfo
Definition: SIMachineFunctionInfo.h:270
llvm::AMDGPUMachineFunction::getLDSSize
unsigned getLDSSize() const
Definition: AMDGPUMachineFunction.h:68
llvm::SIOptimizeVGPRLiveRangeID
char & SIOptimizeVGPRLiveRangeID
Definition: SIOptimizeVGPRLiveRange.cpp:572
llvm::createAMDGPUUnifyMetadataPass
ModulePass * createAMDGPUUnifyMetadataPass()
InstructionSelect.h
EnableStructurizerWorkarounds
static cl::opt< bool > EnableStructurizerWorkarounds("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden)
llvm::AMDGPUPassConfig
Definition: AMDGPUTargetMachine.h:106
llvm::AMDGPUAAWrapperPass
Legacy wrapper pass to provide the AMDGPUAAResult object.
Definition: AMDGPUAliasAnalysis.h:62
EnableAtomicOptimizations
static cl::opt< bool > EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
createGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:392
llvm::Optional< Reloc::Model >
llvm::GCNScheduleDAGMILive
Definition: GCNSchedStrategy.h:73
llvm::initializeSIFoldOperandsPass
void initializeSIFoldOperandsPass(PassRegistry &)
llvm::createBarrierNoopPass
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
Definition: BarrierNoopPass.cpp:43
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition: AMDGPUISelDAGToDAG.cpp:112
InternalizeSymbols
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
llvm::initializeGlobalISel
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
Definition: GlobalISel.cpp:18
llvm::AMDGPU::SIModeRegisterDefaults::FP32InputDenormals
bool FP32InputDenormals
If this is set, neither input or output denormals are flushed for most f32 instructions.
Definition: AMDGPUBaseInfo.h:945
llvm::PassBuilder::registerAnalysisRegistrationCallback
void registerAnalysisRegistrationCallback(const std::function< void(CGSCCAnalysisManager &)> &C)
{{@ Register callbacks for analysis registration with this PassBuilder instance.
Definition: PassBuilder.h:490
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
SIMachineScheduler.h
llvm::yaml::SIMode::FP32OutputDenormals
bool FP32OutputDenormals
Definition: SIMachineFunctionInfo.h:234
llvm::createGVNPass
FunctionPass * createGVNPass(bool NoMemDepAnalysis=false)
Create a legacy GVN pass.
Definition: GVN.cpp:3118
llvm::createCGSCCToFunctionPassAdaptor
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:525
llvm::AMDGPUFunctionArgInfo::PrivateSegmentSize
ArgDescriptor PrivateSegmentSize
Definition: AMDGPUArgumentUsageInfo.h:130
llvm::createR600OpenCLImageTypeLoweringPass
ModulePass * createR600OpenCLImageTypeLoweringPass()
Definition: R600OpenCLImageTypeLoweringPass.cpp:372
llvm::AMDGPUUseNativeCallsPass
Definition: AMDGPU.h:68
llvm::AMDGPUFunctionArgInfo::DispatchPtr
ArgDescriptor DispatchPtr
Definition: AMDGPUArgumentUsageInfo.h:125
llvm::PatternMatch::m_c_And
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
Definition: PatternMatch.h:2249
llvm::initializeAMDGPUPropagateAttributesEarlyPass
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
llvm::SIPreAllocateWWMRegsID
char & SIPreAllocateWWMRegsID
Definition: SIPreAllocateWWMRegs.cpp:81
llvm::initializeAMDGPUPromoteKernelArgumentsPass
void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &)
llvm::SIPostRABundlerID
char & SIPostRABundlerID
Definition: SIPostRABundler.cpp:69
llvm::OptimizationLevel::O0
static const OptimizationLevel O0
Disable as many optimizations as possible.
Definition: OptimizationLevel.h:41
llvm::initializeSIShrinkInstructionsPass
void initializeSIShrinkInstructionsPass(PassRegistry &)
LegacyPassManager.h
llvm::TwoAddressInstructionPassID
char & TwoAddressInstructionPassID
TwoAddressInstruction - This pass reduces two-address instructions to use two operands.
Definition: TwoAddressInstructionPass.cpp:194
PassManagerBuilder.h
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1564
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:144
llvm::GCNTargetMachine::parseMachineFunctionInfo
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target's MachineFunctionInfo from the YAML representation.
Definition: AMDGPUTargetMachine.cpp:1395
llvm::initializeAMDGPUSimplifyLibCallsPass
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
Internalize.h
createSIMachineScheduler
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:387
llvm::PatternMatch::m_Deferred
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:798
llvm::MemoryBuffer
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition: MemoryBuffer.h:50
llvm::AMDGPUMachineFunction::Mode
AMDGPU::SIModeRegisterDefaults Mode
Definition: AMDGPUMachineFunction.h:42
llvm::AMDGPUPassConfig::addGCPasses
bool addGCPasses() override
addGCPasses - Add late codegen passes that analyze code for garbage collection.
Definition: AMDGPUTargetMachine.cpp:1075
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::createAMDGPUExternalAAWrapperPass
ImmutablePass * createAMDGPUExternalAAWrapperPass()
Definition: AMDGPUAliasAnalysis.cpp:37
llvm::AMDGPUFunctionArgInfo::DispatchID
ArgDescriptor DispatchID
Definition: AMDGPUArgumentUsageInfo.h:128
llvm::initializeAMDGPULowerIntrinsicsPass
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
llvm::initializeGCNDPPCombinePass
void initializeGCNDPPCombinePass(PassRegistry &)
llvm::AMDGPUUnifyMetadataPass
Definition: AMDGPU.h:277
llvm::AMDGPUFunctionArgInfo::ImplicitArgPtr
ArgDescriptor ImplicitArgPtr
Definition: AMDGPUArgumentUsageInfo.h:141
EnableSDWAPeephole
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
FunctionPassCtor
llvm::SIOptimizeExecMaskingID
char & SIOptimizeExecMaskingID
Definition: SIOptimizeExecMasking.cpp:52
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:185
llvm::initializeAMDGPUUnifyMetadataPass
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
llvm::yaml::SIMachineFunctionInfo::FrameOffsetReg
StringValue FrameOffsetReg
Definition: SIMachineFunctionInfo.h:287
llvm::initializeAMDGPUArgumentUsageInfoPass
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
R600.h
llvm::AMDGPUPassConfig::addIRPasses
void addIRPasses() override
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition: AMDGPUTargetMachine.cpp:945
SISchedRegistry
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
GCNIterativeScheduler.h
llvm::AMDGPUFunctionArgInfo::WorkGroupIDX
ArgDescriptor WorkGroupIDX
Definition: AMDGPUArgumentUsageInfo.h:133
llvm::GCNTargetMachine::GCNTargetMachine
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
Definition: AMDGPUTargetMachine.cpp:808
llvm::createInferAddressSpacesPass
FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)
Definition: InferAddressSpaces.cpp:1293
llvm::initializeSILateBranchLoweringPass
void initializeSILateBranchLoweringPass(PassRegistry &)
llvm::TargetPassConfig::TM
LLVMTargetMachine * TM
Definition: TargetPassConfig.h:122
AMDGPUAliasAnalysis.h
llvm::AMDGPUTargetMachine
Definition: AMDGPUTargetMachine.h:28
llvm::MSP430Attrs::CodeModel
CodeModel
Definition: MSP430Attributes.h:37
llvm::createAMDGPUUseNativeCallsPass
FunctionPass * createAMDGPUUseNativeCallsPass()
Definition: AMDGPULibCalls.cpp:1661
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
AlwaysInliner.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
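As an illustration of the PatternMatch helpers referenced on this page (m_c_And, m_Value, m_Deferred), a small self-contained sketch; the helper name isSelfAnd is hypothetical:
  static bool isSelfAnd(llvm::Value *V) {
    using namespace llvm::PatternMatch;
    llvm::Value *X;
    // Matches 'X & X' with the operands in either order.
    return match(V, m_c_And(m_Value(X), m_Deferred(X)));
  }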
llvm::AAResults
Definition: AliasAnalysis.h:507
llvm::yaml::SIMode::FP32InputDenormals
bool FP32InputDenormals
Definition: SIMachineFunctionInfo.h:233
llvm::PassBuilder::registerParseAACallback
void registerParseAACallback(const std::function< bool(StringRef Name, AAManager &AA)> &C)
Register a callback for parsing an AliasAnalysis Name to populate the given AAManager AA.
Definition: PassBuilder.h:482
ScalarizeGlobal
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
llvm::createNaryReassociatePass
FunctionPass * createNaryReassociatePass()
Definition: NaryReassociate.cpp:165
llvm::PostRAHazardRecognizerID
char & PostRAHazardRecognizerID
PostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
Definition: PostRAHazardRecognizer.cpp:64
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:739
llvm::initializeAMDGPULowerKernelArgumentsPass
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
llvm::initializeSIWholeQuadModePass
void initializeSIWholeQuadModePass(PassRegistry &)
llvm::initializeAMDGPUAtomicOptimizerPass
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
llvm::getTheAMDGPUTarget
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
Definition: AMDGPUTargetInfo.cpp:20
llvm::Legalizer
Definition: Legalizer.h:30
llvm::AMDGPUFunctionArgInfo::WorkItemIDX
ArgDescriptor WorkItemIDX
Definition: AMDGPUArgumentUsageInfo.h:148
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
EnableAMDGPUAliasAnalysis
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
EnableLowerKernelArguments
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
EnableLoadStoreVectorizer
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
AMDGPUTargetInfo.h
llvm::createAMDGPULowerModuleLDSPass
ModulePass * createAMDGPULowerModuleLDSPass()
R600TargetMachine.h
llvm::FuncletLayoutID
char & FuncletLayoutID
This pass lays out funclets contiguously.
Definition: FuncletLayout.cpp:39
AMDGPUMacroFusion.h
llvm::initializeAMDGPUUseNativeCallsPass
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
llvm::createSIInsertWaitcntsPass
FunctionPass * createSIInsertWaitcntsPass()
Definition: SIInsertWaitcnts.cpp:795
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
EnableLDSReplaceWithPointer
static cl::opt< bool > EnableLDSReplaceWithPointer("amdgpu-enable-lds-replace-with-pointer", cl::desc("Enable LDS replace with pointer pass"), cl::init(false), cl::Hidden)
llvm::PassBuilder
This class provides access to building LLVM's passes.
Definition: PassBuilder.h:94
EnableRegReassign
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::SMDiagnostic
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:251
llvm::yaml::SIMode::FP64FP16InputDenormals
bool FP64FP16InputDenormals
Definition: SIMachineFunctionInfo.h:235
llvm::createAMDGPUAnnotateUniformValues
FunctionPass * createAMDGPUAnnotateUniformValues()
Definition: AMDGPUAnnotateUniformValues.cpp:150
llvm::initializeAMDGPUUnifyDivergentExitNodesPass
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
llvm::EarlyIfConverterID
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions.
Definition: EarlyIfConversion.cpp:784
useDefaultRegisterAllocator
static FunctionPass * useDefaultRegisterAllocator()
-regalloc=... command line option.
Definition: TargetPassConfig.cpp:1103
llvm::AMDGPUPromoteAllocaPass
Definition: AMDGPU.h:228
llvm::createGenericSchedPostRA
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
Definition: MachineScheduler.cpp:3649
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::createAtomicExpandPass
FunctionPass * createAtomicExpandPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...
llvm::InstructionSelect
This pass is responsible for selecting generic machine instructions to target-specific instructions.
Definition: InstructionSelect.h:31
llvm::AMDGPUTargetMachine::getNullPointerValue
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
Definition: AMDGPUTargetMachine.cpp:744
llvm::RegisterTargetMachine
RegisterTargetMachine - Helper template for registering a target machine implementation,...
Definition: TargetRegistry.h:1275
llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition: MachineScheduler.h:323
llvm::PassRegistry
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::Triple::r600
@ r600
Definition: Triple.h:70
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::createUnifyLoopExitsPass
FunctionPass * createUnifyLoopExitsPass()
Definition: UnifyLoopExits.cpp:53
llvm::GCNIterativeScheduler
Definition: GCNIterativeScheduler.h:29
createTLOF
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
Definition: AMDGPUTargetMachine.cpp:383
llvm::SourceMgr::getMainFileID
unsigned getMainFileID() const
Definition: SourceMgr.h:129
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:360
AMDGPUTargetObjectFile.h
llvm::AMDGPULowerKernelAttributesPass
Definition: AMDGPU.h:118
GVN.h
llvm::createAMDGPUPropagateAttributesLatePass
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:406
llvm::initializeSIMemoryLegalizerPass
void initializeSIMemoryLegalizerPass(PassRegistry &)
llvm::createLoadStoreVectorizerPass
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
llvm::initializeAMDGPUResourceUsageAnalysisPass
void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &)
EnableDPPCombine
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
llvm::createAMDGPULowerIntrinsicsPass
ModulePass * createAMDGPULowerIntrinsicsPass()
Definition: AMDGPULowerIntrinsics.cpp:179
llvm::AMDGPUPassConfig::addCodeGenPrepare
void addCodeGenPrepare() override
Add pass to prepare the LLVM IR for code generation.
Definition: AMDGPUTargetMachine.cpp:1039
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::StackMapLivenessID
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
Definition: StackMapLivenessAnalysis.cpp:86
llvm::createAMDGPUAnnotateKernelFeaturesPass
Pass * createAMDGPUAnnotateKernelFeaturesPass()
Definition: AMDGPUAnnotateKernelFeatures.cpp:137
llvm::initializeAMDGPUReplaceLDSUseWithPointerPass
void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &)
PatternMatch.h
llvm::AMDGPUTargetMachine::~AMDGPUTargetMachine
~AMDGPUTargetMachine() override
llvm::AMDGPUTargetMachine::getSubtargetImpl
const TargetSubtargetInfo * getSubtargetImpl() const
llvm::createSinkingPass
FunctionPass * createSinkingPass()
Definition: Sink.cpp:284
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:311
llvm::createSpeculativeExecutionPass
FunctionPass * createSpeculativeExecutionPass()
Definition: SpeculativeExecution.cpp:325
Utils.h
llvm::SILoadStoreOptimizerID
char & SILoadStoreOptimizerID
Definition: SILoadStoreOptimizer.cpp:576
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:304
llvm::RegisterPassParser
RegisterPassParser class - Handle the addition of new machine passes.
Definition: MachinePassRegistry.h:135
llvm::None
const NoneType None
Definition: None.h:23
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::createAMDGPUExportClusteringDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()
Definition: AMDGPUExportClustering.cpp:144
llvm::initializeSIOptimizeVGPRLiveRangePass
void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &)
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::TargetMachine::resetTargetOptions
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
Definition: TargetMachine.cpp:56
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1394
llvm::SmallString< 128 >
llvm::SourceMgr::getMemoryBuffer
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition: SourceMgr.h:122
llvm::createFunctionInliningPass
Pass * createFunctionInliningPass()
createFunctionInliningPass - Return a new pass object that uses a heuristic to inline direct function...
Definition: InlineSimple.cpp:97
llvm::legacy::PassManagerBase::add
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
llvm::MemoryBuffer::getBufferIdentifier
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition: MemoryBuffer.h:75
llvm::createAMDGPUAAWrapperPass
ImmutablePass * createAMDGPUAAWrapperPass()
Definition: AMDGPUAliasAnalysis.cpp:33
llvm::PassManagerBuilder
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
Definition: PassManagerBuilder.h:58
llvm::createLowerSwitchPass
FunctionPass * createLowerSwitchPass()
Definition: LowerSwitch.cpp:580
llvm::createAMDGPUPrintfRuntimeBinding
ModulePass * createAMDGPUPrintfRuntimeBinding()
Definition: AMDGPUPrintfRuntimeBinding.cpp:92
AMDGPUTargetTransformInfo.h
llvm::AMDGPUPassConfig::addInstSelector
bool addInstSelector() override
addInstSelector - This method should install an instruction selector pass, which converts from LLVM c...
Definition: AMDGPUTargetMachine.cpp:1070
PB
PassBuilder PB(Machine, PassOpts->PTO, None, &PIC)
Passes.h
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:196
llvm::VirtRegRewriterID
char & VirtRegRewriterID
VirtRegRewriter pass.
Definition: VirtRegMap.cpp:227
llvm::createAMDGPUAlwaysInlinePass
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Definition: AMDGPUAlwaysInlinePass.cpp:163
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:67
llvm::initializeSILowerSGPRSpillsPass
void initializeSILowerSGPRSpillsPass(PassRegistry &)
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:641
llvm::PassBuilder::registerPipelineEarlySimplificationEPCallback
void registerPipelineEarlySimplificationEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:466
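A hedged sketch of using this extension point from registerPassBuilderCallbacks; the choice of AMDGPUPrintfRuntimeBindingPass and the O0 guard are illustrative, not a statement of what this file does:
  PB.registerPipelineEarlySimplificationEPCallback(
      [](ModulePassManager &PM, OptimizationLevel Level) {
        if (Level != OptimizationLevel::O0)
          PM.addPass(AMDGPUPrintfRuntimeBindingPass()); // illustrative pass choice
      });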
llvm::AMDGPUTargetMachine::getFeatureString
StringRef getFeatureString(const Function &F) const
Definition: AMDGPUTargetMachine.cpp:510
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
OptVGPRLiveRange
static cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::createLCSSAPass
Pass * createLCSSAPass()
Definition: LCSSA.cpp:486
llvm::createModuleToFunctionPassAdaptor
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:1227
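A sketch of wrapping a function pass so it can run from a module pipeline; AMDGPUUseNativeCallsPass is used here only because it is listed on this page and appears to be default-constructible:
  ModulePassManager MPM;
  MPM.addPass(createModuleToFunctionPassAdaptor(AMDGPUUseNativeCallsPass()));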
llvm::TargetMachine::TargetTriple
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with.
Definition: TargetMachine.h:99
OptExecMaskPreRA
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
llvm::GCLoweringID
char & GCLoweringID
GCLowering Pass - Used by gc.root to perform its default lowering operations.
Definition: GCRootLowering.cpp:88
llvm::yaml::SIMachineFunctionInfo::ScratchRSrcReg
StringValue ScratchRSrcReg
Definition: SIMachineFunctionInfo.h:286
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::AMDGPUUnifyDivergentExitNodesID
char & AMDGPUUnifyDivergentExitNodesID
Definition: AMDGPUUnifyDivergentExitNodes.cpp:79
llvm::StringRef::empty
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
llvm::initializeSIInsertWaitcntsPass
void initializeSIInsertWaitcntsPass(PassRegistry &)
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::initializeSIAnnotateControlFlowPass
void initializeSIAnnotateControlFlowPass(PassRegistry &)
llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition: MachineScheduler.cpp:3492
llvm::AMDGPUFunctionArgInfo::WorkGroupIDZ
ArgDescriptor WorkGroupIDZ
Definition: AMDGPUArgumentUsageInfo.h:135
llvm::RegisterRegAllocBase< RegisterRegAlloc >::FunctionPassCtor
FunctionPass *(*)() FunctionPassCtor
Definition: RegAllocRegistry.h:32
llvm::EngineKind::JIT
@ JIT
Definition: ExecutionEngine.h:524
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:133
llvm::DetectDeadLanesID
char & DetectDeadLanesID
This pass adds dead/undef flags after analyzing subregister lanes.
Definition: DetectDeadLanes.cpp:128
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::TargetMachine::getMCSubtargetInfo
const MCSubtargetInfo * getMCSubtargetInfo() const
Definition: TargetMachine.h:212
llvm::AMDGPUFunctionArgInfo::PrivateSegmentBuffer
ArgDescriptor PrivateSegmentBuffer
Definition: AMDGPUArgumentUsageInfo.h:124
llvm::createAMDGPUAtomicOptimizerPass
FunctionPass * createAMDGPUAtomicOptimizerPass()
Definition: AMDGPUAtomicOptimizer.cpp:707
llvm::initializeR600VectorRegMergerPass
void initializeR600VectorRegMergerPass(PassRegistry &)
IPO.h
llvm::SIPeepholeSDWAID
char & SIPeepholeSDWAID
Definition: SIPeepholeSDWA.cpp:191
llvm::SIMachineFunctionInfo::initializeBaseYamlFields
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
Definition: SIMachineFunctionInfo.cpp:562
llvm::createGlobalDCEPass
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::GCNTTIImpl
Definition: AMDGPUTargetTransformInfo.h:59
llvm::SIFixVGPRCopiesID
char & SIFixVGPRCopiesID
Definition: SIFixVGPRCopies.cpp:45
llvm::initializeAMDGPURewriteOutArgumentsPass
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
CGSCCPassManager.h
llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition: MachineScheduler.h:125
llvm::GCNIterativeScheduler::SCHEDULE_MINREGFORCED
@ SCHEDULE_MINREGFORCED
Definition: GCNIterativeScheduler.h:35
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::AMDGPUSimplifyLibCallsPass
Definition: AMDGPU.h:60
llvm::AMDGPUPassConfig::createMachineScheduler
ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override
Create an instance of ScheduleDAGInstrs to be run within the standard MachineScheduler pass for this ...
Definition: AMDGPUTargetMachine.cpp:1081
llvm::TargetPassConfig::addIRPasses
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition: TargetPassConfig.cpp:844
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::TargetPassConfig::addOptimizedRegAlloc
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
Definition: TargetPassConfig.cpp:1426
llvm::AMDGPUFunctionArgInfo::PrivateSegmentWaveByteOffset
ArgDescriptor PrivateSegmentWaveByteOffset
Definition: AMDGPUArgumentUsageInfo.h:137
llvm::SIFormMemoryClausesID
char & SIFormMemoryClausesID
Definition: SIFormMemoryClauses.cpp:91
llvm::LiveVariablesID
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is live and sets mac...
Definition: LiveVariables.cpp:45
LateCFGStructurize
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
TargetPassConfig.h
llvm::createExternalAAWrapperPass
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
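A sketch of the usual pairing of this wrapper with the AMDGPU AA passes under the legacy pass manager (PM is assumed to be a legacy::PassManagerBase):
  PM.add(createAMDGPUAAWrapperPass());
  PM.add(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
    // Forward the AMDGPU AA result into the query set if the analysis is available.
    if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
      AAR.addAAResult(WrapperPass->getResult());
  }));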
llvm::SIFixSGPRCopiesID
char & SIFixSGPRCopiesID
Definition: SIFixSGPRCopies.cpp:121
llvm::AMDGPUFunctionArgInfo::WorkGroupIDY
ArgDescriptor WorkGroupIDY
Definition: AMDGPUArgumentUsageInfo.h:134
Localizer.h
llvm::AMDGPUAS::UNKNOWN_ADDRESS_SPACE
@ UNKNOWN_ADDRESS_SPACE
Definition: AMDGPU.h:399
llvm::MachineCSEID
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:153
llvm::GCNDPPCombineID
char & GCNDPPCombineID
Definition: GCNDPPCombine.cpp:111
llvm::TargetPassConfig::addCodeGenPrepare
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
Definition: TargetPassConfig.cpp:973
llvm::AMDGPU::SIModeRegisterDefaults::DX10Clamp
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
Definition: AMDGPUBaseInfo.h:941
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SIInsertHardClausesID
char & SIInsertHardClausesID
Definition: SIInsertHardClauses.cpp:220
GCNMinRegSchedRegistry
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
llvm::AMDGPUPassConfig::addStraightLineScalarOptimizationPasses
void addStraightLineScalarOptimizationPasses()
Definition: AMDGPUTargetMachine.cpp:928
llvm::AMDGPU::isFlatGlobalAddrSpace
bool isFlatGlobalAddrSpace(unsigned AS)
Definition: AMDGPU.h:406
llvm::AMDGPU::SIModeRegisterDefaults::FP64FP16InputDenormals
bool FP64FP16InputDenormals
If this is set, neither input nor output denormals are flushed for both f64 and f16/v2f16 instructions...
Definition: AMDGPUBaseInfo.h:950
llvm::AMDGPUTargetMachine::getPredicatedAddrSpace
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const override
If the specified predicate checks whether a generic pointer falls within a specified address space,...
Definition: AMDGPUTargetMachine.cpp:778
llvm::getTheGCNTarget
Target & getTheGCNTarget()
The target for GCN GPUs.
Definition: AMDGPUTargetInfo.cpp:25
llvm::AMDGPUPassConfig::getAMDGPUTargetMachine
AMDGPUTargetMachine & getAMDGPUTargetMachine() const
Definition: AMDGPUTargetMachine.h:110
llvm::initializeSIOptimizeExecMaskingPass
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
llvm::initializeSIPostRABundlerPass
void initializeSIPostRABundlerPass(PassRegistry &)
llvm::SIScheduleDAGMI
Definition: SIMachineScheduler.h:425
llvm::PassBuilder::registerPipelineParsingCallback
void registerPipelineParsingCallback(const std::function< bool(StringRef Name, CGSCCPassManager &, ArrayRef< PipelineElement >)> &C)
Register pipeline parsing callbacks with this pass builder instance.
Definition: PassBuilder.h:512
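A sketch of a pipeline-parsing hook; the module-pass-manager overload and the textual pass name are assumptions for illustration:
  PB.registerPipelineParsingCallback(
      [](StringRef Name, ModulePassManager &PM,
         ArrayRef<PassBuilder::PipelineElement>) {
        if (Name == "amdgpu-unify-metadata") { // illustrative name
          PM.addPass(AMDGPUUnifyMetadataPass());
          return true;
        }
        return false;
      });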
llvm::initializeAMDGPUAAWrapperPassPass
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Definition: MachineScheduler.h:271
llvm::initializeAMDGPUCodeGenPreparePass
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
llvm::AMDGPUPassConfig::AMDGPUPassConfig
AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
Definition: AMDGPUTargetMachine.cpp:910
llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
llvm::initializeGCNNSAReassignPass
void initializeGCNNSAReassignPass(PassRegistry &)
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::AMDGPUTargetMachine::EnableLowerModuleLDS
static bool EnableLowerModuleLDS
Definition: AMDGPUTargetMachine.h:38
llvm::yaml::StringValue
A wrapper around std::string which contains a source range that's being set during parsing.
Definition: MIRYamlMapping.h:34
llvm::GlobalDCEPass
Pass to remove unused function declarations.
Definition: GlobalDCE.h:29
llvm::PatchableFunctionID
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
Definition: PatchableFunction.cpp:96
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
IterativeGCNMaxOccupancySchedRegistry
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
AMDGPUExportClustering.h
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::AMDGPUFunctionArgInfo::WorkItemIDZ
ArgDescriptor WorkItemIDZ
Definition: AMDGPUArgumentUsageInfo.h:150
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::CodeGenOpt::None
@ None
Definition: CodeGen.h:53
llvm::createSIShrinkInstructionsPass
FunctionPass * createSIShrinkInstructionsPass()
llvm::createAMDGPUMachineCFGStructurizerPass
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
Definition: AMDGPUMachineCFGStructurizer.cpp:2851
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:73
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:483
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
llvm::TargetPassConfig::addPass
AnalysisID addPass(AnalysisID PassID)
Utilities for targets to add passes to the pass manager.
Definition: TargetPassConfig.cpp:772
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::Constant::removeDeadConstantUsers
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:748
llvm::initializeSIFormMemoryClausesPass
void initializeSIFormMemoryClausesPass(PassRegistry &)
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
computeDataLayout
static StringRef computeDataLayout(const Triple &TT)
Definition: AMDGPUTargetMachine.cpp:447
llvm::Reloc::PIC_
@ PIC_
Definition: CodeGen.h:22
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::initializeAMDGPUExternalAAWrapperPass
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
AMDGPU.h
llvm::GCNTargetMachine::getTargetTransformInfo
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
Definition: AMDGPUTargetMachine.cpp:839
llvm::yaml::SIMachineFunctionInfo::StackPtrOffsetReg
StringValue StackPtrOffsetReg
Definition: SIMachineFunctionInfo.h:288
SimplifyLibCalls.h
llvm::AMDGPUPassConfig::addPreISel
bool addPreISel() override
Methods with trivial inline returns are convenient points in the common codegen pass pipeline where t...
Definition: AMDGPUTargetMachine.cpp:1064
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
GlobalDCE.h
llvm::yaml::SIMachineFunctionInfo::Mode
SIMode Mode
Definition: SIMachineFunctionInfo.h:291
llvm::getStandardCSEConfigForOpt
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
Definition: CSEInfo.cpp:74
llvm::createAMDGPURegBankCombiner
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
Definition: AMDGPURegBankCombiner.cpp:486
EnablePreRAOptimizations
static cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
IRTranslator.h
llvm::TargetMachine::getTargetFeatureString
StringRef getTargetFeatureString() const
Definition: TargetMachine.h:131
EarlyInlineAll
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::once_flag
std::once_flag once_flag
Definition: Threading.h:60
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:358
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::AMDGPUFunctionArgInfo::ImplicitBufferPtr
ArgDescriptor ImplicitBufferPtr
Definition: AMDGPUArgumentUsageInfo.h:144
llvm::SIWholeQuadModeID
char & SIWholeQuadModeID
Definition: SIWholeQuadMode.cpp:265
llvm::getEffectiveRelocModel
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
Definition: AVRTargetMachine.cpp:40
EnableSROA
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
llvm::initializeAMDGPULowerKernelAttributesPass
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
llvm::getEffectiveCodeModel
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value.
Definition: TargetMachine.h:498
llvm::AMDGPUPassConfig::getCSEConfig
std::unique_ptr< CSEConfigBase > getCSEConfig() const override
Returns the CSEConfig object to use for the current optimization level.
Definition: AMDGPUTargetMachine.cpp:847
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
llvm::LLVMTargetMachine::initAsmInfo
void initAsmInfo()
Definition: LLVMTargetMachine.cpp:41
llvm::initializeAMDGPUAnnotateUniformValuesPass
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
llvm::RenameIndependentSubregsID
char & RenameIndependentSubregsID
This pass detects subregister lanes in a virtual register that are used independently of other lanes ...
Definition: RenameIndependentSubregs.cpp:113
llvm::AMDGPUPrintfRuntimeBindingPass
Definition: AMDGPU.h:268
llvm::AMDGPUReplaceLDSUseWithPointerPass
Definition: AMDGPU.h:150
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::createStructurizeCFGPass
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...
Definition: StructurizeCFG.cpp:1086
llvm::AMDGPU::SIModeRegisterDefaults::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition: AMDGPUBaseInfo.h:951
llvm::GCNTargetMachine::createPassConfig
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
Definition: AMDGPUTargetMachine.cpp:1380
llvm::PassManager< Module >
llvm::createAMDGPULowerKernelAttributesPass
ModulePass * createAMDGPULowerKernelAttributesPass()
Definition: AMDGPULowerKernelAttributes.cpp:258
llvm::initializeSIFixSGPRCopiesPass
void initializeSIFixSGPRCopiesPass(PassRegistry &)
llvm::PerFunctionMIParsingState
Definition: MIParser.h:162
llvm::AMDGPUFunctionArgInfo::WorkGroupInfo
ArgDescriptor WorkGroupInfo
Definition: AMDGPUArgumentUsageInfo.h:136
llvm::createAMDGPUPromoteAllocaToVector
FunctionPass * createAMDGPUPromoteAllocaToVector()
Definition: AMDGPUPromoteAlloca.cpp:1175
llvm::OptimizationLevel::getSpeedupLevel
unsigned getSpeedupLevel() const
Definition: OptimizationLevel.h:121
llvm::initializeAMDGPULowerModuleLDSPass
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
LLVM_READNONE
#define LLVM_READNONE
Definition: Compiler.h:206
createIterativeILPMachineScheduler
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:415
llvm::parseNamedRegisterReference
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg, StringRef Src, SMDiagnostic &Error)
Definition: MIParser.cpp:3428
EnableEarlyIfConversion
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
llvm::initializeSIFixVGPRCopiesPass
void initializeSIFixVGPRCopiesPass(PassRegistry &)
llvm::yaml::SIMode::DX10Clamp
bool DX10Clamp
Definition: SIMachineFunctionInfo.h:232
llvm::initializeAMDGPUPromoteAllocaToVectorPass
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)
EnableScalarIRPasses
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
llvm::AMDGPUPromoteKernelArgumentsPass
Definition: AMDGPU.h:109
llvm::initializeSIPreEmitPeepholePass
void initializeSIPreEmitPeepholePass(PassRegistry &)
createIterativeGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:402
llvm::call_once
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:90
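call_once pairs with a static once_flag for thread-safe one-time setup; a generic sketch (the function name is hypothetical):
  static void initSomethingOnce() {
    static llvm::once_flag InitFlag;
    llvm::call_once(InitFlag, [] { /* perform the one-time initialization here */ });
  }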
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:607
llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks
void registerPassBuilderCallbacks(PassBuilder &PB) override
Allow the target to modify the pass pipeline with New Pass Manager (similar to adjustPassManager for ...
Definition: AMDGPUTargetMachine.cpp:606
EnablePromoteKernelArguments
static cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
llvm::TargetPassConfig::addMachineSSAOptimization
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form.
Definition: TargetPassConfig.cpp:1277
llvm::AMDGPUPassConfig::addEarlyCSEOrGVNPass
void addEarlyCSEOrGVNPass()
Definition: AMDGPUTargetMachine.cpp:921
llvm::createAMDGPUPropagateAttributesEarlyPass
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:401
llvm::AMDGPUPropagateAttributesEarlyPass
Definition: AMDGPU.h:126
llvm::initializeSIModeRegisterPass
void initializeSIModeRegisterPass(PassRegistry &)
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:155
llvm::createLoadClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:1576
RegBankSelect.h
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
GCNMaxOccupancySchedRegistry
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
llvm::createAMDGPULowerKernelArgumentsPass
FunctionPass * createAMDGPULowerKernelArgumentsPass()
Definition: AMDGPULowerKernelArguments.cpp:248
llvm::AMDGPUTargetMachine::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast between SrcAS and DestAS is a noop.
Definition: AMDGPUTargetMachine.cpp:752
llvm::PassManagerBuilder::EP_ModuleOptimizerEarly
@ EP_ModuleOptimizerEarly
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
Definition: PassManagerBuilder.h:75
llvm::createSIModeRegisterPass
FunctionPass * createSIModeRegisterPass()
Definition: SIModeRegister.cpp:157
llvm::OptimizationLevel
Definition: OptimizationLevel.h:22
llvm::ArgDescriptor::createRegister
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Definition: AMDGPUArgumentUsageInfo.h:44
PassManager.h
llvm::createInternalizePass
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
Definition: Internalize.cpp:315
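The MustPreserveGV predicate decides which symbols survive internalization; a hedged sketch with a hypothetical naming rule, followed by GlobalDCE to drop what becomes unreferenced (PM is assumed to be a legacy::PassManagerBase):
  PM.add(createInternalizePass([](const GlobalValue &GV) {
    // Hypothetical rule: keep only symbols whose names start with "kernel_".
    return GV.getName().startswith("kernel_");
  }));
  PM.add(createGlobalDCEPass());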
llvm::SourceMgr::DK_Error
@ DK_Error
Definition: SourceMgr.h:34
llvm::createAMDGPUReplaceLDSUseWithPointerPass
ModulePass * createAMDGPUReplaceLDSUseWithPointerPass()
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:639
llvm::AMDGPUTargetMachine::adjustPassManager
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g.
Definition: AMDGPUTargetMachine.cpp:528
llvm::LLVMTargetMachine
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Definition: TargetMachine.h:406
llvm::TargetPassConfig::disablePass
void disablePass(AnalysisID PassID)
Allow the target to disable a specific standard pass by default.
Definition: TargetPassConfig.h:196
llvm::DeadMachineInstructionElimID
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
Definition: DeadMachineInstructionElim.cpp:57
llvm::PerFunctionMIParsingState::MF
MachineFunction & MF
Definition: MIParser.h:164
GCNILPSchedRegistry
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
llvm::AnalysisManager::registerPass
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:845
llvm::AMDGPUFunctionArgInfo::KernargSegmentPtr
ArgDescriptor KernargSegmentPtr
Definition: AMDGPUArgumentUsageInfo.h:127
llvm::createAMDGPUPromoteAlloca
FunctionPass * createAMDGPUPromoteAlloca()
Definition: AMDGPUPromoteAlloca.cpp:1171
llvm::initializeAMDGPUPrintfRuntimeBindingPass
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
llvm::AAManager::registerFunctionAnalysis
void registerFunctionAnalysis()
Register a specific AA result.
Definition: AliasAnalysis.h:1300
llvm::AMDGPUPassConfig::isPassEnabled
bool isPassEnabled(const cl::opt< bool > &Opt, CodeGenOpt::Level Level=CodeGenOpt::Default) const
Check if a pass is enabled given the Opt option.
Definition: AMDGPUTargetMachine.h:131
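A sketch of how such a helper is typically consulted while building the pipeline, inside an AMDGPUPassConfig method; the particular option and pass shown are illustrative:
  if (isPassEnabled(EnableLoadStoreVectorizer))
    addPass(createLoadStoreVectorizerPass());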
llvm::BranchRelaxationPassID
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
Definition: BranchRelaxation.cpp:119
llvm::initializeAMDGPUPreLegalizerCombinerPass
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
llvm::createAMDGPUCodeGenPreparePass
FunctionPass * createAMDGPUCodeGenPreparePass()
Definition: AMDGPUCodeGenPrepare.cpp:1465
llvm::createAMDGPUPromoteKernelArgumentsPass
FunctionPass * createAMDGPUPromoteKernelArgumentsPass()
Definition: AMDGPUPromoteKernelArguments.cpp:180
llvm::RegisterRegAllocBase
RegisterRegAllocBase class - Track the registration of register allocators.
Definition: RegAllocRegistry.h:30
llvm::MachineSchedulerID
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Definition: MachineScheduler.cpp:214
llvm::AMDGPUTargetMachine::EnableFunctionCalls
static bool EnableFunctionCalls
Definition: AMDGPUTargetMachine.h:37
llvm::initializeAMDGPUAttributorPass
void initializeAMDGPUAttributorPass(PassRegistry &)
Legalizer.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::createLICMPass
Pass * createLICMPass()
Definition: LICM.cpp:322
llvm::createAMDGPUFixFunctionBitcastsPass
ModulePass * createAMDGPUFixFunctionBitcastsPass()
llvm::GCNNSAReassignID
char & GCNNSAReassignID
Definition: GCNNSAReassign.cpp:104
llvm::TargetMachine::getTargetCPU
StringRef getTargetCPU() const
Definition: TargetMachine.h:130
llvm::PassManagerBuilder::EP_EarlyAsPossible
@ EP_EarlyAsPossible
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations,...
Definition: PassManagerBuilder.h:71
llvm::initializeAMDGPUAnnotateKernelFeaturesPass
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
llvm::PostRASchedulerID
char & PostRASchedulerID
PostRAScheduler - This pass performs post register allocation scheduling.
Definition: PostRASchedulerList.cpp:199
llvm::AMDGPUFunctionArgInfo::WorkItemIDY
ArgDescriptor WorkItemIDY
Definition: AMDGPUArgumentUsageInfo.h:149
llvm::createAMDGPUPreLegalizeCombiner
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPreLegalizerCombiner.cpp:298
llvm::AMDGPUTargetMachine::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const override
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Definition: AMDGPUTargetMachine.cpp:758
llvm::SMRange
Represents a range in source code.
Definition: SMLoc.h:48
N
#define N
llvm::createStraightLineStrengthReducePass
FunctionPass * createStraightLineStrengthReducePass()
Definition: StraightLineStrengthReduce.cpp:269
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:335
llvm::initializeAMDGPUFixFunctionBitcastsPass
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &)
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:129
llvm::GCNPreRAOptimizationsID
char & GCNPreRAOptimizationsID
Definition: GCNPreRAOptimizations.cpp:79
llvm::initializeSILoadStoreOptimizerPass
void initializeSILoadStoreOptimizerPass(PassRegistry &)
llvm::legacy::PassManagerBase
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
Definition: LegacyPassManager.h:39
llvm::IRTranslator
Definition: IRTranslator.h:63
llvm::PassBuilder::registerCGSCCOptimizerLateEPCallback
void registerCGSCCOptimizerLateEPCallback(const std::function< void(CGSCCPassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:436
llvm::initializeAMDGPURegBankCombinerPass
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
RegName
#define RegName(no)
llvm::createSIAnnotateControlFlowPass
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
Definition: SIAnnotateControlFlow.cpp:381
Vectorize.h
llvm::yaml::SIMode::IEEE
bool IEEE
Definition: SIMachineFunctionInfo.h:231
llvm::initializeAMDGPUCtorDtorLoweringPass
void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &)
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::SIFoldOperandsID
char & SIFoldOperandsID
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::createBasicRegisterAllocator
FunctionPass * createBasicRegisterAllocator()
BasicRegisterAllocation Pass - This pass implements a degenerate global register allocator using the ...
Definition: RegAllocBasic.cpp:335
llvm::RegBankSelect
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
Definition: RegBankSelect.h:91
llvm::MIPatternMatch::m_Not
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
Definition: MIPatternMatch.h:654
llvm::EarlyMachineLICMID
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
Definition: MachineLICM.cpp:298
llvm::AMDGPUTargetMachine::getGPUName
StringRef getGPUName(const Function &F) const
Definition: AMDGPUTargetMachine.cpp:505
llvm::PostMachineSchedulerID
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions post-RA.
Definition: MachineScheduler.cpp:245
llvm::cl::desc
Definition: CommandLine.h:412
llvm::ScheduleDAGMILive
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
Definition: MachineScheduler.h:390
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:119
llvm::PassManagerBuilder::EP_CGSCCOptimizerLate
@ EP_CGSCCOptimizerLate
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
Definition: PassManagerBuilder.h:116
llvm::CodeGenOpt::Less
@ Less
Definition: CodeGen.h:54
llvm::AMDGPUTargetMachine::AMDGPUTargetMachine
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
Definition: AMDGPUTargetMachine.cpp:480
llvm::TargetPassConfig::addFastRegAlloc
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
Definition: TargetPassConfig.cpp:1416
llvm::AMDGPUPerfHintAnalysisID
char & AMDGPUPerfHintAnalysisID
Definition: AMDGPUPerfHintAnalysis.cpp:58
TargetRegistry.h
llvm::createSROAPass
FunctionPass * createSROAPass()
Definition: SROA.cpp:4818
llvm::AMDGPUPropagateAttributesLatePass
Definition: AMDGPU.h:138
EnableLibCallSimplify
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
InitializePasses.h
llvm::yaml::SIMode::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition: SIMachineFunctionInfo.h:236
llvm::SIOptimizeExecMaskingPreRAID
char & SIOptimizeExecMaskingPreRAID
Definition: SIOptimizeExecMaskingPreRA.cpp:75
llvm::createGCNMCRegisterInfo
MCRegisterInfo * createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour)
Definition: AMDGPUMCTargetDesc.cpp:68
llvm::TargetMachine::MRI
std::unique_ptr< const MCRegisterInfo > MRI
Definition: TargetMachine.h:109
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
llvm::AMDGPUTargetMachine::EnableLateStructurizeCFG
static bool EnableLateStructurizeCFG
Definition: AMDGPUTargetMachine.h:36
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
llvm::TargetPassConfig::addILPOpts
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
Definition: TargetPassConfig.h:371
llvm::TargetPassConfig::getOptLevel
CodeGenOpt::Level getOptLevel() const
Definition: TargetPassConfig.cpp:635
AMDGPUTargetMachine.h
llvm::GCNTargetMachine::createDefaultFuncInfoYAML
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
Definition: AMDGPUTargetMachine.cpp:1384
PassName
static const char PassName[]
Definition: X86LowerAMXIntrinsics.cpp:670
llvm::initializeSILowerControlFlowPass
void initializeSILowerControlFlowPass(PassRegistry &)
llvm::SILateBranchLoweringPassID
char & SILateBranchLoweringPassID
Definition: SILateBranchLowering.cpp:66
RegAllocRegistry.h
llvm::createAMDGPUSimplifyLibCallsPass
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
Definition: AMDGPULibCalls.cpp:1657
MIParser.h
llvm::Localizer
This pass implements the localization mechanism described at the top of this file.
Definition: Localizer.h:40
llvm::createAMDGPUMacroFusionDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...
Definition: AMDGPUMacroFusion.cpp:62
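Putting the note above into context, a minimal sketch of a custom scheduler factory that installs DAG mutations; the factory name is hypothetical and the generic live scheduler is used for simplicity:
  static ScheduleDAGInstrs *createClusteringScheduler(MachineSchedContext *C) {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
    return DAG;
  }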