LLVM  15.0.0git
AMDGPUTargetMachine.cpp
Go to the documentation of this file.
1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The AMDGPU target machine contains all of the hardware specific
11 /// information needed to emit code for SI+ GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUAliasAnalysis.h"
18 #include "AMDGPUExportClustering.h"
19 #include "AMDGPUIGroupLP.h"
20 #include "AMDGPUMacroFusion.h"
21 #include "AMDGPUTargetObjectFile.h"
23 #include "GCNIterativeScheduler.h"
24 #include "GCNSchedStrategy.h"
25 #include "R600.h"
26 #include "R600TargetMachine.h"
27 #include "SIMachineFunctionInfo.h"
28 #include "SIMachineScheduler.h"
38 #include "llvm/CodeGen/Passes.h"
41 #include "llvm/IR/IntrinsicsAMDGPU.h"
43 #include "llvm/IR/PassManager.h"
44 #include "llvm/IR/PatternMatch.h"
45 #include "llvm/InitializePasses.h"
46 #include "llvm/MC/TargetRegistry.h"
48 #include "llvm/Transforms/IPO.h"
53 #include "llvm/Transforms/Scalar.h"
56 #include "llvm/Transforms/Utils.h"
59 
60 using namespace llvm;
61 using namespace llvm::PatternMatch;
62 
63 namespace {
/// Registry entry type for SGPR register allocators; instances of this class
/// register a named allocator factory with the -sgpr-regalloc option.
class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
public:
  // N = option value name, D = description, C = factory for the alloc pass.
  SGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
      : RegisterRegAllocBase(N, D, C) {}
};
69 
/// Registry entry type for VGPR register allocators; instances of this class
/// register a named allocator factory with the -vgpr-regalloc option.
class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
public:
  // N = option value name, D = description, C = factory for the alloc pass.
  VGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
      : RegisterRegAllocBase(N, D, C) {}
};
75 
76 static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
77  const TargetRegisterClass &RC) {
78  return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
79 }
80 
81 static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
82  const TargetRegisterClass &RC) {
83  return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
84 }
85 
86 
/// Dummy default factory for the -{sgpr|vgpr}-regalloc=... command line
/// options; a null return marks "no explicit allocator chosen" so callers can
/// tell whether the option was overridden on the command line.
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
89 
90 /// A dummy default pass factory indicates whether the register allocator is
91 /// overridden on the command line.
92 static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
93 static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
94 
95 static SGPRRegisterRegAlloc
96 defaultSGPRRegAlloc("default",
97  "pick SGPR register allocator based on -O option",
99 
100 static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,
102 SGPRRegAlloc("sgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
103  cl::desc("Register allocator to use for SGPRs"));
104 
105 static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
107 VGPRRegAlloc("vgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
108  cl::desc("Register allocator to use for VGPRs"));
109 
110 
111 static void initializeDefaultSGPRRegisterAllocatorOnce() {
112  RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
113 
114  if (!Ctor) {
115  Ctor = SGPRRegAlloc;
116  SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);
117  }
118 }
119 
120 static void initializeDefaultVGPRRegisterAllocatorOnce() {
121  RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
122 
123  if (!Ctor) {
124  Ctor = VGPRRegAlloc;
125  VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);
126  }
127 }
128 
/// Factory for the basic-quality SGPR-only allocator.
static FunctionPass *createBasicSGPRRegisterAllocator() {
  return createBasicRegisterAllocator(onlyAllocateSGPRs);
}

/// Factory for the greedy (optimizing) SGPR-only allocator.
static FunctionPass *createGreedySGPRRegisterAllocator() {
  return createGreedyRegisterAllocator(onlyAllocateSGPRs);
}

/// Factory for the fast (-O0) SGPR-only allocator.
// NOTE(review): the false argument presumably keeps virtual registers live
// for the later VGPR allocation — confirm against
// createFastRegisterAllocator's ClearVirtRegs parameter.
static FunctionPass *createFastSGPRRegisterAllocator() {
  return createFastRegisterAllocator(onlyAllocateSGPRs, false);
}
140 
/// Factory for the basic-quality VGPR-only allocator.
static FunctionPass *createBasicVGPRRegisterAllocator() {
  return createBasicRegisterAllocator(onlyAllocateVGPRs);
}

/// Factory for the greedy (optimizing) VGPR-only allocator.
static FunctionPass *createGreedyVGPRRegisterAllocator() {
  return createGreedyRegisterAllocator(onlyAllocateVGPRs);
}

/// Factory for the fast (-O0) VGPR-only allocator.
// NOTE(review): the true argument presumably clears virtual registers, VGPR
// allocation being the final allocation stage — confirm against
// createFastRegisterAllocator's ClearVirtRegs parameter.
static FunctionPass *createFastVGPRRegisterAllocator() {
  return createFastRegisterAllocator(onlyAllocateVGPRs, true);
}
152 
// Register the "basic"/"greedy"/"fast" choices for -sgpr-regalloc.
static SGPRRegisterRegAlloc basicRegAllocSGPR(
    "basic", "basic register allocator", createBasicSGPRRegisterAllocator);
static SGPRRegisterRegAlloc greedyRegAllocSGPR(
    "greedy", "greedy register allocator", createGreedySGPRRegisterAllocator);

static SGPRRegisterRegAlloc fastRegAllocSGPR(
    "fast", "fast register allocator", createFastSGPRRegisterAllocator);

// Register the "basic"/"greedy"/"fast" choices for -vgpr-regalloc.
static VGPRRegisterRegAlloc basicRegAllocVGPR(
    "basic", "basic register allocator", createBasicVGPRRegisterAllocator);
static VGPRRegisterRegAlloc greedyRegAllocVGPR(
    "greedy", "greedy register allocator", createGreedyVGPRRegisterAllocator);

static VGPRRegisterRegAlloc fastRegAllocVGPR(
    "fast", "fast register allocator", createFastVGPRRegisterAllocator);
169 }
170 
172  "amdgpu-sroa",
173  cl::desc("Run SROA after promote alloca pass"),
175  cl::init(true));
176 
// -amdgpu-early-ifcvt: run early if-conversion (off by default).
static cl::opt<bool>
EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
                        cl::desc("Run early if-conversion"),
                        cl::init(false));
181 
// -amdgpu-opt-exec-mask-pre-ra: pre-RA exec mask optimizations (on by
// default).
static cl::opt<bool>
OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
                 cl::desc("Run pre-RA exec mask optimizations"),
                 cl::init(true));
186 
187 // Option to disable vectorizer for tests.
189  "amdgpu-load-store-vectorizer",
190  cl::desc("Enable load store vectorizer"),
191  cl::init(true),
192  cl::Hidden);
193 
194 // Option to control global loads scalarization
196  "amdgpu-scalarize-global-loads",
197  cl::desc("Enable global load scalarization"),
198  cl::init(true),
199  cl::Hidden);
200 
201 // Option to run internalize pass.
203  "amdgpu-internalize-symbols",
204  cl::desc("Enable elimination of non-kernel functions and unused globals"),
205  cl::init(false),
206  cl::Hidden);
207 
208 // Option to inline all early.
210  "amdgpu-early-inline-all",
211  cl::desc("Inline all functions early"),
212  cl::init(false),
213  cl::Hidden);
214 
216  "amdgpu-sdwa-peephole",
217  cl::desc("Enable SDWA peepholer"),
218  cl::init(true));
219 
221  "amdgpu-dpp-combine",
222  cl::desc("Enable DPP combiner"),
223  cl::init(true));
224 
// -enable-amdgpu-aa: address-space based alias analysis (on by default).
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
  cl::desc("Enable AMDGPU Alias Analysis"),
  cl::init(true));
229 
230 // Option to run late CFG structurizer
232  "amdgpu-late-structurize",
233  cl::desc("Enable late CFG structurization"),
235  cl::Hidden);
236 
237 // Enable lib calls simplifications
239  "amdgpu-simplify-libcall",
240  cl::desc("Enable amdgpu library simplifications"),
241  cl::init(true),
242  cl::Hidden);
243 
245  "amdgpu-ir-lower-kernel-arguments",
246  cl::desc("Lower kernel argument loads in IR pass"),
247  cl::init(true),
248  cl::Hidden);
249 
251  "amdgpu-reassign-regs",
252  cl::desc("Enable register reassign optimizations on gfx10+"),
253  cl::init(true),
254  cl::Hidden);
255 
257  "amdgpu-opt-vgpr-liverange",
258  cl::desc("Enable VGPR liverange optimizations for if-else structure"),
259  cl::init(true), cl::Hidden);
260 
261 // Enable atomic optimization
263  "amdgpu-atomic-optimizations",
264  cl::desc("Enable atomic optimizations"),
265  cl::init(false),
266  cl::Hidden);
267 
268 // Enable Mode register optimization
270  "amdgpu-mode-register",
271  cl::desc("Enable mode register pass"),
272  cl::init(true),
273  cl::Hidden);
274 
// -amdgpu-enable-delay-alu: GFX11+ s_delay_alu insertion (on by default).
static cl::opt<bool>
    EnableInsertDelayAlu("amdgpu-enable-delay-alu",
                         cl::desc("Enable s_delay_alu insertion"),
                         cl::init(true), cl::Hidden);
280 
// -amdgpu-dce-in-ra: machine DCE inside regalloc (on by default). The option
// exists so lit tests can keep inspected patterns from being dead-coded.
static cl::opt<bool>
EnableDCEInRA("amdgpu-dce-in-ra",
              cl::init(true), cl::Hidden,
              cl::desc("Enable machine DCE inside regalloc"));
286 
// -amdgpu-set-wave-priority: adjust wave priority (off by default).
static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
                                           cl::desc("Adjust wave priority"),
                                           cl::init(false), cl::Hidden);
290 
292  "amdgpu-scalar-ir-passes",
293  cl::desc("Enable scalar IR passes"),
294  cl::init(true),
295  cl::Hidden);
296 
298  "amdgpu-enable-structurizer-workarounds",
299  cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
300  cl::Hidden);
301 
303  "amdgpu-enable-lds-replace-with-pointer",
304  cl::desc("Enable LDS replace with pointer pass"), cl::init(false),
305  cl::Hidden);
306 
308  "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
310  cl::Hidden);
311 
313  "amdgpu-enable-pre-ra-optimizations",
314  cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
315  cl::Hidden);
316 
318  "amdgpu-enable-promote-kernel-arguments",
319  cl::desc("Enable promotion of flat kernel pointer arguments to global"),
320  cl::Hidden, cl::init(true));
321 
323  // Register the target
326 
395 }
396 
/// Create the target lowering object-file info. AMDGPU uses a single TLOF
/// flavor for every triple, so the argument is unused.
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  return std::make_unique<AMDGPUTargetObjectFile>();
}
400 
402  return new SIScheduleDAGMI(C);
403 }
404 
405 static ScheduleDAGInstrs *
407  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
408  ScheduleDAGMILive *DAG =
409  new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
410  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
411  if (ST.shouldClusterStores())
412  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
413  DAG->addMutation(createIGroupLPDAGMutation());
414  DAG->addMutation(createSchedBarrierDAGMutation());
415  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
416  DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
417  return DAG;
418 }
419 
420 static ScheduleDAGInstrs *
422  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
423  auto DAG = new GCNIterativeScheduler(C,
425  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
426  if (ST.shouldClusterStores())
427  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
428  return DAG;
429 }
430 
432  return new GCNIterativeScheduler(C,
434 }
435 
436 static ScheduleDAGInstrs *
438  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
439  auto DAG = new GCNIterativeScheduler(C,
441  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
442  if (ST.shouldClusterStores())
443  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
444  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
445  return DAG;
446 }
447 
449 SISchedRegistry("si", "Run SI's custom scheduler",
451 
453 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
454  "Run GCN scheduler to maximize occupancy",
456 
458 IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
459  "Run GCN scheduler to maximize occupancy (experimental)",
461 
463 GCNMinRegSchedRegistry("gcn-minreg",
464  "Run GCN iterative scheduler for minimal register usage (experimental)",
466 
468 GCNILPSchedRegistry("gcn-ilp",
469  "Run GCN iterative scheduler for ILP scheduling (experimental)",
471 
472 static StringRef computeDataLayout(const Triple &TT) {
473  if (TT.getArch() == Triple::r600) {
474  // 32-bit pointers.
475  return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
476  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
477  }
478 
479  // 32-bit private, local, and region pointers. 64-bit global, constant and
480  // flat, non-integral buffer fat pointers.
481  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
482  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
483  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
484  "-ni:7";
485 }
486 
488 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
489  if (!GPU.empty())
490  return GPU;
491 
492  // Need to default to a target with flat support for HSA.
493  if (TT.getArch() == Triple::amdgcn)
494  return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
495 
496  return "r600";
497 }
498 
500  // The AMDGPU toolchain only supports generating shared objects, so we
501  // must always use PIC.
502  return Reloc::PIC_;
503 }
504 
506  StringRef CPU, StringRef FS,
510  CodeGenOpt::Level OptLevel)
513  getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
514  TLOF(createTLOF(getTargetTriple())) {
515  initAsmInfo();
516  if (TT.getArch() == Triple::amdgcn) {
517  if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize64"))
519  else if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize32"))
521  }
522 }
523 
527 
529 
531  Attribute GPUAttr = F.getFnAttribute("target-cpu");
532  return GPUAttr.isValid() ? GPUAttr.getValueAsString() : getTargetCPU();
533 }
534 
536  Attribute FSAttr = F.getFnAttribute("target-features");
537 
538  return FSAttr.isValid() ? FSAttr.getValueAsString()
540 }
541 
542 /// Predicate for Internalize pass.
543 static bool mustPreserveGV(const GlobalValue &GV) {
544  if (const Function *F = dyn_cast<Function>(&GV))
545  return F->isDeclaration() || F->getName().startswith("__asan_") ||
546  F->getName().startswith("__sanitizer_") ||
547  AMDGPU::isEntryFunctionCC(F->getCallingConv());
548 
550  return !GV.use_empty();
551 }
552 
554  Builder.DivergentTarget = true;
555 
556  bool EnableOpt = getOptLevel() > CodeGenOpt::None;
557  bool Internalize = InternalizeSymbols;
558  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
559  bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
560  bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
561  bool PromoteKernelArguments =
563 
564  if (EnableFunctionCalls) {
565  delete Builder.Inliner;
567  }
568 
569  Builder.addExtension(
571  [Internalize, EarlyInline, AMDGPUAA, this](const PassManagerBuilder &,
573  if (AMDGPUAA) {
576  }
579  if (Internalize)
582  if (Internalize)
583  PM.add(createGlobalDCEPass());
584  if (EarlyInline)
586  });
587 
588  Builder.addExtension(
590  [AMDGPUAA, LibCallSimplify, this](const PassManagerBuilder &,
592  if (AMDGPUAA) {
595  }
598  if (LibCallSimplify)
600  });
601 
602  Builder.addExtension(
604  [EnableOpt, PromoteKernelArguments](const PassManagerBuilder &,
606  // Add promote kernel arguments pass to the opt pipeline right before
607  // infer address spaces which is needed to do actual address space
608  // rewriting.
609  if (PromoteKernelArguments)
611 
612  // Add infer address spaces pass to the opt pipeline after inlining
613  // but before SROA to increase SROA opportunities.
615 
616  // This should run after inlining to have any chance of doing anything,
617  // and before other cleanup optimizations.
619 
620  // Promote alloca to vector before SROA and loop unroll. If we manage
621  // to eliminate allocas before unroll we may choose to unroll less.
622  if (EnableOpt)
624  });
625 }
626 
629 }
630 
635  if (PassName == "amdgpu-propagate-attributes-late") {
637  return true;
638  }
639  if (PassName == "amdgpu-unify-metadata") {
641  return true;
642  }
643  if (PassName == "amdgpu-printf-runtime-binding") {
645  return true;
646  }
647  if (PassName == "amdgpu-always-inline") {
649  return true;
650  }
651  if (PassName == "amdgpu-replace-lds-use-with-pointer") {
653  return true;
654  }
655  if (PassName == "amdgpu-lower-module-lds") {
657  return true;
658  }
659  return false;
660  });
664  if (PassName == "amdgpu-simplifylib") {
666  return true;
667  }
668  if (PassName == "amdgpu-usenative") {
670  return true;
671  }
672  if (PassName == "amdgpu-promote-alloca") {
673  PM.addPass(AMDGPUPromoteAllocaPass(*this));
674  return true;
675  }
676  if (PassName == "amdgpu-promote-alloca-to-vector") {
678  return true;
679  }
680  if (PassName == "amdgpu-lower-kernel-attributes") {
682  return true;
683  }
684  if (PassName == "amdgpu-propagate-attributes-early") {
686  return true;
687  }
688  if (PassName == "amdgpu-promote-kernel-arguments") {
690  return true;
691  }
692  return false;
693  });
694 
696  FAM.registerPass([&] { return AMDGPUAA(); });
697  });
698 
699  PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) {
700  if (AAName == "amdgpu-aa") {
702  return true;
703  }
704  return false;
705  });
706 
715  });
716 
720  return;
721 
724 
725  if (InternalizeSymbols) {
727  }
729  if (InternalizeSymbols) {
730  PM.addPass(GlobalDCEPass());
731  }
734  });
735 
739  return;
740 
742 
743  // Add promote kernel arguments pass to the opt pipeline right before
744  // infer address spaces which is needed to do actual address space
745  // rewriting.
746  if (Level.getSpeedupLevel() > OptimizationLevel::O1.getSpeedupLevel() &&
749 
750  // Add infer address spaces pass to the opt pipeline after inlining
751  // but before SROA to increase SROA opportunities.
753 
754  // This should run after inlining to have any chance of doing
755  // anything, and before other cleanup optimizations.
757 
758  if (Level != OptimizationLevel::O0) {
759  // Promote alloca to vector before SROA and loop unroll. If we
760  // manage to eliminate allocas before unroll we may choose to unroll
761  // less.
763  }
764 
766  });
767 }
768 
769 int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
770  return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
771  AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
772  AddrSpace == AMDGPUAS::REGION_ADDRESS)
773  ? -1
774  : 0;
775 }
776 
778  unsigned DestAS) const {
779  return AMDGPU::isFlatGlobalAddrSpace(SrcAS) &&
781 }
782 
784  const auto *LD = dyn_cast<LoadInst>(V);
785  if (!LD)
787 
788  // It must be a generic pointer loaded.
789  assert(V->getType()->isPointerTy() &&
791 
792  const auto *Ptr = LD->getPointerOperand();
793  if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
795  // For a generic pointer loaded from the constant memory, it could be assumed
796  // as a global pointer since the constant memory is only populated on the
797  // host side. As implied by the offload programming model, only global
798  // pointers could be referenced on the host side.
800 }
801 
802 std::pair<const Value *, unsigned>
804  if (auto *II = dyn_cast<IntrinsicInst>(V)) {
805  switch (II->getIntrinsicID()) {
806  case Intrinsic::amdgcn_is_shared:
807  return std::make_pair(II->getArgOperand(0), AMDGPUAS::LOCAL_ADDRESS);
808  case Intrinsic::amdgcn_is_private:
809  return std::make_pair(II->getArgOperand(0), AMDGPUAS::PRIVATE_ADDRESS);
810  default:
811  break;
812  }
813  return std::make_pair(nullptr, -1);
814  }
815  // Check the global pointer predication based on
816  // (!is_share(p) && !is_private(p)). Note that logic 'and' is commutative and
817  // the order of 'is_shared' and 'is_private' is not significant.
818  Value *Ptr;
819  if (match(
820  const_cast<Value *>(V),
821  m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
822  m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
823  m_Deferred(Ptr))))))
824  return std::make_pair(Ptr, AMDGPUAS::GLOBAL_ADDRESS);
825 
826  return std::make_pair(nullptr, -1);
827 }
828 
829 unsigned
831  switch (Kind) {
842  }
843  return AMDGPUAS::FLAT_ADDRESS;
844 }
845 
846 //===----------------------------------------------------------------------===//
847 // GCN Target Machine (SI+)
848 //===----------------------------------------------------------------------===//
849 
851  StringRef CPU, StringRef FS,
855  CodeGenOpt::Level OL, bool JIT)
856  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
857 
858 const TargetSubtargetInfo *
860  StringRef GPU = getGPUName(F);
862 
863  SmallString<128> SubtargetKey(GPU);
864  SubtargetKey.append(FS);
865 
866  auto &I = SubtargetMap[SubtargetKey];
867  if (!I) {
868  // This needs to be done before we create a new subtarget since any
869  // creation will depend on the TM and the code generation flags on the
870  // function that reside in TargetOptions.
872  I = std::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
873  }
874 
875  I->setScalarizeGlobalBehavior(ScalarizeGlobal);
876 
877  return I.get();
878 }
879 
882  return TargetTransformInfo(GCNTTIImpl(this, F));
883 }
884 
885 //===----------------------------------------------------------------------===//
886 // AMDGPU Pass Setup
887 //===----------------------------------------------------------------------===//
888 
889 std::unique_ptr<CSEConfigBase> llvm::AMDGPUPassConfig::getCSEConfig() const {
891 }
892 
893 namespace {
894 
895 class GCNPassConfig final : public AMDGPUPassConfig {
896 public:
897  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
898  : AMDGPUPassConfig(TM, PM) {
899  // It is necessary to know the register usage of the entire call graph. We
900  // allow calls without EnableAMDGPUFunctionCalls if they are marked
901  // noinline, so this is always required.
902  setRequiresCodeGenSCCOrder(true);
903  substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
904  }
905 
906  GCNTargetMachine &getGCNTargetMachine() const {
907  return getTM<GCNTargetMachine>();
908  }
909 
911  createMachineScheduler(MachineSchedContext *C) const override;
912 
914  createPostMachineScheduler(MachineSchedContext *C) const override {
916  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
918  if (ST.shouldClusterStores())
920  DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
923  return DAG;
924  }
925 
926  bool addPreISel() override;
927  void addMachineSSAOptimization() override;
928  bool addILPOpts() override;
929  bool addInstSelector() override;
930  bool addIRTranslator() override;
931  void addPreLegalizeMachineIR() override;
932  bool addLegalizeMachineIR() override;
933  void addPreRegBankSelect() override;
934  bool addRegBankSelect() override;
935  void addPreGlobalInstructionSelect() override;
936  bool addGlobalInstructionSelect() override;
937  void addFastRegAlloc() override;
938  void addOptimizedRegAlloc() override;
939 
940  FunctionPass *createSGPRAllocPass(bool Optimized);
941  FunctionPass *createVGPRAllocPass(bool Optimized);
942  FunctionPass *createRegAllocPass(bool Optimized) override;
943 
944  bool addRegAssignAndRewriteFast() override;
945  bool addRegAssignAndRewriteOptimized() override;
946 
947  void addPreRegAlloc() override;
948  bool addPreRewrite() override;
949  void addPostRegAlloc() override;
950  void addPreSched2() override;
951  void addPreEmitPass() override;
952 };
953 
954 } // end anonymous namespace
955 
957  : TargetPassConfig(TM, PM) {
958  // Exceptions and StackMaps are not supported, so these passes will never do
959  // anything.
962  // Garbage collection is not supported.
965 }
966 
970  else
972 }
973 
978  // ReassociateGEPs exposes more opportunities for SLSR. See
979  // the example in reassociate-geps-and-slsr.ll.
981  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
982  // EarlyCSE can reuse.
984  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
986  // NaryReassociate on GEPs creates redundant common expressions, so run
987  // EarlyCSE after it.
989 }
990 
993 
994  // There is no reason to run these.
998 
1001 
1002  // A call to propagate attributes pass in the backend in case opt was not run.
1004 
1006 
1007  // Function calls are not supported, so make sure we inline everything.
1010  // We need to add the barrier noop pass, otherwise adding the function
1011  // inlining pass will cause all of the PassConfigs passes to be run
1012  // one function at a time, which means if we have a module with two
1013  // functions, then we will generate code for the first function
1014  // without ever running any passes on the second.
1016 
1017  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
1020 
1021  // Replace OpenCL enqueued block function pointers with global variables.
1023 
1024  // Can increase LDS used by kernel so runs before PromoteAlloca
1025  if (EnableLowerModuleLDS) {
1026  // The pass "amdgpu-replace-lds-use-with-pointer" need to be run before the
1027  // pass "amdgpu-lower-module-lds", and also it required to be run only if
1028  // "amdgpu-lower-module-lds" pass is enabled.
1031 
1033  }
1034 
1037 
1039 
1040  if (TM.getOptLevel() > CodeGenOpt::None) {
1042 
1043  if (EnableSROA)
1047 
1051  AAResults &AAR) {
1052  if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
1053  AAR.addAAResult(WrapperPass->getResult());
1054  }));
1055  }
1056 
1058  // TODO: May want to move later or split into an early and late one.
1060  }
1061  }
1062 
1064 
1065  // EarlyCSE is not always strong enough to clean up what LSR produces. For
1066  // example, GVN can combine
1067  //
1068  // %0 = add %a, %b
1069  // %1 = add %b, %a
1070  //
1071  // and
1072  //
1073  // %0 = shl nsw %a, 2
1074  // %1 = shl %a, 2
1075  //
1076  // but EarlyCSE can do neither of them.
1079 }
1080 
1082  if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
1084 
1085  // FIXME: This pass adds 2 hacky attributes that can be replaced with an
1086  // analysis, and should be removed.
1088  }
1089 
1090  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
1093 
1095 
1098 
1099  // LowerSwitch pass may introduce unreachable blocks that can
1100  // cause unexpected behavior for subsequent passes. Placing it
1101  // here seems better that these blocks would get cleaned up by
1102  // UnreachableBlockElim inserted next in the pass flow.
1104 }
1105 
1107  if (TM->getOptLevel() > CodeGenOpt::None)
1109  return false;
1110 }
1111 
1114  return false;
1115 }
1116 
1118  // Do nothing. GC is not supported.
1119  return false;
1120 }
1121 
1124  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
1126  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
1127  if (ST.shouldClusterStores())
1128  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
1129  return DAG;
1130 }
1131 
1132 //===----------------------------------------------------------------------===//
1133 // GCN Pass Setup
1134 //===----------------------------------------------------------------------===//
1135 
1136 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
1137  MachineSchedContext *C) const {
1138  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
1139  if (ST.enableSIScheduler())
1140  return createSIMachineScheduler(C);
1142 }
1143 
1144 bool GCNPassConfig::addPreISel() {
1146 
1147  if (TM->getOptLevel() > CodeGenOpt::None)
1149 
1150  if (isPassEnabled(EnableAtomicOptimizations, CodeGenOpt::Less)) {
1152  }
1153 
1154  if (TM->getOptLevel() > CodeGenOpt::None)
1155  addPass(createSinkingPass());
1156 
1157  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
1158  // regions formed by them.
1160  if (!LateCFGStructurize) {
1162  addPass(createFixIrreduciblePass());
1163  addPass(createUnifyLoopExitsPass());
1164  }
1165  addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions
1166  }
1168  if (!LateCFGStructurize) {
1170  }
1171  addPass(createLCSSAPass());
1172 
1173  if (TM->getOptLevel() > CodeGenOpt::Less)
1174  addPass(&AMDGPUPerfHintAnalysisID);
1175 
1176  return false;
1177 }
1178 
1179 void GCNPassConfig::addMachineSSAOptimization() {
1181 
1182  // We want to fold operands after PeepholeOptimizer has run (or as part of
1183  // it), because it will eliminate extra copies making it easier to fold the
1184  // real source operand. We want to eliminate dead instructions after, so that
1185  // we see fewer uses of the copies. We then need to clean up the dead
1186  // instructions leftover after the operands are folded as well.
1187  //
1188  // XXX - Can we get away without running DeadMachineInstructionElim again?
1189  addPass(&SIFoldOperandsID);
1190  if (EnableDPPCombine)
1191  addPass(&GCNDPPCombineID);
1192  addPass(&SILoadStoreOptimizerID);
1193  if (isPassEnabled(EnableSDWAPeephole)) {
1194  addPass(&SIPeepholeSDWAID);
1195  addPass(&EarlyMachineLICMID);
1196  addPass(&MachineCSEID);
1197  addPass(&SIFoldOperandsID);
1198  }
1199  addPass(&DeadMachineInstructionElimID);
1200  addPass(createSIShrinkInstructionsPass());
1201 }
1202 
1203 bool GCNPassConfig::addILPOpts() {
1205  addPass(&EarlyIfConverterID);
1206 
1208  return false;
1209 }
1210 
1211 bool GCNPassConfig::addInstSelector() {
1213  addPass(&SIFixSGPRCopiesID);
1214  addPass(createSILowerI1CopiesPass());
1215  return false;
1216 }
1217 
// GlobalISel step 1: translate LLVM IR into generic machine instructions.
bool GCNPassConfig::addIRTranslator() {
  addPass(new IRTranslator(getOptLevel()));
  return false;
}
1222 
1223 void GCNPassConfig::addPreLegalizeMachineIR() {
1224  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1225  addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
1226  addPass(new Localizer());
1227 }
1228 
// GlobalISel step 2: legalize generic machine instructions for the target.
bool GCNPassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}
1233 
1234 void GCNPassConfig::addPreRegBankSelect() {
1235  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1236  addPass(createAMDGPUPostLegalizeCombiner(IsOptNone));
1237 }
1238 
// GlobalISel step 3: assign virtual registers to register banks.
bool GCNPassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}
1243 
1244 void GCNPassConfig::addPreGlobalInstructionSelect() {
1245  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1246  addPass(createAMDGPURegBankCombiner(IsOptNone));
1247 }
1248 
// GlobalISel step 4: select target instructions.
bool GCNPassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect(getOptLevel()));
  return false;
}
1253 
1254 void GCNPassConfig::addPreRegAlloc() {
1255  if (LateCFGStructurize) {
1257  }
1258 }
1259 
1260 void GCNPassConfig::addFastRegAlloc() {
1261  // FIXME: We have to disable the verifier here because of PHIElimination +
1262  // TwoAddressInstructions disabling it.
1263 
1264  // This must be run immediately after phi elimination and before
1265  // TwoAddressInstructions, otherwise the processing of the tied operand of
1266  // SI_ELSE will introduce a copy of the tied operand source after the else.
1267  insertPass(&PHIEliminationID, &SILowerControlFlowID);
1268 
1271 
1273 }
1274 
1275 void GCNPassConfig::addOptimizedRegAlloc() {
1276  // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
1277  // instructions that cause scheduling barriers.
1278  insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
1280 
1281  if (OptExecMaskPreRA)
1283 
1284  if (isPassEnabled(EnablePreRAOptimizations))
1286 
1287  // This is not an essential optimization and it has a noticeable impact on
1288  // compilation time, so we only enable it from O2.
1289  if (TM->getOptLevel() > CodeGenOpt::Less)
1291 
1292  // FIXME: when an instruction has a Killed operand, and the instruction is
1293  // inside a bundle, seems only the BUNDLE instruction appears as the Kills of
1294  // the register in LiveVariables, this would trigger a failure in verifier,
1295  // we should fix it and enable the verifier.
1296  if (OptVGPRLiveRange)
1298  // This must be run immediately after phi elimination and before
1299  // TwoAddressInstructions, otherwise the processing of the tied operand of
1300  // SI_ELSE will introduce a copy of the tied operand source after the else.
1301  insertPass(&PHIEliminationID, &SILowerControlFlowID);
1302 
1303  if (EnableDCEInRA)
1305 
1307 }
1308 
// Hook between register assignment and VirtRegRewriter. NSA reassignment is
// gated on -amdgpu-reassign-regs (register reassign optimizations, gfx10+).
// NOTE(review): the true return value follows the TargetPassConfig contract
// for this hook — confirm against the base-class documentation.
bool GCNPassConfig::addPreRewrite() {
  if (EnableRegReassign)
    addPass(&GCNNSAReassignID);
  return true;
}
1314 
1315 FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {
1316  // Initialize the global default.
1317  llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,
1318  initializeDefaultSGPRRegisterAllocatorOnce);
1319 
1320  RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
1321  if (Ctor != useDefaultRegisterAllocator)
1322  return Ctor();
1323 
1324  if (Optimized)
1325  return createGreedyRegisterAllocator(onlyAllocateSGPRs);
1326 
1327  return createFastRegisterAllocator(onlyAllocateSGPRs, false);
1328 }
1329 
1330 FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1331  // Initialize the global default.
1332  llvm::call_once(InitializeDefaultVGPRRegisterAllocatorFlag,
1333  initializeDefaultVGPRRegisterAllocatorOnce);
1334 
1335  RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
1336  if (Ctor != useDefaultRegisterAllocator)
1337  return Ctor();
1338 
1339  if (Optimized)
1340  return createGreedyVGPRRegisterAllocator();
1341 
1342  return createFastVGPRRegisterAllocator();
1343 }
1344 
// AMDGPU allocates SGPRs and VGPRs in two separate passes (see
// createSGPRAllocPass / createVGPRAllocPass above), so the generic
// single-allocator hook must never be invoked.
1345 FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
1346  llvm_unreachable("should not be used");
1347 }
1348 
// Diagnostic text used when a generic -regalloc option is given: amdgcn only
// supports the split -sgpr-regalloc / -vgpr-regalloc options.
1349 static const char RegAllocOptNotSupportedMessage[] =
1350  "-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
1351 
// Fast (-O0) register assignment: allocate SGPRs first, lower SGPR spills
// (the SGPR equivalent of prologue/epilogue insertion), then allocate VGPRs.
1352 bool GCNPassConfig::addRegAssignAndRewriteFast() {
1353  if (!usingDefaultRegAlloc())
// NOTE(review): source line 1354 (the guarded statement, presumably reporting
// RegAllocOptNotSupportedMessage) is missing from this extracted listing.
1355 
1356  addPass(createSGPRAllocPass(false));
1357 
1358  // Equivalent of PEI for SGPRs.
1359  addPass(&SILowerSGPRSpillsID);
1360 
1361  addPass(createVGPRAllocPass(false));
1362  return true;
1363 }
1364 
// Optimized register assignment: SGPRs are allocated and rewritten before
// SGPR spill lowering runs, then VGPRs are allocated and rewritten.
1365 bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1366  if (!usingDefaultRegAlloc())
// NOTE(review): source line 1367 (the guarded statement, presumably reporting
// RegAllocOptNotSupportedMessage) is missing from this extracted listing.
1368 
1369  addPass(createSGPRAllocPass(true));
1370 
1371  // Commit allocated register changes. This is mostly necessary because too
1372  // many things rely on the use lists of the physical registers, such as the
1373  // verifier. This is only necessary with allocators which use LiveIntervals,
1374  // since FastRegAlloc does the replacements itself.
1375  addPass(createVirtRegRewriter(false));
1376 
1377  // Equivalent of PEI for SGPRs.
1378  addPass(&SILowerSGPRSpillsID);
1379 
1380  addPass(createVGPRAllocPass(true));
1381 
// Run the pre-rewrite hook (here: optional GCNNSAReassign, see addPreRewrite)
// before the final VGPR rewrite.
1382  addPreRewrite();
1383  addPass(&VirtRegRewriterID);
1384 
1385  return true;
1386 }
1387 
// Post-RA fixups: repair VGPR copies, then (when optimizing) run the
// exec-masking optimization pass.
1388 void GCNPassConfig::addPostRegAlloc() {
1389  addPass(&SIFixVGPRCopiesID);
1390  if (getOptLevel() > CodeGenOpt::None)
1391  addPass(&SIOptimizeExecMaskingID);
// NOTE(review): source line 1392 (presumably the call to
// TargetPassConfig::addPostRegAlloc()) is missing from this listing.
1393 }
1394 
1395 void GCNPassConfig::addPreSched2() {
1396  if (TM->getOptLevel() > CodeGenOpt::None)
1397  addPass(createSIShrinkInstructionsPass());
1398  addPass(&SIPostRABundlerID);
1399 }
1400 
// Final passes before machine-code emission: memory legalization, waitcnt
// insertion, mode-register setup, late branch lowering, stand-alone hazard
// handling, and branch relaxation last.
1401 void GCNPassConfig::addPreEmitPass() {
1402  addPass(createSIMemoryLegalizerPass());
1403  addPass(createSIInsertWaitcntsPass());
1404 
1405  addPass(createSIModeRegisterPass());
1406 
1407  if (getOptLevel() > CodeGenOpt::None)
1408  addPass(&SIInsertHardClausesID);
1409 
1410  addPass(&SILateBranchLoweringPassID);
1411  if (isPassEnabled(EnableSetWavePriority, CodeGenOpt::Less))
// NOTE(review): source line 1412 (the guarded statement, presumably
// addPass(createAMDGPUSetWavePriorityPass())) is missing from this listing.
1413  if (getOptLevel() > CodeGenOpt::None)
1414  addPass(&SIPreEmitPeepholeID);
1415  // The hazard recognizer that runs as part of the post-ra scheduler does not
1416  // guarantee to be able handle all hazards correctly. This is because if there
1417  // are multiple scheduling regions in a basic block, the regions are scheduled
1418  // bottom up, so when we begin to schedule a region we don't know what
1419  // instructions were emitted directly before it.
1420  //
1421  // Here we add a stand-alone hazard recognizer pass which can handle all
1422  // cases.
1423  addPass(&PostRAHazardRecognizerID);
1424 
1425  if (getOptLevel() > CodeGenOpt::Less)
1426  addPass(&AMDGPUReleaseVGPRsID);
1427 
1428  if (isPassEnabled(EnableInsertDelayAlu, CodeGenOpt::Less))
1429  addPass(&AMDGPUInsertDelayAluID);
1430 
// Branch relaxation runs last in this pipeline.
1431  addPass(&BranchRelaxationPassID);
1432 }
1433 
1435  return new GCNPassConfig(*this, PM);
1436 }
1437 
1439  return new yaml::SIMachineFunctionInfo();
1440 }
1441 
1445  return new yaml::SIMachineFunctionInfo(
1446  *MFI, *MF.getSubtarget().getRegisterInfo(), MF);
1447 }
1448 
// NOTE(review): the opening of this definition (source lines 1449-1450, the
// GCNTargetMachine::parseMachineFunctionInfo signature) and source line 1455
// (which evidently declares MFI, used throughout below) are missing from this
// extracted listing; confirm against the original AMDGPUTargetMachine.cpp.
// Deserializes the AMDGPU-specific MachineFunctionInfo from its parsed YAML
// form, validating register names/classes; returns true on error, with
// Error/SourceRange describing the failure.
1451  SMDiagnostic &Error, SMRange &SourceRange) const {
1452  const yaml::SIMachineFunctionInfo &YamlMFI =
1453  static_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
1454  MachineFunction &MF = PFS.MF;
1456 
1457  if (MFI->initializeBaseYamlFields(YamlMFI, MF, PFS, Error, SourceRange))
1458  return true;
1459 
1460  if (MFI->Occupancy == 0) {
1461  // Fixup the subtarget dependent default value.
1462  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1463  MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
1464  }
1465 
// Resolve a named register from the YAML string; on failure, record the
// source range for diagnostics and report the error.
1466  auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
1467  Register TempReg;
1468  if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
1469  SourceRange = RegName.SourceRange;
1470  return true;
1471  }
1472  RegVal = TempReg;
1473 
1474  return false;
1475  };
1476 
// Like parseRegister, but an empty string means "not specified" (no error).
1477  auto parseOptionalRegister = [&](const yaml::StringValue &RegName,
1478  Register &RegVal) {
1479  return !RegName.Value.empty() && parseRegister(RegName, RegVal);
1480  };
1481 
1482  if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))
1483  return true;
1484 
// Build a "wrong register class" diagnostic pointing at the YAML field.
1485  auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
1486  // Create a diagnostic for the register string literal.
1487  const MemoryBuffer &Buffer =
1488  *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
1489  Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
1490  RegName.Value.size(), SourceMgr::DK_Error,
1491  "incorrect register class for field", RegName.Value,
1492  None, None);
1493  SourceRange = RegName.SourceRange;
1494  return true;
1495  };
1496 
1497  if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1498  parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
1499  parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
1500  return true;
1501 
// Validate register classes of the three stack-related registers; the
// sentinel values (PRIVATE_RSRC_REG / FP_REG / SP_REG) are exempt.
1502  if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1503  !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
1504  return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
1505  }
1506 
1507  if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1508  !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1509  return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
1510  }
1511 
1512  if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1513  !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1514  return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
1515  }
1516 
// Re-register each WWM reserved register listed in the YAML.
1517  for (const auto &YamlReg : YamlMFI.WWMReservedRegs) {
1518  Register ParsedReg;
1519  if (parseRegister(YamlReg, ParsedReg))
1520  return true;
1521 
1522  MFI->reserveWWMRegister(ParsedReg);
1523  }
1524 
// Parse one optional calling-convention argument descriptor, checking its
// register class and accumulating the SGPR bookkeeping counters.
1525  auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
1526  const TargetRegisterClass &RC,
1527  ArgDescriptor &Arg, unsigned UserSGPRs,
1528  unsigned SystemSGPRs) {
1529  // Skip parsing if it's not present.
1530  if (!A)
1531  return false;
1532 
1533  if (A->IsRegister) {
1534  Register Reg;
1535  if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
1536  SourceRange = A->RegisterName.SourceRange;
1537  return true;
1538  }
1539  if (!RC.contains(Reg))
1540  return diagnoseRegisterClass(A->RegisterName);
// NOTE(review): source line 1541 (presumably assigning Arg from the parsed
// register) is missing from this extracted listing.
1542  } else
1543  Arg = ArgDescriptor::createStack(A->StackOffset);
1544  // Check and apply the optional mask.
1545  if (A->Mask)
1546  Arg = ArgDescriptor::createArg(Arg, *A->Mask);
1547 
1548  MFI->NumUserSGPRs += UserSGPRs;
1549  MFI->NumSystemSGPRs += SystemSGPRs;
1550  return false;
1551  };
1552 
// Parse every argument descriptor; any single failure aborts the whole parse.
1553  if (YamlMFI.ArgInfo &&
1554  (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
1555  AMDGPU::SGPR_128RegClass,
1556  MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
1557  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
1558  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
1559  2, 0) ||
1560  parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
1561  MFI->ArgInfo.QueuePtr, 2, 0) ||
1562  parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
1563  AMDGPU::SReg_64RegClass,
1564  MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
1565  parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
1566  AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
1567  2, 0) ||
1568  parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
1569  AMDGPU::SReg_64RegClass,
1570  MFI->ArgInfo.FlatScratchInit, 2, 0) ||
1571  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
1572  AMDGPU::SGPR_32RegClass,
1573  MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
1574  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
1575  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
1576  0, 1) ||
1577  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
1578  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
1579  0, 1) ||
1580  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
1581  AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
1582  0, 1) ||
1583  parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
1584  AMDGPU::SGPR_32RegClass,
1585  MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
1586  parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
1587  AMDGPU::SGPR_32RegClass,
1588  MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
1589  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
1590  AMDGPU::SReg_64RegClass,
1591  MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
1592  parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
1593  AMDGPU::SReg_64RegClass,
1594  MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
1595  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
1596  AMDGPU::VGPR_32RegClass,
1597  MFI->ArgInfo.WorkItemIDX, 0, 0) ||
1598  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
1599  AMDGPU::VGPR_32RegClass,
1600  MFI->ArgInfo.WorkItemIDY, 0, 0) ||
1601  parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
1602  AMDGPU::VGPR_32RegClass,
1603  MFI->ArgInfo.WorkItemIDZ, 0, 0)))
1604  return true;
1605 
// Copy the floating-point/denormal mode flags from the YAML form.
1606  MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
1607  MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
// NOTE(review): source lines 1608-1611 (presumably copying the remaining mode
// fields such as FP32InputDenormals/FP32OutputDenormals, which appear in the
// cross-reference index) are missing from this extracted listing.
1612 
1613  return false;
1614 }
llvm::AAResults::addAAResult
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
Definition: AliasAnalysis.h:520
llvm::initializeR600ControlFlowFinalizerPass
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
llvm::TargetPassConfig::addPostRegAlloc
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
Definition: TargetPassConfig.h:420
EnableDCEInRA
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
llvm::TargetMachine::getOptLevel
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.cpp:182
llvm::createFastRegisterAllocator
FunctionPass * createFastRegisterAllocator()
FastRegisterAllocation Pass - This pass register allocates as fast as possible.
Definition: RegAllocFast.cpp:1579
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1303
llvm::AMDGPUAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: AMDGPUAliasAnalysis.h:48
llvm::ArgDescriptor::createStack
static constexpr ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)
Definition: AMDGPUArgumentUsageInfo.h:49
llvm::AMDGPUFunctionArgInfo::QueuePtr
ArgDescriptor QueuePtr
Definition: AMDGPUArgumentUsageInfo.h:126
EnableLowerModuleLDS
static cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
llvm::initializeR600PacketizerPass
void initializeR600PacketizerPass(PassRegistry &)
LLVMInitializeAMDGPUTarget
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget()
Definition: AMDGPUTargetMachine.cpp:322
llvm::createAMDGPUCtorDtorLoweringPass
ModulePass * createAMDGPUCtorDtorLoweringPass()
RegAllocOptNotSupportedMessage
static const char RegAllocOptNotSupportedMessage[]
Definition: AMDGPUTargetMachine.cpp:1349
llvm::InferAddressSpacesPass
Definition: InferAddressSpaces.h:16
EnableSIModeRegisterPass
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
llvm::PerFunctionMIParsingState::SM
SourceMgr * SM
Definition: MIParser.h:165
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
PassBuilder.h
llvm::createGreedyRegisterAllocator
FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized buil...
Definition: RegAllocGreedy.cpp:179
llvm::Attribute::isValid
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition: Attributes.h:182
llvm::createAMDGPUAttributorPass
Pass * createAMDGPUAttributorPass()
Definition: AMDGPUAttributor.cpp:777
llvm::PseudoSourceValue::GlobalValueCallEntry
@ GlobalValueCallEntry
Definition: PseudoSourceValue.h:43
llvm::AMDGPUTargetMachine::registerDefaultAliasAnalyses
void registerDefaultAliasAnalyses(AAManager &) override
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Definition: AMDGPUTargetMachine.cpp:627
mustPreserveGV
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
Definition: AMDGPUTargetMachine.cpp:543
llvm::createSeparateConstOffsetFromGEPPass
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Definition: SeparateConstOffsetFromGEP.cpp:498
llvm::OptimizationLevel::O1
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
Definition: OptimizationLevel.h:57
llvm::GCNTargetMachine::convertFuncInfoToYAML
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
Definition: AMDGPUTargetMachine.cpp:1443
llvm::AMDGPULowerModuleLDSPass
Definition: AMDGPU.h:155
llvm::initializeR600ExpandSpecialInstrsPassPass
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
llvm::initializeAMDGPUPostLegalizerCombinerPass
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
llvm::initializeAMDGPUPromoteAllocaPass
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
llvm::createSIMemoryLegalizerPass
FunctionPass * createSIMemoryLegalizerPass()
Definition: SIMemoryLegalizer.cpp:2351
llvm::SILowerSGPRSpillsID
char & SILowerSGPRSpillsID
Definition: SILowerSGPRSpills.cpp:73
llvm::Wave32
@ Wave32
Definition: AMDGPUMCTargetDesc.h:31
llvm::createAMDGPUSetWavePriorityPass
FunctionPass * createAMDGPUSetWavePriorityPass()
llvm::initializeAMDGPUInsertDelayAluPass
void initializeAMDGPUInsertDelayAluPass(PassRegistry &)
llvm::PassBuilder::registerPipelineStartEPCallback
void registerPipelineStartEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:458
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:218
llvm::TargetOptions
Definition: TargetOptions.h:124
llvm::AMDGPUAlwaysInlinePass
Definition: AMDGPU.h:248
llvm::yaml::SIMachineFunctionInfo::ArgInfo
Optional< SIArgumentInfo > ArgInfo
Definition: SIMachineFunctionInfo.h:297
SIMachineFunctionInfo.h
Scalar.h
llvm::ArgDescriptor::createArg
static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
Definition: AMDGPUArgumentUsageInfo.h:54
createMinRegScheduler
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:431
llvm::initializeGCNPreRAOptimizationsPass
void initializeGCNPreRAOptimizationsPass(PassRegistry &)
T
llvm::ArgDescriptor
Definition: AMDGPUArgumentUsageInfo.h:23
llvm::Function
Definition: Function.h:60
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:447
llvm::Attribute
Definition: Attributes.h:65
llvm::AMDGPU::SIModeRegisterDefaults::FP32OutputDenormals
bool FP32OutputDenormals
Definition: AMDGPUBaseInfo.h:1012
llvm::PassManager::addPass
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT &&Pass)
Definition: PassManager.h:550
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::initializeAMDGPUAlwaysInlinePass
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
llvm::yaml::MachineFunctionInfo
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
Definition: MIRYamlMapping.h:676
llvm::PHIEliminationID
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions.
Definition: PHIElimination.cpp:128
llvm::initializeSIInsertHardClausesPass
void initializeSIInsertHardClausesPass(PassRegistry &)
llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringPass
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
llvm::initializeSIPreAllocateWWMRegsPass
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::initializeAMDGPUPropagateAttributesLatePass
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
InferAddressSpaces.h
llvm::AMDGPU::SIModeRegisterDefaults::IEEE
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
Definition: AMDGPUBaseInfo.h:1003
llvm::createAlwaysInlinerLegacyPass
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
Definition: AlwaysInliner.cpp:177
getGPUOrDefault
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
Definition: AMDGPUTargetMachine.cpp:488
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:145
llvm::AMDGPUPromoteAllocaToVectorPass
Definition: AMDGPU.h:233
llvm::initializeAMDGPULateCodeGenPreparePass
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &)
llvm::createFixIrreduciblePass
FunctionPass * createFixIrreduciblePass()
Definition: FixIrreducible.cpp:104
llvm::MachineSchedRegistry
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Definition: MachineScheduler.h:141
llvm::createVirtRegRewriter
FunctionPass * createVirtRegRewriter(bool ClearVirtRegs=true)
Definition: VirtRegMap.cpp:646
llvm::Triple::amdgcn
@ amdgcn
Definition: Triple.h:74
GCNSchedStrategy.h
llvm::GCNIterativeScheduler::SCHEDULE_ILP
@ SCHEDULE_ILP
Definition: GCNIterativeScheduler.h:37
llvm::yaml::SIMachineFunctionInfo::VGPRForAGPRCopy
StringValue VGPRForAGPRCopy
Definition: SIMachineFunctionInfo.h:300
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::createAMDGPULateCodeGenPreparePass
FunctionPass * createAMDGPULateCodeGenPreparePass()
Definition: AMDGPULateCodeGenPrepare.cpp:193
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::createSILowerI1CopiesPass
FunctionPass * createSILowerI1CopiesPass()
Definition: SILowerI1Copies.cpp:413
llvm::initializeR600ClauseMergePassPass
void initializeR600ClauseMergePassPass(PassRegistry &)
llvm::GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY
@ SCHEDULE_LEGACYMAXOCCUPANCY
Definition: GCNIterativeScheduler.h:36
llvm::createFlattenCFGPass
FunctionPass * createFlattenCFGPass()
Definition: FlattenCFGPass.cpp:81
llvm::InternalizePass
A pass that internalizes all functions and variables other than those that must be preserved accordin...
Definition: Internalize.h:35
llvm::initializeSIOptimizeExecMaskingPreRAPass
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
llvm::AMDGPUFunctionArgInfo::FlatScratchInit
ArgDescriptor FlatScratchInit
Definition: AMDGPUArgumentUsageInfo.h:129
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::createEarlyCSEPass
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1759
llvm::Wave64
@ Wave64
Definition: AMDGPUMCTargetDesc.h:31
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:125
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:234
llvm::initializeSILowerI1CopiesPass
void initializeSILowerI1CopiesPass(PassRegistry &)
llvm::AMDGPUMachineFunction::getLDSSize
uint32_t getLDSSize() const
Definition: AMDGPUMachineFunction.h:74
EnableSetWavePriority
static cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)
llvm::SIPreEmitPeepholeID
char & SIPreEmitPeepholeID
llvm::createAMDGPUPostLegalizeCombiner
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPostLegalizerCombiner.cpp:448
llvm::initializeAMDGPUDAGToDAGISelPass
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
llvm::initializeSIPeepholeSDWAPass
void initializeSIPeepholeSDWAPass(PassRegistry &)
llvm::ShadowStackGCLoweringID
char & ShadowStackGCLoweringID
ShadowStackGCLowering - Implements the custom lowering mechanism used by the shadow stack GC.
Definition: ShadowStackGCLowering.cpp:91
llvm::SILowerControlFlowID
char & SILowerControlFlowID
Definition: SILowerControlFlow.cpp:174
llvm::yaml::SIMachineFunctionInfo
Definition: SIMachineFunctionInfo.h:271
llvm::SIOptimizeVGPRLiveRangeID
char & SIOptimizeVGPRLiveRangeID
Definition: SIOptimizeVGPRLiveRange.cpp:618
llvm::AMDGPUAS::UNKNOWN_ADDRESS_SPACE
@ UNKNOWN_ADDRESS_SPACE
Definition: AMDGPU.h:405
llvm::createAMDGPUUnifyMetadataPass
ModulePass * createAMDGPUUnifyMetadataPass()
InstructionSelect.h
EnableStructurizerWorkarounds
static cl::opt< bool > EnableStructurizerWorkarounds("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden)
llvm::AMDGPUPassConfig
Definition: AMDGPUTargetMachine.h:108
llvm::AMDGPUAAWrapperPass
Legacy wrapper pass to provide the AMDGPUAAResult object.
Definition: AMDGPUAliasAnalysis.h:62
EnableAtomicOptimizations
static cl::opt< bool > EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
createGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:406
llvm::Optional< Reloc::Model >
llvm::GCNScheduleDAGMILive
Definition: GCNSchedStrategy.h:74
llvm::initializeSIFoldOperandsPass
void initializeSIFoldOperandsPass(PassRegistry &)
llvm::createBarrierNoopPass
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
Definition: BarrierNoopPass.cpp:43
llvm::createAMDGPUISelDag
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition: AMDGPUISelDAGToDAG.cpp:114
InternalizeSymbols
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
llvm::initializeGlobalISel
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
Definition: GlobalISel.cpp:17
llvm::AMDGPU::SIModeRegisterDefaults::FP32InputDenormals
bool FP32InputDenormals
If this is set, neither input or output denormals are flushed for most f32 instructions.
Definition: AMDGPUBaseInfo.h:1011
llvm::PassBuilder::registerAnalysisRegistrationCallback
void registerAnalysisRegistrationCallback(const std::function< void(CGSCCAnalysisManager &)> &C)
Register callbacks for analysis registration with this PassBuilder instance.
Definition: PassBuilder.h:518
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
SIMachineScheduler.h
llvm::yaml::SIMode::FP32OutputDenormals
bool FP32OutputDenormals
Definition: SIMachineFunctionInfo.h:235
llvm::createGVNPass
FunctionPass * createGVNPass(bool NoMemDepAnalysis=false)
Create a legacy GVN pass.
Definition: GVN.cpp:3247
llvm::AMDGPUReleaseVGPRsID
char & AMDGPUReleaseVGPRsID
Definition: AMDGPUReleaseVGPRs.cpp:138
llvm::createCGSCCToFunctionPassAdaptor
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:509
llvm::AMDGPUFunctionArgInfo::PrivateSegmentSize
ArgDescriptor PrivateSegmentSize
Definition: AMDGPUArgumentUsageInfo.h:130
llvm::createR600OpenCLImageTypeLoweringPass
ModulePass * createR600OpenCLImageTypeLoweringPass()
Definition: R600OpenCLImageTypeLoweringPass.cpp:372
llvm::AMDGPUUseNativeCallsPass
Definition: AMDGPU.h:69
llvm::AMDGPUFunctionArgInfo::DispatchPtr
ArgDescriptor DispatchPtr
Definition: AMDGPUArgumentUsageInfo.h:125
llvm::PatternMatch::m_c_And
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
Definition: PatternMatch.h:2245
llvm::initializeAMDGPUPropagateAttributesEarlyPass
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
llvm::SIPreAllocateWWMRegsID
char & SIPreAllocateWWMRegsID
Definition: SIPreAllocateWWMRegs.cpp:84
AMDGPUIGroupLP.h
llvm::initializeAMDGPUPromoteKernelArgumentsPass
void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &)
llvm::SIPostRABundlerID
char & SIPostRABundlerID
Definition: SIPostRABundler.cpp:69
llvm::OptimizationLevel::O0
static const OptimizationLevel O0
Disable as many optimizations as possible.
Definition: OptimizationLevel.h:41
llvm::initializeSIShrinkInstructionsPass
void initializeSIShrinkInstructionsPass(PassRegistry &)
LegacyPassManager.h
llvm::TwoAddressInstructionPassID
char & TwoAddressInstructionPassID
TwoAddressInstruction - This pass reduces two-address instructions to use two operands.
Definition: TwoAddressInstructionPass.cpp:193
PassManagerBuilder.h
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:140
llvm::GCNTargetMachine::parseMachineFunctionInfo
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target's MachineFunctionInfo from the YAML representation.
Definition: AMDGPUTargetMachine.cpp:1449
llvm::initializeAMDGPUSimplifyLibCallsPass
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
Internalize.h
createSIMachineScheduler
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:401
llvm::PatternMatch::m_Deferred
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:784
llvm::MemoryBuffer
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition: MemoryBuffer.h:50
llvm::AMDGPUMachineFunction::Mode
AMDGPU::SIModeRegisterDefaults Mode
Definition: AMDGPUMachineFunction.h:48
llvm::AMDGPUPassConfig::addGCPasses
bool addGCPasses() override
addGCPasses - Add late codegen passes that analyze code for garbage collection.
Definition: AMDGPUTargetMachine.cpp:1117
F
#define F(x, y, z)
Definition: MD5.cpp:55
EnableInsertDelayAlu
static cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)
llvm::createAMDGPUExternalAAWrapperPass
ImmutablePass * createAMDGPUExternalAAWrapperPass()
Definition: AMDGPUAliasAnalysis.cpp:37
llvm::AMDGPUFunctionArgInfo::DispatchID
ArgDescriptor DispatchID
Definition: AMDGPUArgumentUsageInfo.h:128
llvm::PseudoSourceValue::JumpTable
@ JumpTable
Definition: PseudoSourceValue.h:40
llvm::initializeAMDGPULowerIntrinsicsPass
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
llvm::initializeGCNDPPCombinePass
void initializeGCNDPPCombinePass(PassRegistry &)
llvm::AMDGPUUnifyMetadataPass
Definition: AMDGPU.h:274
llvm::AMDGPUFunctionArgInfo::ImplicitArgPtr
ArgDescriptor ImplicitArgPtr
Definition: AMDGPUArgumentUsageInfo.h:141
EnableSDWAPeephole
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
CSEInfo.h
FunctionPassCtor
llvm::SIOptimizeExecMaskingID
char & SIOptimizeExecMaskingID
Definition: SIOptimizeExecMasking.cpp:53
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
llvm::initializeAMDGPUUnifyMetadataPass
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
llvm::yaml::SIMachineFunctionInfo::FrameOffsetReg
StringValue FrameOffsetReg
Definition: SIMachineFunctionInfo.h:291
llvm::initializeAMDGPUArgumentUsageInfoPass
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
R600.h
llvm::AMDGPUPassConfig::addIRPasses
void addIRPasses() override
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition: AMDGPUTargetMachine.cpp:991
SISchedRegistry
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
GCNIterativeScheduler.h
llvm::AMDGPUFunctionArgInfo::WorkGroupIDX
ArgDescriptor WorkGroupIDX
Definition: AMDGPUArgumentUsageInfo.h:133
llvm::GCNTargetMachine::GCNTargetMachine
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
Definition: AMDGPUTargetMachine.cpp:850
llvm::createInferAddressSpacesPass
FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)
Definition: InferAddressSpaces.cpp:1307
llvm::initializeSILateBranchLoweringPass
void initializeSILateBranchLoweringPass(PassRegistry &)
llvm::TargetPassConfig::TM
LLVMTargetMachine * TM
Definition: TargetPassConfig.h:122
AMDGPUAliasAnalysis.h
llvm::AMDGPUTargetMachine
Definition: AMDGPUTargetMachine.h:28
llvm::MSP430Attrs::CodeModel
CodeModel
Definition: MSP430Attributes.h:37
llvm::createAMDGPUUseNativeCallsPass
FunctionPass * createAMDGPUUseNativeCallsPass()
Definition: AMDGPULibCalls.cpp:1663
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::SMLoc
Represents a location in source code.
Definition: SMLoc.h:23
AlwaysInliner.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:511
llvm::yaml::SIMode::FP32InputDenormals
bool FP32InputDenormals
Definition: SIMachineFunctionInfo.h:234
llvm::PassBuilder::registerParseAACallback
void registerParseAACallback(const std::function< bool(StringRef Name, AAManager &AA)> &C)
Register a callback for parsing an AliasAnalysis Name to populate the given AAManager AA.
Definition: PassBuilder.h:510
ScalarizeGlobal
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
llvm::createNaryReassociatePass
FunctionPass * createNaryReassociatePass()
Definition: NaryReassociate.cpp:165
llvm::PostRAHazardRecognizerID
char & PostRAHazardRecognizerID
PostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
Definition: PostRAHazardRecognizer.cpp:61
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:754
llvm::initializeAMDGPULowerKernelArgumentsPass
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
llvm::initializeSIWholeQuadModePass
void initializeSIWholeQuadModePass(PassRegistry &)
llvm::initializeAMDGPUAtomicOptimizerPass
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
llvm::getTheAMDGPUTarget
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
Definition: AMDGPUTargetInfo.cpp:20
llvm::Legalizer
Definition: Legalizer.h:36
llvm::AMDGPUFunctionArgInfo::WorkItemIDX
ArgDescriptor WorkItemIDX
Definition: AMDGPUArgumentUsageInfo.h:148
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
EnableAMDGPUAliasAnalysis
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
EnableLowerKernelArguments
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
EnableLoadStoreVectorizer
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
AMDGPUTargetInfo.h
llvm::createAMDGPULowerModuleLDSPass
ModulePass * createAMDGPULowerModuleLDSPass()
R600TargetMachine.h
llvm::FuncletLayoutID
char & FuncletLayoutID
This pass lays out funclets contiguously.
Definition: FuncletLayout.cpp:39
AMDGPUMacroFusion.h
llvm::initializeAMDGPUUseNativeCallsPass
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
llvm::createSIInsertWaitcntsPass
FunctionPass * createSIInsertWaitcntsPass()
Definition: SIInsertWaitcnts.cpp:826
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
EnableLDSReplaceWithPointer
static cl::opt< bool > EnableLDSReplaceWithPointer("amdgpu-enable-lds-replace-with-pointer", cl::desc("Enable LDS replace with pointer pass"), cl::init(false), cl::Hidden)
llvm::PassBuilder
This class provides access to building LLVM's passes.
Definition: PassBuilder.h:94
EnableRegReassign
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::SMDiagnostic
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:281
llvm::yaml::SIMode::FP64FP16InputDenormals
bool FP64FP16InputDenormals
Definition: SIMachineFunctionInfo.h:236
llvm::createAMDGPUAnnotateUniformValues
FunctionPass * createAMDGPUAnnotateUniformValues()
Definition: AMDGPUAnnotateUniformValues.cpp:122
llvm::initializeAMDGPUUnifyDivergentExitNodesPass
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
llvm::EarlyIfConverterID
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions.
Definition: EarlyIfConversion.cpp:782
useDefaultRegisterAllocator
static FunctionPass * useDefaultRegisterAllocator()
-regalloc=... command line option.
Definition: TargetPassConfig.cpp:1125
llvm::AMDGPUPromoteAllocaPass
Definition: AMDGPU.h:225
llvm::createGenericSchedPostRA
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
Definition: MachineScheduler.cpp:3646
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::createAtomicExpandPass
FunctionPass * createAtomicExpandPass()
AtomicExpandPass - At IR level this pass replace atomic instructions with __atomic_* library calls,...
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:369
llvm::InstructionSelect
This pass is responsible for selecting generic machine instructions to target-specific instructions.
Definition: InstructionSelect.h:33
llvm::AMDGPUTargetMachine::getNullPointerValue
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
Definition: AMDGPUTargetMachine.cpp:769
llvm::RegisterTargetMachine
RegisterTargetMachine - Helper template for registering a target machine implementation,...
Definition: TargetRegistry.h:1318
llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition: MachineScheduler.h:323
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::Triple::r600
@ r600
Definition: Triple.h:73
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::createUnifyLoopExitsPass
FunctionPass * createUnifyLoopExitsPass()
Definition: UnifyLoopExits.cpp:55
llvm::GCNIterativeScheduler
Definition: GCNIterativeScheduler.h:29
createTLOF
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
Definition: AMDGPUTargetMachine.cpp:397
llvm::PseudoSourceValue::FixedStack
@ FixedStack
Definition: PseudoSourceValue.h:42
llvm::SourceMgr::getMainFileID
unsigned getMainFileID() const
Definition: SourceMgr.h:132
AMDGPUTargetObjectFile.h
llvm::AMDGPULowerKernelAttributesPass
Definition: AMDGPU.h:115
llvm::AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind
unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override
getAddressSpaceForPseudoSourceKind - Given the kind of memory (e.g.
Definition: AMDGPUTargetMachine.cpp:830
GVN.h
llvm::createAMDGPUPropagateAttributesLatePass
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:406
llvm::initializeSIMemoryLegalizerPass
void initializeSIMemoryLegalizerPass(PassRegistry &)
llvm::createLoadStoreVectorizerPass
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
llvm::initializeAMDGPUResourceUsageAnalysisPass
void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &)
EnableDPPCombine
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
llvm::createAMDGPULowerIntrinsicsPass
ModulePass * createAMDGPULowerIntrinsicsPass()
Definition: AMDGPULowerIntrinsics.cpp:175
llvm::AMDGPUPassConfig::addCodeGenPrepare
void addCodeGenPrepare() override
Add pass to prepare the LLVM IR for code generation.
Definition: AMDGPUTargetMachine.cpp:1081
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::StackMapLivenessID
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
Definition: StackMapLivenessAnalysis.cpp:86
llvm::createAMDGPUAnnotateKernelFeaturesPass
Pass * createAMDGPUAnnotateKernelFeaturesPass()
Definition: AMDGPUAnnotateKernelFeatures.cpp:137
llvm::initializeAMDGPUReplaceLDSUseWithPointerPass
void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &)
PatternMatch.h
llvm::AMDGPUTargetMachine::~AMDGPUTargetMachine
~AMDGPUTargetMachine() override
llvm::AMDGPUTargetMachine::getSubtargetImpl
const TargetSubtargetInfo * getSubtargetImpl() const
llvm::createSinkingPass
FunctionPass * createSinkingPass()
Definition: Sink.cpp:279
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:345
llvm::createSpeculativeExecutionPass
FunctionPass * createSpeculativeExecutionPass()
Definition: SpeculativeExecution.cpp:325
Utils.h
llvm::SILoadStoreOptimizerID
char & SILoadStoreOptimizerID
Definition: SILoadStoreOptimizer.cpp:734
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:304
llvm::RegisterPassParser
RegisterPassParser class - Handle the addition of new machine passes.
Definition: MachinePassRegistry.h:135
llvm::None
const NoneType None
Definition: None.h:24
llvm::PseudoSourceValue::TargetCustom
@ TargetCustom
Definition: PseudoSourceValue.h:45
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::createAMDGPUExportClusteringDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()
Definition: AMDGPUExportClustering.cpp:144
llvm::initializeSIOptimizeVGPRLiveRangePass
void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &)
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::TargetMachine::resetTargetOptions
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
Definition: TargetMachine.cpp:53
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1653
llvm::SmallString< 128 >
llvm::SourceMgr::getMemoryBuffer
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Definition: SourceMgr.h:125
llvm::createFunctionInliningPass
Pass * createFunctionInliningPass()
createFunctionInliningPass - Return a new pass object that uses a heuristic to inline direct function...
Definition: InlineSimple.cpp:91
llvm::legacy::PassManagerBase::add
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
llvm::MemoryBuffer::getBufferIdentifier
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition: MemoryBuffer.h:75
llvm::createAMDGPUAAWrapperPass
ImmutablePass * createAMDGPUAAWrapperPass()
Definition: AMDGPUAliasAnalysis.cpp:33
llvm::PassManagerBuilder
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
Definition: PassManagerBuilder.h:57
llvm::createLowerSwitchPass
FunctionPass * createLowerSwitchPass()
Definition: LowerSwitch.cpp:587
llvm::createAMDGPUPrintfRuntimeBinding
ModulePass * createAMDGPUPrintfRuntimeBinding()
Definition: AMDGPUPrintfRuntimeBinding.cpp:93
AMDGPUTargetTransformInfo.h
llvm::AMDGPUPassConfig::addInstSelector
bool addInstSelector() override
addInstSelector - This method should install an instruction selector pass, which converts from LLVM c...
Definition: AMDGPUTargetMachine.cpp:1112
PB
PassBuilder PB(Machine, PassOpts->PTO, None, &PIC)
Passes.h
llvm::Triple::AMDHSA
@ AMDHSA
Definition: Triple.h:207
llvm::VirtRegRewriterID
char & VirtRegRewriterID
VirtRegRewriter pass.
Definition: VirtRegMap.cpp:227
llvm::createAMDGPUAlwaysInlinePass
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Definition: AMDGPUAlwaysInlinePass.cpp:163
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
llvm::initializeSILowerSGPRSpillsPass
void initializeSILowerSGPRSpillsPass(PassRegistry &)
llvm::PseudoSourceValue::ExternalSymbolCallEntry
@ ExternalSymbolCallEntry
Definition: PseudoSourceValue.h:44
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::PassBuilder::registerPipelineEarlySimplificationEPCallback
void registerPipelineEarlySimplificationEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:467
llvm::AMDGPUTargetMachine::getFeatureString
StringRef getFeatureString(const Function &F) const
Definition: AMDGPUTargetMachine.cpp:535
OptVGPRLiveRange
static cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::createLCSSAPass
Pass * createLCSSAPass()
Definition: LCSSA.cpp:485
llvm::createModuleToFunctionPassAdaptor
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:1224
llvm::TargetMachine::TargetTriple
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with.
Definition: TargetMachine.h:96
OptExecMaskPreRA
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
llvm::GCLoweringID
char & GCLoweringID
GCLowering Pass - Used by gc.root to perform its default lowering operations.
Definition: GCRootLowering.cpp:85
llvm::yaml::SIMachineFunctionInfo::ScratchRSrcReg
StringValue ScratchRSrcReg
Definition: SIMachineFunctionInfo.h:290
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::AMDGPUUnifyDivergentExitNodesID
char & AMDGPUUnifyDivergentExitNodesID
Definition: AMDGPUUnifyDivergentExitNodes.cpp:79
llvm::StringRef::empty
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
llvm::initializeSIInsertWaitcntsPass
void initializeSIInsertWaitcntsPass(PassRegistry &)
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::initializeSIAnnotateControlFlowPass
void initializeSIAnnotateControlFlowPass(PassRegistry &)
llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition: MachineScheduler.cpp:3489
llvm::AMDGPUFunctionArgInfo::WorkGroupIDZ
ArgDescriptor WorkGroupIDZ
Definition: AMDGPUArgumentUsageInfo.h:135
llvm::RegisterRegAllocBase< RegisterRegAlloc >::FunctionPassCtor
FunctionPass *(*)() FunctionPassCtor
Definition: RegAllocRegistry.h:32
llvm::EngineKind::JIT
@ JIT
Definition: ExecutionEngine.h:524
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:126
llvm::DetectDeadLanesID
char & DetectDeadLanesID
This pass adds dead/undef flags after analyzing subregister lanes.
Definition: DetectDeadLanes.cpp:125
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::TargetMachine::getMCSubtargetInfo
const MCSubtargetInfo * getMCSubtargetInfo() const
Definition: TargetMachine.h:209
llvm::AMDGPUFunctionArgInfo::PrivateSegmentBuffer
ArgDescriptor PrivateSegmentBuffer
Definition: AMDGPUArgumentUsageInfo.h:124
llvm::SIMachineFunctionInfo::reserveWWMRegister
void reserveWWMRegister(Register Reg)
Definition: SIMachineFunctionInfo.h:548
llvm::createSchedBarrierDAGMutation
std::unique_ptr< ScheduleDAGMutation > createSchedBarrierDAGMutation()
Definition: AMDGPUIGroupLP.cpp:435
llvm::createAMDGPUAtomicOptimizerPass
FunctionPass * createAMDGPUAtomicOptimizerPass()
Definition: AMDGPUAtomicOptimizer.cpp:713
llvm::initializeR600VectorRegMergerPass
void initializeR600VectorRegMergerPass(PassRegistry &)
IPO.h
llvm::SIPeepholeSDWAID
char & SIPeepholeSDWAID
Definition: SIPeepholeSDWA.cpp:191
llvm::SIMachineFunctionInfo::initializeBaseYamlFields
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
Definition: SIMachineFunctionInfo.cpp:616
llvm::createGlobalDCEPass
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::GCNTTIImpl
Definition: AMDGPUTargetTransformInfo.h:59
llvm::SIFixVGPRCopiesID
char & SIFixVGPRCopiesID
Definition: SIFixVGPRCopies.cpp:45
llvm::initializeAMDGPURewriteOutArgumentsPass
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
CGSCCPassManager.h
llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition: MachineScheduler.h:125
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:368
llvm::GCNIterativeScheduler::SCHEDULE_MINREGFORCED
@ SCHEDULE_MINREGFORCED
Definition: GCNIterativeScheduler.h:35
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::AMDGPUSimplifyLibCallsPass
Definition: AMDGPU.h:61
llvm::AMDGPUPassConfig::createMachineScheduler
ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override
Create an instance of ScheduleDAGInstrs to be run within the standard MachineScheduler pass for this ...
Definition: AMDGPUTargetMachine.cpp:1123
llvm::TargetPassConfig::addIRPasses
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition: TargetPassConfig.cpp:853
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::TargetPassConfig::addOptimizedRegAlloc
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
Definition: TargetPassConfig.cpp:1460
llvm::AMDGPUFunctionArgInfo::PrivateSegmentWaveByteOffset
ArgDescriptor PrivateSegmentWaveByteOffset
Definition: AMDGPUArgumentUsageInfo.h:137
llvm::SIFormMemoryClausesID
char & SIFormMemoryClausesID
Definition: SIFormMemoryClauses.cpp:91
llvm::LiveVariablesID
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is life and sets mac...
Definition: LiveVariables.cpp:45
LateCFGStructurize
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
TargetPassConfig.h
llvm::yaml::SIMachineFunctionInfo::WWMReservedRegs
SmallVector< StringValue > WWMReservedRegs
Definition: SIMachineFunctionInfo.h:288
llvm::createExternalAAWrapperPass
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
llvm::SIFixSGPRCopiesID
char & SIFixSGPRCopiesID
Definition: SIFixSGPRCopies.cpp:121
llvm::AMDGPUFunctionArgInfo::WorkGroupIDY
ArgDescriptor WorkGroupIDY
Definition: AMDGPUArgumentUsageInfo.h:134
Localizer.h
llvm::PseudoSourceValue::ConstantPool
@ ConstantPool
Definition: PseudoSourceValue.h:41
llvm::MachineCSEID
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:157
llvm::GCNDPPCombineID
char & GCNDPPCombineID
Definition: GCNDPPCombine.cpp:111
llvm::TargetPassConfig::addCodeGenPrepare
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
Definition: TargetPassConfig.cpp:995
llvm::AMDGPU::SIModeRegisterDefaults::DX10Clamp
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
Definition: AMDGPUBaseInfo.h:1007
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SIInsertHardClausesID
char & SIInsertHardClausesID
Definition: SIInsertHardClauses.cpp:273
GCNMinRegSchedRegistry
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
llvm::X86AS::FS
@ FS
Definition: X86.h:192
llvm::AMDGPUPassConfig::addStraightLineScalarOptimizationPasses
void addStraightLineScalarOptimizationPasses()
Definition: AMDGPUTargetMachine.cpp:974
llvm::AMDGPU::isFlatGlobalAddrSpace
bool isFlatGlobalAddrSpace(unsigned AS)
Definition: AMDGPU.h:412
llvm::AMDGPU::SIModeRegisterDefaults::FP64FP16InputDenormals
bool FP64FP16InputDenormals
If this is set, neither input or output denormals are flushed for both f64 and f16/v2f16 instructions...
Definition: AMDGPUBaseInfo.h:1016
llvm::AMDGPUTargetMachine::getPredicatedAddrSpace
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const override
If the specified predicate checks whether a generic pointer falls within a specified address space,...
Definition: AMDGPUTargetMachine.cpp:803
llvm::getTheGCNTarget
Target & getTheGCNTarget()
The target for GCN GPUs.
Definition: AMDGPUTargetInfo.cpp:25
llvm::AMDGPUPassConfig::getAMDGPUTargetMachine
AMDGPUTargetMachine & getAMDGPUTargetMachine() const
Definition: AMDGPUTargetMachine.h:112
llvm::initializeSIOptimizeExecMaskingPass
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
llvm::initializeSIPostRABundlerPass
void initializeSIPostRABundlerPass(PassRegistry &)
llvm::SIScheduleDAGMI
Definition: SIMachineScheduler.h:425
llvm::PassBuilder::registerPipelineParsingCallback
void registerPipelineParsingCallback(const std::function< bool(StringRef Name, CGSCCPassManager &, ArrayRef< PipelineElement >)> &C)
{{@ Register pipeline parsing callbacks with this pass builder instance.
Definition: PassBuilder.h:540
llvm::initializeAMDGPUAAWrapperPassPass
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Definition: MachineScheduler.h:271
llvm::initializeAMDGPUCodeGenPreparePass
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
llvm::AMDGPUPassConfig::AMDGPUPassConfig
AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
Definition: AMDGPUTargetMachine.cpp:956
llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
llvm::initializeGCNNSAReassignPass
void initializeGCNNSAReassignPass(PassRegistry &)
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::AMDGPUTargetMachine::EnableLowerModuleLDS
static bool EnableLowerModuleLDS
Definition: AMDGPUTargetMachine.h:38
llvm::yaml::StringValue
A wrapper around std::string which contains a source range that's being set during parsing.
Definition: MIRYamlMapping.h:34
llvm::GlobalDCEPass
Pass to remove unused function declarations.
Definition: GlobalDCE.h:36
llvm::PatchableFunctionID
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
Definition: PatchableFunction.cpp:96
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
IterativeGCNMaxOccupancySchedRegistry
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
AMDGPUExportClustering.h
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:370
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::AMDGPUFunctionArgInfo::WorkItemIDZ
ArgDescriptor WorkItemIDZ
Definition: AMDGPUArgumentUsageInfo.h:150
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::CodeGenOpt::None
@ None
Definition: CodeGen.h:53
llvm::createSIShrinkInstructionsPass
FunctionPass * createSIShrinkInstructionsPass()
llvm::createAMDGPUMachineCFGStructurizerPass
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
Definition: AMDGPUMachineCFGStructurizer.cpp:2851
llvm::GCNTargetMachine
Definition: AMDGPUTargetMachine.h:75
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:471
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
llvm::TargetPassConfig::addPass
AnalysisID addPass(AnalysisID PassID)
Utilities for targets to add passes to the pass manager.
Definition: TargetPassConfig.cpp:781
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::Constant::removeDeadConstantUsers
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:749
llvm::initializeSIFormMemoryClausesPass
void initializeSIFormMemoryClausesPass(PassRegistry &)
computeDataLayout
static StringRef computeDataLayout(const Triple &TT)
Definition: AMDGPUTargetMachine.cpp:472
llvm::Reloc::PIC_
@ PIC_
Definition: CodeGen.h:22
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:365
llvm::initializeAMDGPUExternalAAWrapperPass
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
AMDGPU.h
llvm::yaml::SIMachineFunctionInfo::StackPtrOffsetReg
StringValue StackPtrOffsetReg
Definition: SIMachineFunctionInfo.h:292
SimplifyLibCalls.h
llvm::AMDGPUPassConfig::addPreISel
bool addPreISel() override
Methods with trivial inline returns are convenient points in the common codegen pass pipeline where t...
Definition: AMDGPUTargetMachine.cpp:1106
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
GlobalDCE.h
llvm::yaml::SIMachineFunctionInfo::Mode
SIMode Mode
Definition: SIMachineFunctionInfo.h:298
llvm::getStandardCSEConfigForOpt
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
Definition: CSEInfo.cpp:75
llvm::createAMDGPURegBankCombiner
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
Definition: AMDGPURegBankCombiner.cpp:486
EnablePreRAOptimizations
static cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
IRTranslator.h
llvm::TargetMachine::getTargetFeatureString
StringRef getTargetFeatureString() const
Definition: TargetMachine.h:128
EarlyInlineAll
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::once_flag
std::once_flag once_flag
Definition: Threading.h:57
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::AMDGPUFunctionArgInfo::ImplicitBufferPtr
ArgDescriptor ImplicitBufferPtr
Definition: AMDGPUArgumentUsageInfo.h:144
llvm::SIWholeQuadModeID
char & SIWholeQuadModeID
Definition: SIWholeQuadMode.cpp:265
llvm::getEffectiveRelocModel
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
Definition: AVRTargetMachine.cpp:40
EnableSROA
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
llvm::initializeAMDGPULowerKernelAttributesPass
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
llvm::getEffectiveCodeModel
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value.
Definition: TargetMachine.h:506
llvm::AMDGPUPassConfig::getCSEConfig
std::unique_ptr< CSEConfigBase > getCSEConfig() const override
Returns the CSEConfig object to use for the current optimization level.
Definition: AMDGPUTargetMachine.cpp:889
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:60
llvm::LLVMTargetMachine::initAsmInfo
void initAsmInfo()
Definition: LLVMTargetMachine.cpp:40
llvm::initializeAMDGPUAnnotateUniformValuesPass
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
llvm::RenameIndependentSubregsID
char & RenameIndependentSubregsID
This pass detects subregister lanes in a virtual register that are used independently of other lanes ...
Definition: RenameIndependentSubregs.cpp:113
llvm::AMDGPUPrintfRuntimeBindingPass
Definition: AMDGPU.h:265
llvm::AMDGPUReplaceLDSUseWithPointerPass
Definition: AMDGPU.h:147
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::createStructurizeCFGPass
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
Definition: StructurizeCFG.cpp:1166
llvm::AMDGPU::SIModeRegisterDefaults::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition: AMDGPUBaseInfo.h:1017
llvm::GCNTargetMachine::createPassConfig
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
Definition: AMDGPUTargetMachine.cpp:1434
llvm::PassManager< Module >
llvm::createAMDGPULowerKernelAttributesPass
ModulePass * createAMDGPULowerKernelAttributesPass()
Definition: AMDGPULowerKernelAttributes.cpp:248
llvm::PseudoSourceValue::GOT
@ GOT
Definition: PseudoSourceValue.h:39
llvm::initializeSIFixSGPRCopiesPass
void initializeSIFixSGPRCopiesPass(PassRegistry &)
llvm::PerFunctionMIParsingState
Definition: MIParser.h:162
llvm::AMDGPUFunctionArgInfo::WorkGroupInfo
ArgDescriptor WorkGroupInfo
Definition: AMDGPUArgumentUsageInfo.h:136
llvm::createAMDGPUPromoteAllocaToVector
FunctionPass * createAMDGPUPromoteAllocaToVector()
Definition: AMDGPUPromoteAlloca.cpp:1143
llvm::OptimizationLevel::getSpeedupLevel
unsigned getSpeedupLevel() const
Definition: OptimizationLevel.h:121
llvm::initializeAMDGPULowerModuleLDSPass
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
LLVM_READNONE
#define LLVM_READNONE
Definition: Compiler.h:199
createIterativeILPMachineScheduler
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:437
llvm::parseNamedRegisterReference
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg, StringRef Src, SMDiagnostic &Error)
Definition: MIParser.cpp:3456
llvm::initializeAMDGPUReleaseVGPRsPass
void initializeAMDGPUReleaseVGPRsPass(PassRegistry &)
EnableEarlyIfConversion
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
llvm::initializeSIFixVGPRCopiesPass
void initializeSIFixVGPRCopiesPass(PassRegistry &)
llvm::yaml::SIMode::DX10Clamp
bool DX10Clamp
Definition: SIMachineFunctionInfo.h:233
llvm::initializeAMDGPUPromoteAllocaToVectorPass
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)
EnableScalarIRPasses
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
llvm::AMDGPUPromoteKernelArgumentsPass
Definition: AMDGPU.h:106
llvm::initializeSIPreEmitPeepholePass
void initializeSIPreEmitPeepholePass(PassRegistry &)
createIterativeGCNMaxOccupancyMachineScheduler
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
Definition: AMDGPUTargetMachine.cpp:421
llvm::call_once
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:87
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
llvm::GCNTargetMachine::getTargetTransformInfo
TargetTransformInfo getTargetTransformInfo(const Function &F) const override
Get a TargetTransformInfo implementation for the target.
Definition: AMDGPUTargetMachine.cpp:881
llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks
void registerPassBuilderCallbacks(PassBuilder &PB) override
Allow the target to modify the pass pipeline with New Pass Manager (similar to adjustPassManager for ...
Definition: AMDGPUTargetMachine.cpp:631
EnablePromoteKernelArguments
static cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
llvm::TargetPassConfig::addMachineSSAOptimization
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form.
Definition: TargetPassConfig.cpp:1306
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:366
llvm::AMDGPUPassConfig::addEarlyCSEOrGVNPass
void addEarlyCSEOrGVNPass()
Definition: AMDGPUTargetMachine.cpp:967
llvm::createAMDGPUPropagateAttributesEarlyPass
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
Definition: AMDGPUPropagateAttributes.cpp:401
llvm::AMDGPUPropagateAttributesEarlyPass
Definition: AMDGPU.h:123
llvm::initializeSIModeRegisterPass
void initializeSIModeRegisterPass(PassRegistry &)
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:155
llvm::createLoadClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:1573
RegBankSelect.h
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
GCNMaxOccupancySchedRegistry
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
llvm::createAMDGPULowerKernelArgumentsPass
FunctionPass * createAMDGPULowerKernelArgumentsPass()
Definition: AMDGPULowerKernelArguments.cpp:247
llvm::AMDGPUTargetMachine::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast between SrcAS and DestAS is a noop.
Definition: AMDGPUTargetMachine.cpp:777
llvm::PassManagerBuilder::EP_ModuleOptimizerEarly
@ EP_ModuleOptimizerEarly
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
Definition: PassManagerBuilder.h:74
llvm::createSIModeRegisterPass
FunctionPass * createSIModeRegisterPass()
Definition: SIModeRegister.cpp:158
llvm::OptimizationLevel
Definition: OptimizationLevel.h:22
llvm::PseudoSourceValue::Stack
@ Stack
Definition: PseudoSourceValue.h:38
llvm::ArgDescriptor::createRegister
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Definition: AMDGPUArgumentUsageInfo.h:44
PassManager.h
llvm::createInternalizePass
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
Definition: Internalize.cpp:312
llvm::SourceMgr::DK_Error
@ DK_Error
Definition: SourceMgr.h:34
llvm::createAMDGPUReplaceLDSUseWithPointerPass
ModulePass * createAMDGPUReplaceLDSUseWithPointerPass()
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:639
llvm::AMDGPUTargetMachine::adjustPassManager
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g.
Definition: AMDGPUTargetMachine.cpp:553
llvm::LLVMTargetMachine
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Definition: TargetMachine.h:414
llvm::TargetPassConfig::disablePass
void disablePass(AnalysisID PassID)
Allow the target to disable a specific standard pass by default.
Definition: TargetPassConfig.h:196
llvm::DeadMachineInstructionElimID
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
Definition: DeadMachineInstructionElim.cpp:56
llvm::PerFunctionMIParsingState::MF
MachineFunction & MF
Definition: MIParser.h:164
GCNILPSchedRegistry
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
llvm::AnalysisManager::registerPass
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:842
llvm::AMDGPUFunctionArgInfo::KernargSegmentPtr
ArgDescriptor KernargSegmentPtr
Definition: AMDGPUArgumentUsageInfo.h:127
llvm::createAMDGPUPromoteAlloca
FunctionPass * createAMDGPUPromoteAlloca()
Definition: AMDGPUPromoteAlloca.cpp:1139
llvm::initializeAMDGPUPrintfRuntimeBindingPass
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
llvm::AAManager::registerFunctionAnalysis
void registerFunctionAnalysis()
Register a specific AA result.
Definition: AliasAnalysis.h:1308
llvm::AMDGPUPassConfig::isPassEnabled
bool isPassEnabled(const cl::opt< bool > &Opt, CodeGenOpt::Level Level=CodeGenOpt::Default) const
Check if a pass is enabled given Opt option.
Definition: AMDGPUTargetMachine.h:133
llvm::BranchRelaxationPassID
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
Definition: BranchRelaxation.cpp:118
llvm::initializeAMDGPUPreLegalizerCombinerPass
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
llvm::createAMDGPUCodeGenPreparePass
FunctionPass * createAMDGPUCodeGenPreparePass()
Definition: AMDGPUCodeGenPrepare.cpp:1465
llvm::createAMDGPUPromoteKernelArgumentsPass
FunctionPass * createAMDGPUPromoteKernelArgumentsPass()
Definition: AMDGPUPromoteKernelArguments.cpp:203
llvm::RegisterRegAllocBase
RegisterRegAllocBase class - Track the registration of register allocators.
Definition: RegAllocRegistry.h:30
llvm::MachineSchedulerID
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Definition: MachineScheduler.cpp:213
llvm::AMDGPUTargetMachine::EnableFunctionCalls
static bool EnableFunctionCalls
Definition: AMDGPUTargetMachine.h:37
llvm::initializeAMDGPUAttributorPass
void initializeAMDGPUAttributorPass(PassRegistry &)
Legalizer.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::createLICMPass
Pass * createLICMPass()
Definition: LICM.cpp:345
llvm::GCNNSAReassignID
char & GCNNSAReassignID
Definition: GCNNSAReassign.cpp:106
llvm::TargetMachine::getTargetCPU
StringRef getTargetCPU() const
Definition: TargetMachine.h:127
llvm::PassManagerBuilder::EP_EarlyAsPossible
@ EP_EarlyAsPossible
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations,...
Definition: PassManagerBuilder.h:70
llvm::initializeAMDGPUAnnotateKernelFeaturesPass
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
llvm::PostRASchedulerID
char & PostRASchedulerID
PostRAScheduler - This pass performs post register allocation scheduling.
Definition: PostRASchedulerList.cpp:197
llvm::AMDGPUFunctionArgInfo::WorkItemIDY
ArgDescriptor WorkItemIDY
Definition: AMDGPUArgumentUsageInfo.h:149
llvm::createAMDGPUPreLegalizeCombiner
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPreLegalizerCombiner.cpp:298
llvm::AMDGPUTargetMachine::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const override
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Definition: AMDGPUTargetMachine.cpp:783
llvm::SMRange
Represents a range in source code.
Definition: SMLoc.h:48
N
#define N
llvm::createStraightLineStrengthReducePass
FunctionPass * createStraightLineStrengthReducePass()
Definition: StraightLineStrengthReduce.cpp:268
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:364
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:349
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:126
llvm::GCNPreRAOptimizationsID
char & GCNPreRAOptimizationsID
Definition: GCNPreRAOptimizations.cpp:79
llvm::initializeSILoadStoreOptimizerPass
void initializeSILoadStoreOptimizerPass(PassRegistry &)
llvm::legacy::PassManagerBase
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
Definition: LegacyPassManager.h:39
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::createStoreClusterDAGMutation
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:1580
llvm::IRTranslator
Definition: IRTranslator.h:62
llvm::PassBuilder::registerCGSCCOptimizerLateEPCallback
void registerCGSCCOptimizerLateEPCallback(const std::function< void(CGSCCPassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
Definition: PassBuilder.h:437
llvm::initializeAMDGPURegBankCombinerPass
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
RegName
#define RegName(no)
llvm::createSIAnnotateControlFlowPass
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
Definition: SIAnnotateControlFlow.cpp:389
Vectorize.h
llvm::yaml::SIMode::IEEE
bool IEEE
Definition: SIMachineFunctionInfo.h:232
llvm::initializeAMDGPUCtorDtorLoweringPass
void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &)
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::SIFoldOperandsID
char & SIFoldOperandsID
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::createBasicRegisterAllocator
FunctionPass * createBasicRegisterAllocator()
BasicRegisterAllocation Pass - This pass implements a degenerate global register allocator using the ...
Definition: RegAllocBasic.cpp:332
llvm::RegBankSelect
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
Definition: RegBankSelect.h:91
llvm::MIPatternMatch::m_Not
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
Definition: MIPatternMatch.h:696
llvm::EarlyMachineLICMID
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
Definition: MachineLICM.cpp:298
llvm::AMDGPUTargetMachine::getGPUName
StringRef getGPUName(const Function &F) const
Definition: AMDGPUTargetMachine.cpp:530
llvm::PostMachineSchedulerID
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
Definition: MachineScheduler.cpp:244
llvm::cl::desc
Definition: CommandLine.h:405
llvm::ScheduleDAGMILive
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
Definition: MachineScheduler.h:390
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:119
llvm::AMDGPUInsertDelayAluID
char & AMDGPUInsertDelayAluID
Definition: AMDGPUInsertDelayAlu.cpp:454
llvm::PassManagerBuilder::EP_CGSCCOptimizerLate
@ EP_CGSCCOptimizerLate
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
Definition: PassManagerBuilder.h:115
llvm::CodeGenOpt::Less
@ Less
Definition: CodeGen.h:54
llvm::AMDGPUTargetMachine::AMDGPUTargetMachine
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
Definition: AMDGPUTargetMachine.cpp:505
llvm::TargetPassConfig::addFastRegAlloc
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
Definition: TargetPassConfig.cpp:1450
llvm::AMDGPUPerfHintAnalysisID
char & AMDGPUPerfHintAnalysisID
Definition: AMDGPUPerfHintAnalysis.cpp:58
TargetRegistry.h
llvm::createSROAPass
FunctionPass * createSROAPass()
Definition: SROA.cpp:4808
llvm::AMDGPUPropagateAttributesLatePass
Definition: AMDGPU.h:135
EnableLibCallSimplify
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
InitializePasses.h
llvm::yaml::SIMode::FP64FP16OutputDenormals
bool FP64FP16OutputDenormals
Definition: SIMachineFunctionInfo.h:237
llvm::SIOptimizeExecMaskingPreRAID
char & SIOptimizeExecMaskingPreRAID
Definition: SIOptimizeExecMaskingPreRA.cpp:75
llvm::createGCNMCRegisterInfo
MCRegisterInfo * createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour)
Definition: AMDGPUMCTargetDesc.cpp:70
llvm::TargetMachine::MRI
std::unique_ptr< const MCRegisterInfo > MRI
Definition: TargetMachine.h:106
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::AMDGPUTargetMachine::EnableLateStructurizeCFG
static bool EnableLateStructurizeCFG
Definition: AMDGPUTargetMachine.h:36
llvm::TargetPassConfig::addILPOpts
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
Definition: TargetPassConfig.h:374
llvm::TargetPassConfig::getOptLevel
CodeGenOpt::Level getOptLevel() const
Definition: TargetPassConfig.cpp:644
AMDGPUTargetMachine.h
llvm::GCNTargetMachine::createDefaultFuncInfoYAML
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
Definition: AMDGPUTargetMachine.cpp:1438
PassName
static const char PassName[]
Definition: X86LowerAMXIntrinsics.cpp:671
llvm::initializeSILowerControlFlowPass
void initializeSILowerControlFlowPass(PassRegistry &)
llvm::SILateBranchLoweringPassID
char & SILateBranchLoweringPassID
Definition: SILateBranchLowering.cpp:66
llvm::createIGroupLPDAGMutation
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation()
Definition: AMDGPUIGroupLP.cpp:431
RegAllocRegistry.h
llvm::createAMDGPUSimplifyLibCallsPass
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
Definition: AMDGPULibCalls.cpp:1659
MIParser.h
llvm::Localizer
This pass implements the localization mechanism described at the top of this file.
Definition: Localizer.h:43
llvm::createAMDGPUMacroFusionDAGMutation
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...
Definition: AMDGPUMacroFusion.cpp:62