LLVM  3.7.0
AMDGPUTargetMachine.cpp
Go to the documentation of this file.
1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief The AMDGPU target machine contains all of the hardware specific
12 /// information needed to emit code for R600 and SI GPUs.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUTargetMachine.h"
17 #include "AMDGPU.h"
19 #include "R600ISelLowering.h"
20 #include "R600InstrInfo.h"
21 #include "R600MachineScheduler.h"
22 #include "SIISelLowering.h"
23 #include "SIInstrInfo.h"
24 #include "llvm/Analysis/Passes.h"
28 #include "llvm/CodeGen/Passes.h"
29 #include "llvm/IR/Verifier.h"
30 #include "llvm/MC/MCAsmInfo.h"
34 #include "llvm/Transforms/IPO.h"
35 #include "llvm/Transforms/Scalar.h"
36 #include <llvm/CodeGen/Passes.h>
37 
38 using namespace llvm;
39 
40 extern "C" void LLVMInitializeAMDGPUTarget() {
41  // Register the target
44 }
45 
47  return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
48 }
49 
51 SchedCustomRegistry("r600", "Run R600's custom scheduler",
53 
54 static std::string computeDataLayout(const Triple &TT) {
55  std::string Ret = "e-p:32:32";
56 
57  if (TT.getArch() == Triple::amdgcn) {
58  // 32-bit private, local, and region pointers. 64-bit global and constant.
59  Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
60  }
61 
62  Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
63  "-v512:512-v1024:1024-v2048:2048-n32:64";
64 
65  return Ret;
66 }
67 
69  StringRef CPU, StringRef FS,
70  TargetOptions Options, Reloc::Model RM,
72  CodeGenOpt::Level OptLevel)
73  : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
74  OptLevel),
75  TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this),
76  IntrinsicInfo() {
78  initAsmInfo();
79 }
80 
82  delete TLOF;
83 }
84 
85 //===----------------------------------------------------------------------===//
86 // R600 Target Machine (R600 -> Cayman)
87 //===----------------------------------------------------------------------===//
88 
90  StringRef FS, StringRef CPU,
91  TargetOptions Options, Reloc::Model RM,
93  : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {}
94 
95 //===----------------------------------------------------------------------===//
96 // GCN Target Machine (SI+)
97 //===----------------------------------------------------------------------===//
98 
100  StringRef FS, StringRef CPU,
101  TargetOptions Options, Reloc::Model RM,
103  : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {}
104 
105 //===----------------------------------------------------------------------===//
106 // AMDGPU Pass Setup
107 //===----------------------------------------------------------------------===//
108 
109 namespace {
110 class AMDGPUPassConfig : public TargetPassConfig {
111 public:
112  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
113  : TargetPassConfig(TM, PM) {}
114 
115  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
116  return getTM<AMDGPUTargetMachine>();
117  }
118 
120  createMachineScheduler(MachineSchedContext *C) const override {
121  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
123  return createR600MachineScheduler(C);
124  return nullptr;
125  }
126 
127  void addIRPasses() override;
128  void addCodeGenPrepare() override;
129  virtual bool addPreISel() override;
130  virtual bool addInstSelector() override;
131 };
132 
133 class R600PassConfig : public AMDGPUPassConfig {
134 public:
135  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
136  : AMDGPUPassConfig(TM, PM) { }
137 
138  bool addPreISel() override;
139  void addPreRegAlloc() override;
140  void addPreSched2() override;
141  void addPreEmitPass() override;
142 };
143 
144 class GCNPassConfig : public AMDGPUPassConfig {
145 public:
146  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
147  : AMDGPUPassConfig(TM, PM) { }
148  bool addPreISel() override;
149  bool addInstSelector() override;
150  void addPreRegAlloc() override;
151  void addPostRegAlloc() override;
152  void addPreSched2() override;
153  void addPreEmitPass() override;
154 };
155 
156 } // End of anonymous namespace
157 
159  return TargetIRAnalysis([this](Function &F) {
160  return TargetTransformInfo(
161  AMDGPUTTIImpl(this, F.getParent()->getDataLayout()));
162  });
163 }
164 
165 void AMDGPUPassConfig::addIRPasses() {
166  // Function calls are not supported, so make sure we inline everything.
167  addPass(createAMDGPUAlwaysInlinePass());
168  addPass(createAlwaysInlinerPass());
169  // We need to add the barrier noop pass, otherwise adding the function
170  // inlining pass will cause all of the PassConfigs passes to be run
171  // one function at a time, which means if we have a nodule with two
172  // functions, then we will generate code for the first function
173  // without ever running any passes on the second.
174  addPass(createBarrierNoopPass());
176 }
177 
178 void AMDGPUPassConfig::addCodeGenPrepare() {
179  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
180  if (ST.isPromoteAllocaEnabled()) {
181  addPass(createAMDGPUPromoteAlloca(ST));
182  addPass(createSROAPass());
183  }
185 }
186 
187 bool
188 AMDGPUPassConfig::addPreISel() {
189  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
190  addPass(createFlattenCFGPass());
191  if (ST.IsIRStructurizerEnabled())
192  addPass(createStructurizeCFGPass());
193  return false;
194 }
195 
196 bool AMDGPUPassConfig::addInstSelector() {
197  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
198  return false;
199 }
200 
201 //===----------------------------------------------------------------------===//
202 // R600 Pass Setup
203 //===----------------------------------------------------------------------===//
204 
205 bool R600PassConfig::addPreISel() {
206  AMDGPUPassConfig::addPreISel();
208  return false;
209 }
210 
211 void R600PassConfig::addPreRegAlloc() {
212  addPass(createR600VectorRegMerger(*TM));
213 }
214 
215 void R600PassConfig::addPreSched2() {
216  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
217  addPass(createR600EmitClauseMarkers(), false);
218  if (ST.isIfCvtEnabled())
219  addPass(&IfConverterID, false);
220  addPass(createR600ClauseMergePass(*TM), false);
221 }
222 
223 void R600PassConfig::addPreEmitPass() {
224  addPass(createAMDGPUCFGStructurizerPass(), false);
225  addPass(createR600ExpandSpecialInstrsPass(*TM), false);
226  addPass(&FinalizeMachineBundlesID, false);
227  addPass(createR600Packetizer(*TM), false);
228  addPass(createR600ControlFlowFinalizer(*TM), false);
229 }
230 
232  return new R600PassConfig(this, PM);
233 }
234 
235 //===----------------------------------------------------------------------===//
236 // GCN Pass Setup
237 //===----------------------------------------------------------------------===//
238 
239 bool GCNPassConfig::addPreISel() {
240  AMDGPUPassConfig::addPreISel();
241  addPass(createSinkingPass());
242  addPass(createSITypeRewriter());
244  return false;
245 }
246 
247 bool GCNPassConfig::addInstSelector() {
248  AMDGPUPassConfig::addInstSelector();
249  addPass(createSILowerI1CopiesPass());
250  addPass(createSIFixSGPRCopiesPass(*TM));
251  addPass(createSIFoldOperandsPass());
252  return false;
253 }
254 
255 void GCNPassConfig::addPreRegAlloc() {
256  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
257 
258  // This needs to be run directly before register allocation because
259  // earlier passes might recompute live intervals.
260  // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
261  if (getOptLevel() > CodeGenOpt::None) {
264  }
265 
266  if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
267  // Don't do this with no optimizations since it throws away debug info by
268  // merging nonadjacent loads.
269 
270  // This should be run after scheduling, but before register allocation. It
271  // also need extra copies to the address operand to be eliminated.
275  }
276  addPass(createSIShrinkInstructionsPass(), false);
277  addPass(createSIFixSGPRLiveRangesPass(), false);
278 }
279 
280 void GCNPassConfig::addPostRegAlloc() {
281  addPass(createSIPrepareScratchRegs(), false);
282  addPass(createSIShrinkInstructionsPass(), false);
283 }
284 
285 void GCNPassConfig::addPreSched2() {
286 }
287 
288 void GCNPassConfig::addPreEmitPass() {
289  addPass(createSIInsertWaits(*TM), false);
290  addPass(createSILowerControlFlowPass(*TM), false);
291 }
292 
294  return new GCNPassConfig(this, PM);
295 }
This file a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine...
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
TargetLoweringObjectFile * TLOF
static std::string computeDataLayout(const Triple &TT)
Interface definition for R600InstrInfo.
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition: Passes.cpp:377
Target TheGCNTarget
The target for GCN GPUs.
FunctionPass * createSIShrinkInstructionsPass()
char & SILoadStoreOptimizerID
char & RegisterCoalescerID
RegisterCoalescer - This pass merges live ranges to eliminate copies.
Analysis pass providing the TargetTransformInfo.
F(f)
R600 Machine Scheduler interface.
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
FunctionPass * createAMDGPUCFGStructurizerPass()
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Pass * createAlwaysInlinerPass()
createAlwaysInlinerPass - Return a new pass object that inlines only functions that are marked as "al...
FunctionPass * createSIInsertWaits(TargetMachine &tm)
bool loadStoreOptEnabled() const
Target TheAMDGPUTarget
The target which supports all AMD GPUs.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
FunctionPass * createR600TextureIntrinsicsReplacer()
FunctionPass * createR600ExpandSpecialInstrsPass(TargetMachine &tm)
FunctionPass * createSROAPass(bool RequiresDomTree=true)
Definition: SROA.cpp:1275
FunctionPass * createSinkingPass()
Definition: Sink.cpp:72
char & SIFixControlFlowLiveIntervalsID
char & FinalizeMachineBundlesID
FinalizeMachineBundles - This pass finalize machine instruction bundles (created earlier, e.g.
FunctionPass * createR600VectorRegMerger(TargetMachine &tm)
Target-Independent Code Generator Pass Configuration Options.
FunctionPass * createSITypeRewriter()
FunctionPass * createR600ClauseMergePass(TargetMachine &tm)
FunctionPass * createSILowerControlFlowPass(TargetMachine &tm)
FunctionPass * createSILowerI1CopiesPass()
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:242
bool isIfCvtEnabled() const
Generation getGeneration() const
FunctionPass * createR600ControlFlowFinalizer(TargetMachine &tm)
FunctionPass * createSIFixSGPRCopiesPass(TargetMachine &tm)
bool IsIRStructurizerEnabled() const
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef FS, StringRef CPU, TargetOptions Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL)
FunctionPass * createFlattenCFGPass()
FunctionPass * createAMDGPUISelDag(TargetMachine &tm)
This pass converts a legalized DAG into a AMDGPU-specific.
void LLVMInitializeAMDGPUTarget()
SI DAG Lowering interface definition.
R600TargetMachine(const Target &T, const Triple &TT, StringRef FS, StringRef CPU, TargetOptions Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL)
FunctionPass * createR600Packetizer(TargetMachine &tm)
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
This class describes a target machine that is implemented with the LLVM target-independent code gener...
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
Definition: Passes.cpp:451
FunctionPass * createSIPrepareScratchRegs()
The AMDGPU TargetMachine interface definition for hw codegen targets.
static ScheduleDAGInstrs * createR600MachineScheduler(MachineSchedContext *C)
bool isPromoteAllocaEnabled() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
ModulePass * createAMDGPUAlwaysInlinePass()
GCNTargetMachine(const Target &T, const Triple &TT, StringRef FS, StringRef CPU, TargetOptions Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL)
void setRequiresStructuredCFG(bool Value)
Target - Wrapper for Target specific information.
void initializeSILoadStoreOptimizerPass(PassRegistry &)
ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of MachineInstrs. ...
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:372
Interface definition for SIInstrInfo.
R600 DAG Lowering interface definition.
FunctionPass * createSIFoldOperandsPass()
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
TargetIRAnalysis getTargetIRAnalysis() override
Get a TargetIRAnalysis implementation for the target.
FunctionPass * createR600EmitClauseMarkers()
static MachineSchedRegistry SchedCustomRegistry("r600","Run R600's custom scheduler", createR600MachineScheduler)
RegisterTargetMachine - Helper template for registering a target machine implementation, for use in the target machine initialization function.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:365
void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry &)
Pass * createStructurizeCFGPass()
Create the pass.
Primary interface to the complete machine description for the target machine.
FunctionPass * createSIFixSGPRLiveRangesPass()
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
FunctionPass * createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST)