LLVM  4.0.0
NVPTXTargetMachine.cpp
Go to the documentation of this file.
1 //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Top-level implementation for the NVPTX target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTX.h"
15 #include "NVPTXAllocaHoisting.h"
16 #include "NVPTXLowerAggrCopies.h"
17 #include "NVPTXTargetMachine.h"
18 #include "NVPTXTargetObjectFile.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/Triple.h"
23 #include "llvm/CodeGen/Passes.h"
26 #include "llvm/Pass.h"
31 #include "llvm/Transforms/Scalar.h"
34 #include <cassert>
35 #include <string>
36 
37 using namespace llvm;
38 
39 // LSV is still relatively new; this switch lets us turn it off in case we
40 // encounter (or suspect) a bug.
41 static cl::opt<bool>
42  DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
43  cl::desc("Disable load/store vectorizer"),
44  cl::init(false), cl::Hidden);
45 
46 namespace llvm {
47 
57 
58 } // end namespace llvm
59 
60 extern "C" void LLVMInitializeNVPTXTarget() {
61  // Register the target.
64 
65  // FIXME: This pass is really intended to be invoked during IR optimization,
66  // but it's very NVPTX-specific.
77 }
78 
79 static std::string computeDataLayout(bool is64Bit) {
80  std::string Ret = "e";
81 
82  if (!is64Bit)
83  Ret += "-p:32:32";
84 
85  Ret += "-i64:64-v16:16-v32:32-n16:32:64";
86 
87  return Ret;
88 }
89 
91  StringRef CPU, StringRef FS,
92  const TargetOptions &Options,
95  CodeGenOpt::Level OL, bool is64bit)
96  // The pic relocation model is used regardless of what the client has
97  // specified, as it is the only relocation model currently supported.
98  : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options,
99  Reloc::PIC_, CM, OL),
100  is64bit(is64bit),
101  TLOF(llvm::make_unique<NVPTXTargetObjectFile>()),
102  Subtarget(TT, CPU, FS, *this) {
103  if (TT.getOS() == Triple::NVCL)
104  drvInterface = NVPTX::NVCL;
105  else
106  drvInterface = NVPTX::CUDA;
107  initAsmInfo();
108 }
109 
111 
112 void NVPTXTargetMachine32::anchor() {}
113 
115  StringRef CPU, StringRef FS,
116  const TargetOptions &Options,
118  CodeModel::Model CM,
120  : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
121 
122 void NVPTXTargetMachine64::anchor() {}
123 
125  StringRef CPU, StringRef FS,
126  const TargetOptions &Options,
128  CodeModel::Model CM,
130  : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
131 
132 namespace {
133 
134 class NVPTXPassConfig : public TargetPassConfig {
135 public:
136  NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
137  : TargetPassConfig(TM, PM) {}
138 
139  NVPTXTargetMachine &getNVPTXTargetMachine() const {
140  return getTM<NVPTXTargetMachine>();
141  }
142 
143  void addIRPasses() override;
144  bool addInstSelector() override;
145  void addPostRegAlloc() override;
146  void addMachineSSAOptimization() override;
147 
148  FunctionPass *createTargetRegisterAllocator(bool) override;
149  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
150  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
151 
152 private:
153  // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
154  // function is only called in opt mode.
155  void addEarlyCSEOrGVNPass();
156 
157  // Add passes that propagate special memory spaces.
158  void addAddressSpaceInferencePasses();
159 
160  // Add passes that perform straight-line scalar optimizations.
161  void addStraightLineScalarOptimizationPasses();
162 };
163 
164 } // end anonymous namespace
165 
167  return new NVPTXPassConfig(this, PM);
168 }
169 
171  PM.add(createNVVMReflectPass());
172  PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
173 }
174 
176  return TargetIRAnalysis([this](const Function &F) {
177  return TargetTransformInfo(NVPTXTTIImpl(this, F));
178  });
179 }
180 
181 void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
182  if (getOptLevel() == CodeGenOpt::Aggressive)
183  addPass(createGVNPass());
184  else
185  addPass(createEarlyCSEPass());
186 }
187 
188 void NVPTXPassConfig::addAddressSpaceInferencePasses() {
189  // NVPTXLowerArgs emits alloca for byval parameters which can often
190  // be eliminated by SROA.
191  addPass(createSROAPass());
192  addPass(createNVPTXLowerAllocaPass());
194 }
195 
196 void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
199  // ReassociateGEPs exposes more opportunities for SLSR. See
200  // the example in reassociate-geps-and-slsr.ll.
202  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
203  // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
204  // for some of our benchmarks.
205  addEarlyCSEOrGVNPass();
206  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
207  addPass(createNaryReassociatePass());
208  // NaryReassociate on GEPs creates redundant common expressions, so run
209  // EarlyCSE after it.
210  addPass(createEarlyCSEPass());
211 }
212 
213 void NVPTXPassConfig::addIRPasses() {
214  // The following passes are known to not play well with virtual regs hanging
215  // around after register allocation (which in our case, is *all* registers).
216  // We explicitly disable them here. We do, however, need some functionality
217  // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
218  // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
219  disablePass(&PrologEpilogCodeInserterID);
220  disablePass(&MachineCopyPropagationID);
221  disablePass(&TailDuplicateID);
222  disablePass(&StackMapLivenessID);
223  disablePass(&LiveDebugValuesID);
224  disablePass(&PostRASchedulerID);
225  disablePass(&FuncletLayoutID);
226  disablePass(&PatchableFunctionID);
227 
228  // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
229  // it here does nothing. But since we need it for correctness when lowering
230  // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
231  // call addEarlyAsPossiblePasses.
232  addPass(createNVVMReflectPass());
233 
234  if (getOptLevel() != CodeGenOpt::None)
237  addPass(createGenericToNVVMPass());
238 
239  // NVPTXLowerArgs is required for correctness and should be run right
240  // before the address space inference passes.
241  addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
242  if (getOptLevel() != CodeGenOpt::None) {
243  addAddressSpaceInferencePasses();
246  addStraightLineScalarOptimizationPasses();
247  }
248 
249  // === LSR and other generic IR passes ===
251  // EarlyCSE is not always strong enough to clean up what LSR produces. For
252  // example, GVN can combine
253  //
254  // %0 = add %a, %b
255  // %1 = add %b, %a
256  //
257  // and
258  //
259  // %0 = shl nsw %a, 2
260  // %1 = shl %a, 2
261  //
262  // but EarlyCSE can do neither of them.
263  if (getOptLevel() != CodeGenOpt::None)
264  addEarlyCSEOrGVNPass();
265 }
266 
267 bool NVPTXPassConfig::addInstSelector() {
268  const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
269 
270  addPass(createLowerAggrCopies());
271  addPass(createAllocaHoisting());
272  addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
273 
274  if (!ST.hasImageHandles())
276 
277  return false;
278 }
279 
280 void NVPTXPassConfig::addPostRegAlloc() {
281  addPass(createNVPTXPrologEpilogPass(), false);
282  if (getOptLevel() != CodeGenOpt::None) {
283  // NVPTXPrologEpilogPass calculates frame object offset and replace frame
284  // index with VRFrame register. NVPTXPeephole need to be run after that and
285  // will replace VRFrame with VRFrameLocal when possible.
286  addPass(createNVPTXPeephole());
287  }
288 }
289 
290 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
291  return nullptr; // No reg alloc
292 }
293 
294 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
295  assert(!RegAllocPass && "NVPTX uses no regalloc!");
296  addPass(&PHIEliminationID);
297  addPass(&TwoAddressInstructionPassID);
298 }
299 
300 void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
301  assert(!RegAllocPass && "NVPTX uses no regalloc!");
302 
303  addPass(&ProcessImplicitDefsID);
304  addPass(&LiveVariablesID);
305  addPass(&MachineLoopInfoID);
306  addPass(&PHIEliminationID);
307 
308  addPass(&TwoAddressInstructionPassID);
309  addPass(&RegisterCoalescerID);
310 
311  // PreRA instruction scheduling.
312  if (addPass(&MachineSchedulerID))
313  printAndVerify("After Machine Scheduling");
314 
315 
316  addPass(&StackSlotColoringID);
317 
318  // FIXME: Needs physical registers
319  //addPass(&PostRAMachineLICMID);
320 
321  printAndVerify("After StackSlotColoring");
322 }
323 
324 void NVPTXPassConfig::addMachineSSAOptimization() {
325  // Pre-ra tail duplication.
326  if (addPass(&EarlyTailDuplicateID))
327  printAndVerify("After Pre-RegAlloc TailDuplicate");
328 
329  // Optimize PHIs before DCE: removing dead PHI cycles may make more
330  // instructions dead.
331  addPass(&OptimizePHIsID);
332 
333  // This pass merges large allocas. StackSlotColoring is a different pass
334  // which merges spill slots.
335  addPass(&StackColoringID);
336 
337  // If the target requests it, assign local variables to stack slots relative
338  // to one another and simplify frame index references where possible.
339  addPass(&LocalStackSlotAllocationID);
340 
341  // With optimization, dead code should already be eliminated. However
342  // there is one known exception: lowered code for arguments that are only
343  // used by tail calls, where the tail calls reuse the incoming stack
344  // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
346  printAndVerify("After codegen DCE pass");
347 
348  // Allow targets to insert passes that improve instruction level parallelism,
349  // like if-conversion. Such passes will typically need dominator trees and
350  // loop info, just like LICM and CSE below.
351  if (addILPOpts())
352  printAndVerify("After ILP optimizations");
353 
354  addPass(&MachineLICMID);
355  addPass(&MachineCSEID);
356 
357  addPass(&MachineSinkingID);
358  printAndVerify("After Machine LICM, CSE and Sinking passes");
359 
360  addPass(&PeepholeOptimizerID);
361  printAndVerify("After codegen peephole optimization pass");
362 }
FunctionPass * createSpeculativeExecutionPass()
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:279
FunctionPass * createStraightLineStrengthReducePass()
FunctionPass * createGVNPass(bool NoLoads=false)
Create a legacy GVN pass.
Definition: GVN.cpp:2797
void initializeNVPTXLowerArgsPass(PassRegistry &)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void LLVMInitializeNVPTXTarget()
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
FunctionPass * createNVVMReflectPass()
Definition: NVVMReflect.cpp:72
char & MachineLICMID
MachineLICM - This pass performs LICM on machine instructions.
ModulePass * createNVPTXAssignValidGlobalNamesPass()
char & FuncletLayoutID
This pass lays out funclets contiguously.
static std::string computeDataLayout(bool is64Bit)
FunctionPass * createAllocaHoisting()
char & RegisterCoalescerID
RegisterCoalescer - This pass merges live ranges to eliminate copies.
Analysis pass providing the TargetTransformInfo.
MachineFunctionPass * createNVPTXPrologEpilogPass()
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
ModulePass * createGenericToNVVMPass()
char & ProcessImplicitDefsID
ProcessImplicitDefs pass - This pass removes IMPLICIT_DEFs.
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry &)
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
char & StackColoringID
StackColoring - This pass performs stack coloring and merging.
Pass * createLoadStoreVectorizerPass()
Target-Independent Code Generator Pass Configuration Options.
Target & getTheNVPTXTarget64()
#define F(x, y, z)
Definition: MD5.cpp:51
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:110
unsigned int getSmVersion() const
Function Alias Analysis false
TargetIRAnalysis getTargetIRAnalysis() override
Get a TargetIRAnalysis implementation for the target.
char & StackSlotColoringID
StackSlotColoring - This pass performs stack slot coloring.
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
FunctionPass * createNVPTXImageOptimizerPass()
void initializeNVVMIntrRangePass(PassRegistry &)
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions...
Target & getTheNVPTXTarget32()
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is live and sets mac...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
This file provides a TargetTransformInfo::Concept conforming object specific to the NVPTX target machine...
NVPTXTargetMachine32(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional< Reloc::Model > RM, CodeModel::Model CM, CodeGenOpt::Level OL)
FunctionPass * createSeparateConstOffsetFromGEPPass(const TargetMachine *TM=nullptr, bool LowerGEP=false)
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &)
void initializeNVVMReflectPass(PassRegistry &)
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique(Args &&...args)
Constructs a new T() with the given args and returns a unique_ptr<T> which owns the object...
Definition: STLExtras.h:845
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
static bool is64Bit(const char *name)
MachineFunctionPass * createNVPTXReplaceImageHandlesPass()
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
char & LiveDebugValuesID
LiveDebugValues pass.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
void initializeNVPTXAllocaHoistingPass(PassRegistry &)
char & MachineCopyPropagationID
MachineCopyPropagation - This pass performs copy propagation on machine instructions.
void initializeGenericToNVVMPass(PassRegistry &)
FunctionPass * createNVPTXInferAddressSpacesPass()
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
char & PostRASchedulerID
createPostRAScheduler - This pass performs post register allocation scheduling.
void initializeNVPTXLowerAllocaPass(PassRegistry &)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
BasicBlockPass * createNVPTXLowerAllocaPass()
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
char & TailDuplicateID
TailDuplicate - Duplicate blocks with unconditional branches into tails of their predecessors.
char & MachineSinkingID
MachineSinking - This pass performs sinking on machine instructions.
char & OptimizePHIsID
OptimizePHIs - This pass optimizes machine instruction PHIs to take advantage of opportunities create...
Target - Wrapper for Target specific information.
char & PeepholeOptimizerID
PeepholeOptimizer - This pass performs peephole optimizations - like extension and comparison elimina...
char & PrologEpilogCodeInserterID
PrologEpilogCodeInserter - This pass inserts prolog and epilog code, and eliminates abstract frame re...
void initializeNVPTXInferAddressSpacesPass(PassRegistry &)
Basic Alias true
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
NVPTXTargetMachine.
MachineFunctionPass * createNVPTXPeephole()
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG, ready for instruction scheduling.
~NVPTXTargetMachine() override
FunctionPass * createNVVMIntrRangePass(unsigned int SmVersion)
FunctionPass * createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM)
FunctionPass * createLowerAggrCopies()
void addEarlyAsPossiblePasses(PassManagerBase &PM) override
Add target-specific function passes that should be run as early as possible in the optimization pipel...
FunctionPass * createSROAPass()
Definition: SROA.cpp:4283
NVPTXTargetMachine64(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional< Reloc::Model > RM, CodeModel::Model CM, CodeGenOpt::Level OL)
char & TwoAddressInstructionPassID
TwoAddressInstruction - This pass reduces two-address instructions to use two operands.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasImageHandles() const
RegisterTargetMachine - Helper template for registering a target machine implementation, for use in the target machine initialization function.
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1050
static cl::opt< bool > DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer", cl::desc("Disable load/store vectorizer"), cl::init(false), cl::Hidden)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:40
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
This pass exposes codegen information to IR-level passes.
char & LocalStackSlotAllocationID
LocalStackSlotAllocation - This pass assigns local frame indices to stack slots relative to one anoth...
FunctionPass * createNaryReassociatePass()
NVPTXTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional< Reloc::Model > RM, CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit)