LLVM  3.7.0
NVPTXTargetMachine.cpp
Go to the documentation of this file.
1 //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Top-level implementation for the NVPTX target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXTargetMachine.h"
16 #include "NVPTX.h"
17 #include "NVPTXAllocaHoisting.h"
18 #include "NVPTXLowerAggrCopies.h"
19 #include "NVPTXTargetObjectFile.h"
21 #include "llvm/Analysis/Passes.h"
25 #include "llvm/CodeGen/Passes.h"
26 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/Verifier.h"
30 #include "llvm/MC/MCAsmInfo.h"
31 #include "llvm/MC/MCInstrInfo.h"
32 #include "llvm/MC/MCStreamer.h"
35 #include "llvm/Support/Debug.h"
46 #include "llvm/Transforms/Scalar.h"
47 
48 using namespace llvm;
49 
50 namespace llvm {
58 }
59 
60 extern "C" void LLVMInitializeNVPTXTarget() {
61  // Register the target.
64 
65  // FIXME: This pass is really intended to be invoked during IR optimization,
66  // but it's very NVPTX-specific.
75 }
76 
/// Build the data layout string handed to the LLVMTargetMachine base class.
///
/// The string always starts with "e" and ends with the fixed
/// "-i64:64-v16:16-v32:32-n16:32:64" suffix. The "-p:32:32" pointer
/// specification is inserted between the two only when \p is64Bit is false.
static std::string computeDataLayout(bool is64Bit) {
  // Only the non-64-bit layout carries an explicit pointer specification.
  const char *PointerSpec = is64Bit ? "" : "-p:32:32";

  std::string Layout("e");
  Layout.append(PointerSpec);
  Layout.append("-i64:64-v16:16-v32:32-n16:32:64");
  return Layout;
}
87 
89  StringRef CPU, StringRef FS,
90  const TargetOptions &Options,
92  CodeGenOpt::Level OL, bool is64bit)
93  : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
94  CM, OL),
95  is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
96  Subtarget(TT, CPU, FS, *this) {
97  if (TT.getOS() == Triple::NVCL)
98  drvInterface = NVPTX::NVCL;
99  else
100  drvInterface = NVPTX::CUDA;
101  initAsmInfo();
102 }
103 
105 
106 void NVPTXTargetMachine32::anchor() {}
107 
109  StringRef CPU, StringRef FS,
110  const TargetOptions &Options,
113  : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
114 
115 void NVPTXTargetMachine64::anchor() {}
116 
118  StringRef CPU, StringRef FS,
119  const TargetOptions &Options,
122  : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
123 
124 namespace {
125 class NVPTXPassConfig : public TargetPassConfig {
126 public:
127  NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
128  : TargetPassConfig(TM, PM) {}
129 
130  NVPTXTargetMachine &getNVPTXTargetMachine() const {
131  return getTM<NVPTXTargetMachine>();
132  }
133 
134  void addIRPasses() override;
135  bool addInstSelector() override;
136  void addPostRegAlloc() override;
137  void addMachineSSAOptimization() override;
138 
139  FunctionPass *createTargetRegisterAllocator(bool) override;
140  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
141  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
142 };
143 } // end anonymous namespace
144 
146  NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
147  return PassConfig;
148 }
149 
151  return TargetIRAnalysis([this](Function &F) {
152  return TargetTransformInfo(NVPTXTTIImpl(this, F));
153  });
154 }
155 
156 void NVPTXPassConfig::addIRPasses() {
157  // The following passes are known to not play well with virtual regs hanging
158  // around after register allocation (which in our case, is *all* registers).
159  // We explicitly disable them here. We do, however, need some functionality
160  // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
161  // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
162  disablePass(&PrologEpilogCodeInserterID);
163  disablePass(&MachineCopyPropagationID);
164  disablePass(&BranchFolderPassID);
165  disablePass(&TailDuplicateID);
166 
170  addPass(createGenericToNVVMPass());
171  addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
172  // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
173  // be eliminated by SROA.
174  addPass(createSROAPass());
175  addPass(createNVPTXLowerAllocaPass());
177  // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
178  // them unused. We could remove dead code in an ad-hoc manner, but that
179  // requires manual work and might be error-prone.
182  // ReassociateGEPs exposes more opportunites for SLSR. See
183  // the example in reassociate-geps-and-slsr.ll.
185  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
186  // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
187  // for some of our benchmarks.
188  if (getOptLevel() == CodeGenOpt::Aggressive)
189  addPass(createGVNPass());
190  else
191  addPass(createEarlyCSEPass());
192  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
193  addPass(createNaryReassociatePass());
194  // NaryReassociate on GEPs creates redundant common expressions, so run
195  // EarlyCSE after it.
196  addPass(createEarlyCSEPass());
197 }
198 
199 bool NVPTXPassConfig::addInstSelector() {
200  const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
201 
202  addPass(createLowerAggrCopies());
203  addPass(createAllocaHoisting());
204  addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
205 
206  if (!ST.hasImageHandles())
208 
209  return false;
210 }
211 
212 void NVPTXPassConfig::addPostRegAlloc() {
213  addPass(createNVPTXPrologEpilogPass(), false);
214  // NVPTXPrologEpilogPass calculates frame object offset and replace frame
215  // index with VRFrame register. NVPTXPeephole need to be run after that and
216  // will replace VRFrame with VRFrameLocal when possible.
217  addPass(createNVPTXPeephole());
218 }
219 
220 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
221  return nullptr; // No reg alloc
222 }
223 
224 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
225  assert(!RegAllocPass && "NVPTX uses no regalloc!");
226  addPass(&PHIEliminationID);
227  addPass(&TwoAddressInstructionPassID);
228 }
229 
230 void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
231  assert(!RegAllocPass && "NVPTX uses no regalloc!");
232 
233  addPass(&ProcessImplicitDefsID);
234  addPass(&LiveVariablesID);
235  addPass(&MachineLoopInfoID);
236  addPass(&PHIEliminationID);
237 
238  addPass(&TwoAddressInstructionPassID);
239  addPass(&RegisterCoalescerID);
240 
241  // PreRA instruction scheduling.
242  if (addPass(&MachineSchedulerID))
243  printAndVerify("After Machine Scheduling");
244 
245 
246  addPass(&StackSlotColoringID);
247 
248  // FIXME: Needs physical registers
249  //addPass(&PostRAMachineLICMID);
250 
251  printAndVerify("After StackSlotColoring");
252 }
253 
254 void NVPTXPassConfig::addMachineSSAOptimization() {
255  // Pre-ra tail duplication.
256  if (addPass(&EarlyTailDuplicateID))
257  printAndVerify("After Pre-RegAlloc TailDuplicate");
258 
259  // Optimize PHIs before DCE: removing dead PHI cycles may make more
260  // instructions dead.
261  addPass(&OptimizePHIsID);
262 
263  // This pass merges large allocas. StackSlotColoring is a different pass
264  // which merges spill slots.
265  addPass(&StackColoringID);
266 
267  // If the target requests it, assign local variables to stack slots relative
268  // to one another and simplify frame index references where possible.
269  addPass(&LocalStackSlotAllocationID);
270 
271  // With optimization, dead code should already be eliminated. However
272  // there is one known exception: lowered code for arguments that are only
273  // used by tail calls, where the tail calls reuse the incoming stack
274  // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
276  printAndVerify("After codegen DCE pass");
277 
278  // Allow targets to insert passes that improve instruction level parallelism,
279  // like if-conversion. Such passes will typically need dominator trees and
280  // loop info, just like LICM and CSE below.
281  if (addILPOpts())
282  printAndVerify("After ILP optimizations");
283 
284  addPass(&MachineLICMID);
285  addPass(&MachineCSEID);
286 
287  addPass(&MachineSinkingID);
288  printAndVerify("After Machine LICM, CSE and Sinking passes");
289 
290  addPass(&PeepholeOptimizerID);
291  printAndVerify("After codegen peephole optimization pass");
292 }
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:251
FunctionPass * createStraightLineStrengthReducePass()
FunctionPass * createGVNPass(bool NoLoads=false)
Definition: GVN.cpp:732
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
FunctionPass * createNVPTXFavorNonGenericAddrSpacesPass()
void LLVMInitializeNVPTXTarget()
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Definition: Passes.cpp:377
char & MachineLICMID
MachineLICM - This pass performs LICM on machine instructions.
ModulePass * createNVPTXAssignValidGlobalNamesPass()
static std::string computeDataLayout(bool is64Bit)
FunctionPass * createAllocaHoisting()
char & RegisterCoalescerID
RegisterCoalescer - This pass merges live ranges to eliminate copies.
Analysis pass providing the TargetTransformInfo.
MachineFunctionPass * createNVPTXPrologEpilogPass()
F(f)
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
ModulePass * createGenericToNVVMPass()
char & ProcessImplicitDefsID
ProcessImplicitDefs pass - This pass removes IMPLICIT_DEFs.
NVPTXTargetMachine32(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL)
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry &)
Target TheNVPTXTarget32
Definition: NVPTX.h:78
void initializeNVPTXLowerKernelArgsPass(PassRegistry &)
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
char & StackColoringID
StackColoring - This pass performs stack coloring and merging.
FunctionPass * createSROAPass(bool RequiresDomTree=true)
Definition: SROA.cpp:1275
FunctionPass * createLowerAggrCopies()
#define false
Definition: ConvertUTF.c:65
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &)
Target-Independent Code Generator Pass Configuration Options.
FunctionPass * createDeadCodeEliminationPass()
Definition: DCE.cpp:139
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:110
TargetIRAnalysis getTargetIRAnalysis() override
Get a TargetIRAnalysis implementation for the target.
char & StackSlotColoringID
StackSlotColoring - This pass performs stack slot coloring.
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
FunctionPass * createNVPTXImageOptimizerPass()
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions...
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is live and sets mac...
#define true
Definition: ConvertUTF.c:66
This file provides a TargetTransformInfo::Concept conforming object specific to the NVPTX target machine...
FunctionPass * createSeparateConstOffsetFromGEPPass(const TargetMachine *TM=nullptr, bool LowerGEP=false)
void initializeNVVMReflectPass(PassRegistry &)
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique(Args &&...args)
Constructs a new T() with the given args and returns a unique_ptr<T> which owns the object...
Definition: STLExtras.h:354
static bool is64Bit(const char *name)
MachineFunctionPass * createNVPTXReplaceImageHandlesPass()
Target TheNVPTXTarget64
Definition: NVPTX.h:79
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
This class describes a target machine that is implemented with the LLVM target-independent code gener...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
FunctionPass * createEarlyCSEPass()
Definition: EarlyCSE.cpp:771
void initializeNVPTXAllocaHoistingPass(PassRegistry &)
char & MachineCopyPropagationID
MachineCopyPropagation - This pass performs copy propagation on machine instructions.
void initializeGenericToNVVMPass(PassRegistry &)
FunctionPass * createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM)
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
void initializeNVPTXLowerAllocaPass(PassRegistry &)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
BasicBlockPass * createNVPTXLowerAllocaPass()
char & TailDuplicateID
TailDuplicate - Duplicate blocks with unconditional branches into tails of their predecessors.
char & MachineSinkingID
MachineSinking - This pass performs sinking on machine instructions.
char & OptimizePHIsID
OptimizePHIs - This pass optimizes machine instruction PHIs to take advantage of opportunities create...
Target - Wrapper for Target specific information.
NVPTXTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit)
char & PeepholeOptimizerID
PeepholeOptimizer - This pass performs peephole optimizations - like extension and comparison elimina...
char & PrologEpilogCodeInserterID
PrologEpilogCodeInserter - This pass inserts prolog and epilog code, and eliminates abstract frame re...
NVPTXTargetMachine.
MachineFunctionPass * createNVPTXPeephole()
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG, ready for instruction scheduling.
This file defines passes to print out IR in various granularities.
char & TwoAddressInstructionPassID
TwoAddressInstruction - This pass reduces two-address instructions to use two operands.
bool hasImageHandles() const
RegisterTargetMachine - Helper template for registering a target machine implementation, for use in the target machine initialization function.
char & BranchFolderPassID
BranchFolding - This pass performs machine code CFG based optimizations to delete branches to branche...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:41
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
NVPTXTargetMachine64(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL)
char & LocalStackSlotAllocationID
LocalStackSlotAllocation - This pass assigns local frame indices to stack slots relative to one anoth...
This file describes how to lower LLVM code to machine code.
FunctionPass * createNaryReassociatePass()