LLVM  9.0.0svn
NVPTXTargetMachine.cpp
Go to the documentation of this file.
1 //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Top-level implementation for the NVPTX target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "NVPTXTargetMachine.h"
14 #include "NVPTX.h"
15 #include "NVPTXAllocaHoisting.h"
16 #include "NVPTXLowerAggrCopies.h"
17 #include "NVPTXTargetObjectFile.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/Triple.h"
22 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/Pass.h"
31 #include "llvm/Transforms/Scalar.h"
34 #include <cassert>
35 #include <string>
36 
37 using namespace llvm;
38 
39 // LSV is still relatively new; this switch lets us turn it off in case we
40 // encounter (or suspect) a bug.
41 static cl::opt<bool>
42  DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
43  cl::desc("Disable load/store vectorizer"),
44  cl::init(false), cl::Hidden);
45 
46 // TODO: Remove this flag when we are confident with no regressions.
48  "disable-nvptx-require-structured-cfg",
49  cl::desc("Transitional flag to turn off NVPTX's requirement on preserving "
50  "structured CFG. The requirement should be disabled only when "
51  "unexpected regressions happen."),
52  cl::init(false), cl::Hidden);
53 
55  "nvptx-short-ptr",
56  cl::desc(
57  "Use 32-bit pointers for accessing const/local/shared address spaces."),
58  cl::init(false), cl::Hidden);
59 
60 namespace llvm {
61 
71 
72 } // end namespace llvm
73 
74 extern "C" void LLVMInitializeNVPTXTarget() {
75  // Register the target.
78 
79  // FIXME: This pass is really intended to be invoked during IR optimization,
80  // but it's very NVPTX-specific.
91 }
92 
/// Build the data layout string used by the NVPTX target machine.
///
/// The result always starts with "e" and always ends with the fixed
/// integer/vector/native-width suffix; only the pointer entries in between
/// depend on the arguments.
///
/// \param is64Bit          When false, the generic pointer entry "-p:32:32" is
///                         added. When true, no generic pointer entry is added.
/// \param UseShortPointers Only consulted when \p is64Bit is true: adds
///                         32-bit pointer entries for p3, p4 and p5.
/// \returns The assembled layout string.
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
  std::string Ret = "e";

  if (!is64Bit)
    Ret += "-p:32:32";
  else if (UseShortPointers)
    // NOTE(review): p3/p4/p5 presumably correspond to the const/local/shared
    // address spaces named in the nvptx-short-ptr option text -- confirm.
    Ret += "-p3:32:32-p4:32:32-p5:32:32";

  Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";

  return Ret;
}
105 
107  StringRef CPU, StringRef FS,
108  const TargetOptions &Options,
111  CodeGenOpt::Level OL, bool is64bit)
112  // The pic relocation model is used regardless of what the client has
113  // specified, as it is the only relocation model currently supported.
115  CPU, FS, Options, Reloc::PIC_,
116  getEffectiveCodeModel(CM, CodeModel::Small), OL),
117  is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
119  Subtarget(TT, CPU, FS, *this) {
120  if (TT.getOS() == Triple::NVCL)
121  drvInterface = NVPTX::NVCL;
122  else
123  drvInterface = NVPTX::CUDA;
126  initAsmInfo();
127 }
128 
130 
131 void NVPTXTargetMachine32::anchor() {}
132 
134  StringRef CPU, StringRef FS,
135  const TargetOptions &Options,
138  CodeGenOpt::Level OL, bool JIT)
139  : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
140 
141 void NVPTXTargetMachine64::anchor() {}
142 
144  StringRef CPU, StringRef FS,
145  const TargetOptions &Options,
148  CodeGenOpt::Level OL, bool JIT)
149  : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
150 
151 namespace {
152 
153 class NVPTXPassConfig : public TargetPassConfig {
154 public:
155  NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM)
156  : TargetPassConfig(TM, PM) {}
157 
158  NVPTXTargetMachine &getNVPTXTargetMachine() const {
159  return getTM<NVPTXTargetMachine>();
160  }
161 
162  void addIRPasses() override;
163  bool addInstSelector() override;
164  void addPreRegAlloc() override;
165  void addPostRegAlloc() override;
166  void addMachineSSAOptimization() override;
167 
168  FunctionPass *createTargetRegisterAllocator(bool) override;
169  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
170  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
171 
172 private:
173  // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
174  // function is only called in opt mode.
175  void addEarlyCSEOrGVNPass();
176 
177  // Add passes that propagate special memory spaces.
178  void addAddressSpaceInferencePasses();
179 
180  // Add passes that perform straight-line scalar optimizations.
181  void addStraightLineScalarOptimizationPasses();
182 };
183 
184 } // end anonymous namespace
185 
187  return new NVPTXPassConfig(*this, PM);
188 }
189 
191  Builder.addExtension(
193  [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
194  PM.add(createNVVMReflectPass(Subtarget.getSmVersion()));
195  PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
196  });
197 }
198 
201  return TargetTransformInfo(NVPTXTTIImpl(this, F));
202 }
203 
204 void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
206  addPass(createGVNPass());
207  else
208  addPass(createEarlyCSEPass());
209 }
210 
211 void NVPTXPassConfig::addAddressSpaceInferencePasses() {
212  // NVPTXLowerArgs emits alloca for byval parameters which can often
213  // be eliminated by SROA.
214  addPass(createSROAPass());
215  addPass(createNVPTXLowerAllocaPass());
216  addPass(createInferAddressSpacesPass());
217 }
218 
219 void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
222 // ReassociateGEPs exposes more opportunities for SLSR. See
223  // the example in reassociate-geps-and-slsr.ll.
225 // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
226  // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
227  // for some of our benchmarks.
228  addEarlyCSEOrGVNPass();
229  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
230  addPass(createNaryReassociatePass());
231  // NaryReassociate on GEPs creates redundant common expressions, so run
232  // EarlyCSE after it.
233  addPass(createEarlyCSEPass());
234 }
235 
236 void NVPTXPassConfig::addIRPasses() {
237  // The following passes are known to not play well with virtual regs hanging
238  // around after register allocation (which in our case, is *all* registers).
239  // We explicitly disable them here. We do, however, need some functionality
240  // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
241  // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
242  disablePass(&PrologEpilogCodeInserterID);
243  disablePass(&MachineCopyPropagationID);
244  disablePass(&TailDuplicateID);
245  disablePass(&StackMapLivenessID);
246  disablePass(&LiveDebugValuesID);
247  disablePass(&PostRAMachineSinkingID);
248  disablePass(&PostRASchedulerID);
249  disablePass(&FuncletLayoutID);
250  disablePass(&PatchableFunctionID);
251  disablePass(&ShrinkWrapID);
252 
253  // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
254  // it here does nothing. But since we need it for correctness when lowering
255  // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
256  // call addEarlyAsPossiblePasses.
257  const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
258  addPass(createNVVMReflectPass(ST.getSmVersion()));
259 
260  if (getOptLevel() != CodeGenOpt::None)
263  addPass(createGenericToNVVMPass());
264 
265  // NVPTXLowerArgs is required for correctness and should be run right
266  // before the address space inference passes.
267  addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
268  if (getOptLevel() != CodeGenOpt::None) {
269  addAddressSpaceInferencePasses();
272  addStraightLineScalarOptimizationPasses();
273  }
274 
275  // === LSR and other generic IR passes ===
277  // EarlyCSE is not always strong enough to clean up what LSR produces. For
278  // example, GVN can combine
279  //
280  // %0 = add %a, %b
281  // %1 = add %b, %a
282  //
283  // and
284  //
285  // %0 = shl nsw %a, 2
286  // %1 = shl %a, 2
287  //
288  // but EarlyCSE can do neither of them.
289  if (getOptLevel() != CodeGenOpt::None)
290  addEarlyCSEOrGVNPass();
291 }
292 
293 bool NVPTXPassConfig::addInstSelector() {
294  const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
295 
296  addPass(createLowerAggrCopies());
297  addPass(createAllocaHoisting());
298  addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
299 
300  if (!ST.hasImageHandles())
302 
303  return false;
304 }
305 
306 void NVPTXPassConfig::addPreRegAlloc() {
307  // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
309 }
310 
311 void NVPTXPassConfig::addPostRegAlloc() {
312  addPass(createNVPTXPrologEpilogPass(), false);
313  if (getOptLevel() != CodeGenOpt::None) {
314  // NVPTXPrologEpilogPass calculates frame object offset and replace frame
315  // index with VRFrame register. NVPTXPeephole need to be run after that and
316  // will replace VRFrame with VRFrameLocal when possible.
317  addPass(createNVPTXPeephole());
318  }
319 }
320 
321 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
322  return nullptr; // No reg alloc
323 }
324 
325 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
326  assert(!RegAllocPass && "NVPTX uses no regalloc!");
327  addPass(&PHIEliminationID);
328  addPass(&TwoAddressInstructionPassID);
329 }
330 
331 void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
332  assert(!RegAllocPass && "NVPTX uses no regalloc!");
333 
334  addPass(&ProcessImplicitDefsID);
335  addPass(&LiveVariablesID);
336  addPass(&MachineLoopInfoID);
337  addPass(&PHIEliminationID);
338 
339  addPass(&TwoAddressInstructionPassID);
340  addPass(&RegisterCoalescerID);
341 
342  // PreRA instruction scheduling.
343  if (addPass(&MachineSchedulerID))
344  printAndVerify("After Machine Scheduling");
345 
346 
347  addPass(&StackSlotColoringID);
348 
349  // FIXME: Needs physical registers
350  //addPass(&MachineLICMID);
351 
352  printAndVerify("After StackSlotColoring");
353 }
354 
355 void NVPTXPassConfig::addMachineSSAOptimization() {
356  // Pre-ra tail duplication.
357  if (addPass(&EarlyTailDuplicateID))
358  printAndVerify("After Pre-RegAlloc TailDuplicate");
359 
360  // Optimize PHIs before DCE: removing dead PHI cycles may make more
361  // instructions dead.
362  addPass(&OptimizePHIsID);
363 
364  // This pass merges large allocas. StackSlotColoring is a different pass
365  // which merges spill slots.
366  addPass(&StackColoringID);
367 
368  // If the target requests it, assign local variables to stack slots relative
369  // to one another and simplify frame index references where possible.
370  addPass(&LocalStackSlotAllocationID);
371 
372  // With optimization, dead code should already be eliminated. However
373  // there is one known exception: lowered code for arguments that are only
374  // used by tail calls, where the tail calls reuse the incoming stack
375  // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
377  printAndVerify("After codegen DCE pass");
378 
379  // Allow targets to insert passes that improve instruction level parallelism,
380  // like if-conversion. Such passes will typically need dominator trees and
381  // loop info, just like LICM and CSE below.
382  if (addILPOpts())
383  printAndVerify("After ILP optimizations");
384 
385  addPass(&EarlyMachineLICMID);
386  addPass(&MachineCSEID);
387 
388  addPass(&MachineSinkingID);
389  printAndVerify("After Machine LICM, CSE and Sinking passes");
390 
391  addPass(&PeepholeOptimizerID);
392  printAndVerify("After codegen peephole optimization pass");
393 }
FunctionPass * createSpeculativeExecutionPass()
FunctionPass * createStraightLineStrengthReducePass()
FunctionPass * createGVNPass(bool NoLoads=false)
Create a legacy GVN pass.
Definition: GVN.cpp:2611
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void initializeNVPTXLowerArgsPass(PassRegistry &)
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value...
MachineFunctionPass * createNVPTXProxyRegErasurePass()
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void LLVMInitializeNVPTXTarget()
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:298
ModulePass * createNVPTXAssignValidGlobalNamesPass()
char & FuncletLayoutID
This pass lays out funclets contiguously.
FunctionPass * createAllocaHoisting()
static cl::opt< bool > DisableRequireStructuredCFG("disable-nvptx-require-structured-cfg", cl::desc("Transitional flag to turn off NVPTX's requirement on preserving " "structured CFG. The requirement should be disabled only when " "unexpected regressions happen."), cl::init(false), cl::Hidden)
char & RegisterCoalescerID
RegisterCoalescer - This pass merges live ranges to eliminate copies.
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
MachineFunctionPass * createNVPTXPrologEpilogPass()
F(f)
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
block Block Frequency true
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique(Args &&... args)
Constructs a new T() with the given args and returns a unique_ptr<T> which owns the object...
Definition: STLExtras.h:1348
ModulePass * createGenericToNVVMPass()
char & ProcessImplicitDefsID
ProcessImplicitDefs pass - This pass removes IMPLICIT_DEFs.
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry &)
FunctionPass * createNVVMReflectPass(unsigned int SmVersion)
Definition: NVVMReflect.cpp:62
NVPTXTargetMachine64(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
bool hasImageHandles() const
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
char & StackColoringID
StackColoring - This pass performs stack coloring and merging.
Target-Independent Code Generator Pass Configuration Options.
Target & getTheNVPTXTarget64()
static cl::opt< bool > UseShortPointersOpt("nvptx-short-ptr", cl::desc("Use 32-bit pointers for accessing const/local/shared address spaces."), cl::init(false), cl::Hidden)
char & EarlyTailDuplicateID
Duplicate blocks with unconditional branches into tails of their predecessors.
RegisterTargetMachine - Helper template for registering a target machine implementation, for use in the target machine initialization function.
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
Definition: MachineCSE.cpp:132
char & StackSlotColoringID
StackSlotColoring - This pass performs stack slot coloring.
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
FunctionPass * createNVPTXImageOptimizerPass()
void initializeNVVMIntrRangePass(PassRegistry &)
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions...
Target & getTheNVPTXTarget32()
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is live and sets mac...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
This file provides a TargetTransformInfo::Concept conforming object specific to the NVPTX target machine...
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers)
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &)
void initializeNVVMReflectPass(PassRegistry &)
NVPTXTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OP, bool is64bit)
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
static bool is64Bit(const char *name)
MachineFunctionPass * createNVPTXReplaceImageHandlesPass()
char & PostRAMachineSinkingID
This pass performs post-ra machine sink for COPY instructions.
char & LiveDebugValuesID
LiveDebugValues pass.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
void initializeNVPTXAllocaHoistingPass(PassRegistry &)
char & MachineCopyPropagationID
MachineCopyPropagation - This pass performs copy propagation on machine instructions.
char & TwoAddressInstructionPassID
TwoAddressInstruction - This pass reduces two-address instructions to use two operands.
void initializeGenericToNVVMPass(PassRegistry &)
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations, allowing them to see the code as it is coming out of the frontend.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
char & PostRASchedulerID
createPostRAScheduler - This pass performs post register allocation scheduling.
void initializeNVPTXLowerAllocaPass(PassRegistry &)
NVPTXTargetMachine32(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
BasicBlockPass * createNVPTXLowerAllocaPass()
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
char & TailDuplicateID
TailDuplicate - Duplicate blocks with unconditional branches into tails of their predecessors.
char & MachineSinkingID
MachineSinking - This pass performs sinking on machine instructions.
void setRequiresStructuredCFG(bool Value)
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
char & OptimizePHIsID
OptimizePHIs - This pass optimizes machine instruction PHIs to take advantage of opportunities create...
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
Target - Wrapper for Target specific information.
char & PeepholeOptimizerID
PeepholeOptimizer - This pass performs peephole optimizations - like extension and comparison elimina...
void initializeNVPTXProxyRegErasurePass(PassRegistry &)
char & PrologEpilogCodeInserterID
PrologEpilogCodeInserter - This pass inserts prolog and epilog code, and eliminates abstract frame re...
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
FunctionPass * createInferAddressSpacesPass()
NVPTXTargetMachine.
MachineFunctionPass * createNVPTXPeephole()
FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel)
createNVPTXISelDag - This pass converts a legalized DAG into a NVPTX-specific DAG, ready for instruction scheduling.
~NVPTXTargetMachine() override
FunctionPass * createNVVMIntrRangePass(unsigned int SmVersion)
FunctionPass * createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM)
FunctionPass * createLowerAggrCopies()
TargetOptions Options
Definition: TargetMachine.h:96
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g.
FunctionPass * createSROAPass()
Definition: SROA.cpp:4585
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const NVPTXSubtarget * getSubtargetImpl() const
unsigned int getSmVersion() const
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1301
static cl::opt< bool > DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer", cl::desc("Disable load/store vectorizer"), cl::init(false), cl::Hidden)
char & ShrinkWrapID
ShrinkWrap pass. Look for the best place to insert save and restore.
Definition: ShrinkWrap.cpp:249
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
This pass exposes codegen information to IR-level passes.
void addExtension(ExtensionPointTy Ty, ExtensionFn Fn)
char & LocalStackSlotAllocationID
LocalStackSlotAllocation - This pass assigns local frame indices to stack slots relative to one anoth...
FunctionPass * createNaryReassociatePass()