LLVM  6.0.0svn
PPCTargetMachine.cpp
Go to the documentation of this file.
1 //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Top-level implementation for the PowerPC target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PPCTargetMachine.h"
16 #include "PPC.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetObjectFile.h"
19 #include "PPCTargetTransformInfo.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/Triple.h"
25 #include "llvm/CodeGen/Passes.h"
27 #include "llvm/IR/Attributes.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/Pass.h"
31 #include "llvm/Support/CodeGen.h"
36 #include "llvm/Transforms/Scalar.h"
37 #include <cassert>
38 #include <memory>
39 #include <string>
40 
41 using namespace llvm;
42 
43 
// Command-line switches for the PowerPC backend. Every flag in this group is
// a boolean cl::opt marked cl::Hidden; only EnableGEPOpt is given an explicit
// initial value (true).

// "enable-ppc-branch-coalesce": request coalescing of duplicate branches.
44 static cl::opt<bool>
45  EnableBranchCoalescing("enable-ppc-branch-coalesce", cl::Hidden,
46  cl::desc("enable coalescing of duplicate branches for PPC"));
// "disable-ppc-ctrloops": turn off CTR loops.
47 static cl::
48 opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
49  cl::desc("Disable CTR loops for PPC"));
50 
// "disable-ppc-preinc-prep": turn off the loop pre-increment preparation.
51 static cl::
52 opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
53  cl::desc("Disable PPC loop preinc prep"));
54 
// "schedule-ppc-vsx-fma-mutation-early": schedule the VSX FMA mutation early.
55 static cl::opt<bool>
56 VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
57  cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
58 
// "disable-ppc-vsx-swap-removal": turn off VSX swap removal.
59 static cl::
60 opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
61  cl::desc("Disable VSX Swap Removal for PPC"));
62 
// "disable-ppc-qpx-load-splat": when set, addPreSched2() does not add the QPX
// load splat pass.
63 static cl::
64 opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
65  cl::desc("Disable QPX load splat simplification"));
66 
// "disable-ppc-peephole": when set, addMachineSSAOptimization() does not add
// the PPC machine peephole pass.
67 static cl::
68 opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
69  cl::desc("Disable machine peepholes for PPC"));
70 
// "ppc-gep-opt": on by default. addIRPasses() only adds the GEP-lowering
// clean-up passes when this is true and the optimization level is at least
// CodeGenOpt::Default.
71 static cl::opt<bool>
72 EnableGEPOpt("ppc-gep-opt", cl::Hidden,
73  cl::desc("Enable optimizations on complex GEPs"),
74  cl::init(true));
75 
// "enable-ppc-prefetching": off by default. When the option appears on the
// command line, addIRPasses() uses its value to decide whether the loop data
// prefetch pass is added, so the help text must describe it as an enabling
// switch.
76 static cl::opt<bool>
77 EnablePrefetch("enable-ppc-prefetching",
78  cl::desc("enable software prefetching on PPC"),
79  cl::init(false), cl::Hidden);
80 
// "enable-ppc-extra-toc-reg-deps": add extra TOC register dependencies.
// Initialised to true, so the behaviour is on unless explicitly disabled.
81 static cl::opt<bool>
82 EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
83  cl::desc("Add extra TOC register dependencies"),
84  cl::init(true), cl::Hidden);
85 
// "ppc-machine-combiner": enable the machine combiner pass. Initialised to
// true, so the pass is requested unless explicitly disabled.
86 static cl::opt<bool>
87 EnableMachineCombinerPass("ppc-machine-combiner",
88  cl::desc("Enable the machine combiner pass"),
89  cl::init(true), cl::Hidden);
90 
91 extern "C" void LLVMInitializePowerPCTarget() {
92  // Register the targets
96 
101 }
102 
103 /// Return the datalayout string of a subtarget.
104 static std::string getDataLayoutString(const Triple &T) {
105  bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
106  std::string Ret;
107 
108  // Most PPC* platforms are big endian, PPC64LE is little endian.
109  if (T.getArch() == Triple::ppc64le)
110  Ret = "e";
111  else
112  Ret = "E";
113 
115 
116  // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
117  // pointers.
118  if (!is64Bit || T.getOS() == Triple::Lv2)
119  Ret += "-p:32:32";
120 
121  // Note, the alignment values for f64 and i64 on ppc64 in Darwin
122  // documentation are wrong; these are correct (i.e. "what gcc does").
123  if (is64Bit || !T.isOSDarwin())
124  Ret += "-i64:64";
125  else
126  Ret += "-f64:32:64";
127 
128  // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
129  if (is64Bit)
130  Ret += "-n32:64";
131  else
132  Ret += "-n32";
133 
134  return Ret;
135 }
136 
138  const Triple &TT) {
139  std::string FullFS = FS;
140 
141  // Make sure 64-bit features are available when CPUname is generic
142  if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
143  if (!FullFS.empty())
144  FullFS = "+64bit," + FullFS;
145  else
146  FullFS = "+64bit";
147  }
148 
149  if (OL >= CodeGenOpt::Default) {
150  if (!FullFS.empty())
151  FullFS = "+crbits," + FullFS;
152  else
153  FullFS = "+crbits";
154  }
155 
156  if (OL != CodeGenOpt::None) {
157  if (!FullFS.empty())
158  FullFS = "+invariant-function-descriptors," + FullFS;
159  else
160  FullFS = "+invariant-function-descriptors";
161  }
162 
163  return FullFS;
164 }
165 
/// Build the object-file lowering helper for the given triple.
///
/// \param TT target triple; only its Darwin-ness is inspected.
/// \returns a newly allocated TargetLoweringObjectFileMachO for Darwin
/// triples, otherwise a PPC64LinuxTargetObjectFile. Ownership is handed to
/// the caller through the unique_ptr.
166 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
167  // If it isn't a Mach-O file then it's going to be a linux ELF
168  // object file.
169  if (TT.isOSDarwin())
170  return llvm::make_unique<TargetLoweringObjectFileMachO>();
171 
  // Every non-Darwin triple takes this path; no other OS is distinguished.
172  return llvm::make_unique<PPC64LinuxTargetObjectFile>();
173 }
174 
176  const TargetOptions &Options) {
177  if (Options.MCOptions.getABIName().startswith("elfv1"))
179  else if (Options.MCOptions.getABIName().startswith("elfv2"))
181 
182  assert(Options.MCOptions.getABIName().empty() &&
183  "Unknown target-abi option!");
184 
185  if (TT.isMacOSX())
187 
188  switch (TT.getArch()) {
189  case Triple::ppc64le:
191  case Triple::ppc64:
193  default:
195  }
196 }
197 
200  if (RM.hasValue())
201  return *RM;
202 
203  // Darwin defaults to dynamic-no-pic.
204  if (TT.isOSDarwin())
205  return Reloc::DynamicNoPIC;
206 
207  // Non-darwin 64-bit platforms are PIC by default.
208  if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)
209  return Reloc::PIC_;
210 
211  // 32-bit is static by default.
212  return Reloc::Static;
213 }
214 
217  bool JIT) {
218  if (CM)
219  return *CM;
220  if (!TT.isOSDarwin() && !JIT &&
221  (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le))
222  return CodeModel::Medium;
223  return CodeModel::Small;
224 }
225 
226 // The FeatureString here is a little subtle. We are modifying the feature
227 // string with what are (currently) non-function specific overrides as it goes
228 // into the LLVMTargetMachine constructor and then using the stored value in the
229 // Subtarget constructor below it.
231  StringRef CPU, StringRef FS,
232  const TargetOptions &Options,
235  CodeGenOpt::Level OL, bool JIT)
236  : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
237  computeFSAdditions(FS, OL, TT), Options,
238  getEffectiveRelocModel(TT, RM),
239  getEffectiveCodeModel(TT, CM, JIT), OL),
240  TLOF(createTLOF(getTargetTriple())),
241  TargetABI(computeTargetABI(TT, Options)) {
242  initAsmInfo();
243 }
244 
246 
247 const PPCSubtarget *
249  Attribute CPUAttr = F.getFnAttribute("target-cpu");
250  Attribute FSAttr = F.getFnAttribute("target-features");
251 
252  std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
253  ? CPUAttr.getValueAsString().str()
254  : TargetCPU;
255  std::string FS = !FSAttr.hasAttribute(Attribute::None)
256  ? FSAttr.getValueAsString().str()
257  : TargetFS;
258 
259  // FIXME: This is related to the code below to reset the target options,
260  // we need to know whether or not the soft float flag is set on the
261  // function before we can generate a subtarget. We also need to use
262  // it as a key for the subtarget since that can be the only difference
263  // between two functions.
264  bool SoftFloat =
265  F.getFnAttribute("use-soft-float").getValueAsString() == "true";
266  // If the soft float attribute is set on the function turn on the soft float
267  // subtarget feature.
268  if (SoftFloat)
269  FS += FS.empty() ? "-hard-float" : ",-hard-float";
270 
271  auto &I = SubtargetMap[CPU + FS];
272  if (!I) {
273  // This needs to be done before we create a new subtarget since any
274  // creation will depend on the TM and the code generation flags on the
275  // function that reside in TargetOptions.
277  I = llvm::make_unique<PPCSubtarget>(
278  TargetTriple, CPU,
279  // FIXME: It would be good to have the subtarget additions here
280  // not necessary. Anything that turns them on/off (overrides) ends
281  // up being put at the end of the feature string, but the defaults
282  // shouldn't require adding them. Fixing this means pulling Feature64Bit
283  // out of most of the target cpus in the .td file and making it set only
284  // as part of initialization via the TargetTriple.
286  }
287  return I.get();
288 }
289 
290 //===----------------------------------------------------------------------===//
291 // Pass Pipeline Configuration
292 //===----------------------------------------------------------------------===//
293 
294 namespace {
295 
296 /// PPC Code Generator Pass Configuration Options.
///
/// Derives from TargetPassConfig and overrides the hooks listed below so the
/// PowerPC backend can add its own passes. The hook bodies are defined out of
/// line later in this file.
297 class PPCPassConfig : public TargetPassConfig {
298 public:
  /// Forwards the target machine and pass manager to the base class.
299  PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM)
300  : TargetPassConfig(TM, PM) {}
301 
  /// Returns the owning target machine typed as PPCTargetMachine, so hooks
  /// can pass it to PPC-specific pass factories.
302  PPCTargetMachine &getPPCTargetMachine() const {
303  return getTM<PPCTargetMachine>();
304  }
305 
  // Overridden pass-pipeline hooks; each adds passes via addPass().
306  void addIRPasses() override;
307  bool addPreISel() override;
308  bool addILPOpts() override;
309  bool addInstSelector() override;
310  void addMachineSSAOptimization() override;
311  void addPreRegAlloc() override;
312  void addPreSched2() override;
313  void addPreEmitPass() override;
314 };
315 
316 } // end anonymous namespace
317 
319  return new PPCPassConfig(*this, PM);
320 }
321 
322 void PPCPassConfig::addIRPasses() {
323  if (TM->getOptLevel() != CodeGenOpt::None)
324  addPass(createPPCBoolRetToIntPass());
325  addPass(createAtomicExpandPass());
326 
327  // For the BG/Q (or if explicitly requested), add explicit data prefetch
328  // intrinsics.
329  bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
331  if (EnablePrefetch.getNumOccurrences() > 0)
332  UsePrefetching = EnablePrefetch;
333  if (UsePrefetching)
334  addPass(createLoopDataPrefetchPass());
335 
336  if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
337  // Call SeparateConstOffsetFromGEP pass to extract constants within indices
338  // and lower a GEP with multiple indices to either arithmetic operations or
339  // multiple GEPs with single index.
341  // Call EarlyCSE pass to find and remove subexpressions in the lowered
342  // result.
343  addPass(createEarlyCSEPass());
344  // Do loop invariant code motion in case part of the lowered result is
345  // invariant.
346  addPass(createLICMPass());
347  }
348 
350 }
351 
352 bool PPCPassConfig::addPreISel() {
354  addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
355 
357  addPass(createPPCCTRLoops());
358 
359  return false;
360 }
361 
362 bool PPCPassConfig::addILPOpts() {
363  addPass(&EarlyIfConverterID);
364 
366  addPass(&MachineCombinerID);
367 
368  return true;
369 }
370 
371 bool PPCPassConfig::addInstSelector() {
372  // Install an instruction selector.
373  addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel()));
374 
375 #ifndef NDEBUG
377  addPass(createPPCCTRLoopsVerify());
378 #endif
379 
380  addPass(createPPCVSXCopyPass());
381  return false;
382 }
383 
384 void PPCPassConfig::addMachineSSAOptimization() {
385  // PPCBranchCoalescingPass needs to be done before machine sinking
386  // since it merges empty blocks.
390  // For little endian, remove where possible the vector swap instructions
391  // introduced at code generation to normalize vector element order.
392  if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
394  addPass(createPPCVSXSwapRemovalPass());
395  // Target-specific peephole cleanups performed after instruction
396  // selection.
397  if (!DisableMIPeephole) {
398  addPass(createPPCMIPeepholePass());
400  }
401 }
402 
403 void PPCPassConfig::addPreRegAlloc() {
404  if (getOptLevel() != CodeGenOpt::None) {
408  }
409 
410  // FIXME: We probably don't need to run these for -fPIE.
411  if (getPPCTargetMachine().isPositionIndependent()) {
412  // FIXME: LiveVariables should not be necessary here!
413  // PPCTLSDynamicCallPass uses LiveIntervals which previously depended on
414  // LiveVariables. This (unnecessary) dependency has been removed now,
415  // however a stage-2 clang build fails without LiveVariables computed here.
416  addPass(&LiveVariablesID, false);
417  addPass(createPPCTLSDynamicCallPass());
418  }
420  addPass(createPPCTOCRegDepsPass());
421 }
422 
/// Adds the PPC passes for the pre-sched2 hook.
///
/// Nothing is added at CodeGenOpt::None. Otherwise the if-converter is added
/// first, followed by the QPX load splat pass unless DisableQPXLoadSplat is
/// set.
423 void PPCPassConfig::addPreSched2() {
424  if (getOptLevel() != CodeGenOpt::None) {
425  addPass(&IfConverterID);
426 
427  // This optimization must happen after anything that might do store-to-load
428  // forwarding. Here we're after RA (and, thus, when spills are inserted)
429  // but before post-RA scheduling.
430  if (!DisableQPXLoadSplat)
431  addPass(createPPCQPXLoadSplatPass());
432  }
433 }
434 
/// Adds the PPC passes for the pre-emit hook.
///
/// Order of addition: the ISEL expansion pass (always), the early-return pass
/// (skipped at CodeGenOpt::None), then branch selection (always, and last).
435 void PPCPassConfig::addPreEmitPass() {
436  addPass(createPPCExpandISELPass());
437 
  // NOTE(review): the second argument `false` is forwarded to addPass();
  // presumably it turns off verification after the pass - confirm against
  // TargetPassConfig::addPass.
438  if (getOptLevel() != CodeGenOpt::None)
439  addPass(createPPCEarlyReturnPass(), false);
440  // Must run branch selection immediately preceding the asm printer.
441  addPass(createPPCBranchSelectionPass(), false);
442 }
443 
445  return TargetIRAnalysis([this](const Function &F) {
446  return TargetTransformInfo(PPCTTIImpl(this, F));
447  });
448 }
uint64_t CallInst * C
bool isOSDarwin() const
isOSDarwin - Is this a "Darwin" OS (OS X, iOS, or watchOS).
Definition: Triple.h:468
char & MachineCombinerID
This pass performs instruction combining using trace metrics to estimate critical-path and resource d...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:228
MCTargetOptions MCOptions
Machine level options.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool isMacOSX() const
isMacOSX - Is this a Mac OS X triple.
Definition: Triple.h:440
FunctionPass * createPPCVSXSwapRemovalPass()
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with...
Definition: TargetMachine.h:76
void initializePPCTLSDynamicCallPass(PassRegistry &)
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:292
static cl::opt< bool > DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden, cl::desc("Disable QPX load splat simplification"))
Target & getThePPC32Target()
static cl::opt< bool > EnableMachineCombinerPass("ppc-machine-combiner", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden)
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
char & RegisterCoalescerID
RegisterCoalescer - This pass merges live ranges to eliminate copies.
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions...
Analysis pass providing the TargetTransformInfo.
PPCTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
F(f)
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, const Triple &TT)
char & PPCVSXFMAMutateID
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form...
static const char * getManglingComponent(const Triple &T)
Definition: DataLayout.cpp:154
FunctionPass * createPPCTLSDynamicCallPass()
static cl::opt< bool > DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden, cl::desc("Disable VSX Swap Removal for PPC"))
FunctionPass * createLoopDataPrefetchPass()
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
FunctionPass * createPPCTOCRegDepsPass()
This file defines a TargetTransformInfo::Concept conforming object specific to the PPC target machine...
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, const TargetOptions &Options)
FunctionPass * createPPCCTRLoops()
This file contains the simple types necessary to represent the attributes associated with functions a...
No attributes have been set.
Definition: Attributes.h:72
Target-Independent Code Generator Pass Configuration Options.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:283
FunctionPass * createPPCBranchCoalescingPass()
createPPCBranchCoalescingPass - returns an instance of the Branch Coalescing Pass ...
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:133
FunctionPass * createPPCCTRLoopsVerify()
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
FunctionPass * createPPCBoolRetToIntPass()
Target & getThePPC64Target()
static CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM)
FunctionPass * createPPCBranchSelectionPass()
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is live and sets mac...
void initializePPCBoolRetToIntPass(PassRegistry &)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
bool hasAttribute(AttrKind Val) const
Return true if the attribute is present.
Definition: Attributes.cpp:202
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const PPCSubtarget * getSubtargetImpl() const =delete
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
FunctionPass * createSeparateConstOffsetFromGEPPass(const TargetMachine *TM=nullptr, bool LowerGEP=false)
FunctionPass * createPPCEarlyReturnPass()
static cl::opt< bool > VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early", cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"))
~PPCTargetMachine() override
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
void LLVMInitializePowerPCTarget()
static cl::opt< bool > DisableMIPeephole("disable-ppc-peephole", cl::Hidden, cl::desc("Disable machine peepholes for PPC"))
static bool is64Bit(const char *name)
void initializePPCVSXFMAMutatePass(PassRegistry &)
Pass * createLICMPass()
Definition: LICM.cpp:223
This class describes a target machine that is implemented with the LLVM target-independent code gener...
TargetIRAnalysis getTargetIRAnalysis() override
Get a TargetIRAnalysis implementation for the target.
const Triple & getTargetTriple() const
Common code between 32-bit and 64-bit PowerPC targets.
FunctionPass * createPPCVSXCopyPass()
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
static cl::opt< bool > EnablePrefetch("enable-ppc-prefetching", cl::desc("disable software prefetching on PPC"), cl::init(false), cl::Hidden)
void initializePPCExpandISELPass(PassRegistry &)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
static cl::opt< bool > EnableGEPOpt("ppc-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), cl::init(true))
Target - Wrapper for Target specific information.
Target & getThePPC64LETarget()
static cl::opt< bool > DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden, cl::desc("Disable PPC loop preinc prep"))
std::string TargetCPU
Definition: TargetMachine.h:77
static cl::opt< bool > DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, cl::desc("Disable CTR loops for PPC"))
bool hasValue() const
Definition: Optional.h:133
StringRef getABIName() const
getABIName - If this returns a non-empty string this represents the textual name of the ABI that we w...
FunctionPass * createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL)
createPPCISelDag - This pass converts a legalized DAG into a PowerPC-specific DAG, ready for instruction scheduling.
FunctionPass * createPPCQPXLoadSplatPass()
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:195
bool isPositionIndependent() const
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
#define I(x, y, z)
Definition: MD5.cpp:58
static cl::opt< bool > EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps", cl::desc("Add extra TOC register dependencies"), cl::init(true), cl::Hidden)
std::string TargetFS
Definition: TargetMachine.h:78
static cl::opt< bool > EnableBranchCoalescing("enable-ppc-branch-coalesce", cl::Hidden, cl::desc("enable coalescing of duplicate branches for PPC"))
FunctionPass * createPPCMIPeepholePass()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
FunctionPass * createPPCLoopPreIncPrepPass(PPCTargetMachine &TM)
RegisterTargetMachine - Helper template for registering a target machine implementation, for use in the target machine initialization function.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:270
static std::string getDataLayoutString(const Triple &T)
Return the datalayout string of a subtarget.
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
Definition: EarlyCSE.cpp:1086
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
This pass exposes codegen information to IR-level passes.
FunctionPass * createAtomicExpandPass()
FunctionPass * createPPCExpandISELPass()