1 //===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the X86 specific subclass of TargetSubtargetInfo.
11 //
12 //===----------------------------------------------------------------------===//
14 #include "X86.h"
16 #include "X86CallLowering.h"
17 #include "X86LegalizerInfo.h"
18 #include "X86RegisterBankInfo.h"
19 #include "X86Subtarget.h"
21 #include "X86TargetMachine.h"
22 #include "llvm/ADT/Triple.h"
25 #include "llvm/IR/Attributes.h"
26 #include "llvm/IR/ConstantRange.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/GlobalValue.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/CodeGen.h"
32 #include "llvm/Support/Debug.h"
37 #if defined(_MSC_VER)
38 #include <intrin.h>
39 #endif
41 using namespace llvm;
43 #define DEBUG_TYPE "subtarget"
47 #include "X86GenSubtargetInfo.inc"
49 // Temporary option to control early if-conversion for x86 while adding machine
50 // models.
51 static cl::opt<bool>
52 X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
53  cl::desc("Enable early if-conversion on X86"));
56 /// Classify a blockaddress reference for the current subtarget according to how
57 /// we should reference it in a non-pcrel context.
59  return classifyLocalReference(nullptr);
60 }
62 /// Classify a global variable reference for the current subtarget according to
63 /// how we should reference it in a non-pcrel context.
64 unsigned char
66  return classifyGlobalReference(GV, *GV->getParent());
67 }
69 unsigned char
71  // 64 bits can use %rip addressing for anything local.
72  if (is64Bit())
73  return X86II::MO_NO_FLAG;
75  // If this is for a position dependent executable, the static linker can
76  // figure it out.
77  if (!isPositionIndependent())
78  return X86II::MO_NO_FLAG;
80  // The COFF dynamic linker just patches the executable sections.
81  if (isTargetCOFF())
82  return X86II::MO_NO_FLAG;
84  if (isTargetDarwin()) {
85  // 32 bit macho has no relocation for a-b if a is undefined, even if
86  // b is in the section that is being relocated.
87  // This means we have to use a load even for GVs that are known to be
88  // local to the dso.
89  if (GV && (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
93  }
95  return X86II::MO_GOTOFF;
96 }
99  const Module &M) const {
100  // Large model never uses stubs.
102  return X86II::MO_NO_FLAG;
104  // Absolute symbols can be referenced directly.
105  if (GV) {
107  // See if we can use the 8-bit immediate form. Note that some instructions
108  // will sign extend the immediate operand, so to be conservative we only
109  // accept the range [0,128).
110  if (CR->getUnsignedMax().ult(128))
111  return X86II::MO_ABS8;
112  else
113  return X86II::MO_NO_FLAG;
114  }
115  }
117  if (TM.shouldAssumeDSOLocal(M, GV))
118  return classifyLocalReference(GV);
120  if (isTargetCOFF())
121  return X86II::MO_DLLIMPORT;
123  if (is64Bit())
124  return X86II::MO_GOTPCREL;
126  if (isTargetDarwin()) {
127  if (!isPositionIndependent())
130  }
132  return X86II::MO_GOT;
133 }
135 unsigned char
137  return classifyGlobalFunctionReference(GV, *GV->getParent());
138 }
140 unsigned char
142  const Module &M) const {
143  if (TM.shouldAssumeDSOLocal(M, GV))
144  return X86II::MO_NO_FLAG;
146  if (isTargetCOFF()) {
148  "shouldAssumeDSOLocal gave inconsistent answer");
149  return X86II::MO_DLLIMPORT;
150  }
152  const Function *F = dyn_cast_or_null<Function>(GV);
154  if (isTargetELF()) {
155  if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv()))
156  // According to psABI, PLT stub clobbers XMM8-XMM15.
157  // In Regcall calling convention those registers are used for passing
158  // parameters. Thus we need to prevent lazy binding in Regcall.
159  return X86II::MO_GOTPCREL;
160  if (F && F->hasFnAttribute(Attribute::NonLazyBind) && is64Bit())
161  return X86II::MO_GOTPCREL;
162  return X86II::MO_PLT;
163  }
165  if (is64Bit()) {
166  if (F && F->hasFnAttribute(Attribute::NonLazyBind))
167  // If the function is marked as non-lazy, generate an indirect call
168  // which loads from the GOT directly. This avoids runtime overhead
169  // at the cost of eager binding (and one extra byte of encoding).
170  return X86II::MO_GOTPCREL;
171  return X86II::MO_NO_FLAG;
172  }
174  return X86II::MO_NO_FLAG;
175 }
177 /// Return true if the subtarget allows calls to immediate address.
179  // FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
180  // but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does,
181  // the following check for Win32 should be removed.
182  if (In64BitMode || isTargetWin32())
183  return false;
185 }
187 void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
188  std::string CPUName = CPU;
189  if (CPUName.empty())
190  CPUName = "generic";
192  // Make sure 64-bit features are available in 64-bit mode. (But make sure
193  // SSE2 can be turned off explicitly.)
194  std::string FullFS = FS;
195  if (In64BitMode) {
196  if (!FullFS.empty())
197  FullFS = "+64bit,+sse2," + FullFS;
198  else
199  FullFS = "+64bit,+sse2";
200  }
202  // LAHF/SAHF are always supported in non-64-bit mode.
203  if (!In64BitMode) {
204  if (!FullFS.empty())
205  FullFS = "+sahf," + FullFS;
206  else
207  FullFS = "+sahf";
208  }
210  // Parse features string and set the CPU.
211  ParseSubtargetFeatures(CPUName, FullFS);
213  // All CPUs that implement SSE4.2 or SSE4A support unaligned accesses of
214  // 16-bytes and under that are reasonably fast. These features were
215  // introduced with Intel's Nehalem/Silvermont and AMD's Family10h
216  // micro-architectures respectively.
217  if (hasSSE42() || hasSSE4A())
218  IsUAMem16Slow = false;
220  InstrItins = getInstrItineraryForCPU(CPUName);
222  // It's important to keep the MCSubtargetInfo feature bits in sync with
223  // target data structure which is shared with MC code emitter, etc.
224  if (In64BitMode)
225  ToggleFeature(X86::Mode64Bit);
226  else if (In32BitMode)
227  ToggleFeature(X86::Mode32Bit);
228  else if (In16BitMode)
229  ToggleFeature(X86::Mode16Bit);
230  else
231  llvm_unreachable("Not 16-bit, 32-bit or 64-bit mode!");
233  DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
234  << ", 3DNowLevel " << X863DNowLevel
235  << ", 64bit " << HasX86_64 << "\n");
236  assert((!In64BitMode || HasX86_64) &&
237  "64-bit code requested on a subtarget that doesn't support it!");
239  // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
240  // 32 and 64 bit) and for all 64-bit targets.
241  if (StackAlignOverride)
242  stackAlignment = StackAlignOverride;
243  else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
244  isTargetKFreeBSD() || In64BitMode)
245  stackAlignment = 16;
247  // Some CPUs have more overhead for gather. The specified overhead is relative
248  // to the Load operation. "2" is the number provided by Intel architects. This
249  // parameter is used for cost estimation of Gather Op and comparison with
250  // other alternatives.
251  // TODO: Remove the explicit hasAVX512()?, That would mean we would only
252  // enable gather with a -march.
253  if (hasAVX512() || (hasAVX2() && hasFastGather()))
254  GatherOverhead = 2;
255  if (hasAVX512())
256  ScatterOverhead = 2;
257 }
259 void X86Subtarget::initializeEnvironment() {
260  X86SSELevel = NoSSE;
262  HasX87 = false;
263  HasNOPL = false;
264  HasCMov = false;
265  HasX86_64 = false;
266  HasPOPCNT = false;
267  HasSSE4A = false;
268  HasAES = false;
269  HasVAES = false;
270  HasFXSR = false;
271  HasXSAVE = false;
272  HasXSAVEOPT = false;
273  HasXSAVEC = false;
274  HasXSAVES = false;
275  HasPCLMUL = false;
276  HasVPCLMULQDQ = false;
277  HasGFNI = false;
278  HasFMA = false;
279  HasFMA4 = false;
280  HasXOP = false;
281  HasTBM = false;
282  HasLWP = false;
283  HasMOVBE = false;
284  HasRDRAND = false;
285  HasF16C = false;
286  HasFSGSBase = false;
287  HasLZCNT = false;
288  HasBMI = false;
289  HasBMI2 = false;
290  HasVBMI = false;
291  HasVBMI2 = false;
292  HasIFMA = false;
293  HasRTM = false;
294  HasERI = false;
295  HasCDI = false;
296  HasPFI = false;
297  HasDQI = false;
298  HasVPOPCNTDQ = false;
299  HasBWI = false;
300  HasVLX = false;
301  HasADX = false;
302  HasPKU = false;
303  HasVNNI = false;
304  HasBITALG = false;
305  HasSHA = false;
306  HasPREFETCHWT1 = false;
307  HasPRFCHW = false;
308  HasRDSEED = false;
309  HasLAHFSAHF = false;
310  HasMWAITX = false;
311  HasCLZERO = false;
312  HasMPX = false;
313  HasSHSTK = false;
314  HasIBT = false;
315  HasSGX = false;
316  HasCLFLUSHOPT = false;
317  HasCLWB = false;
318  IsPMULLDSlow = false;
319  IsSHLDSlow = false;
320  IsUAMem16Slow = false;
321  IsUAMem32Slow = false;
322  HasSSEUnalignedMem = false;
323  HasCmpxchg16b = false;
324  UseLeaForSP = false;
325  HasFastVariableShuffle = false;
327  HasFastGather = false;
328  HasFastScalarFSQRT = false;
329  HasFastVectorFSQRT = false;
330  HasFastLZCNT = false;
331  HasFastSHLDRotate = false;
332  HasMacroFusion = false;
333  HasERMSB = false;
334  HasSlowDivide32 = false;
335  HasSlowDivide64 = false;
336  PadShortFunctions = false;
337  SlowTwoMemOps = false;
338  LEAUsesAG = false;
339  SlowLEA = false;
340  Slow3OpsLEA = false;
341  SlowIncDec = false;
342  stackAlignment = 4;
343  // FIXME: this is a known good value for Yonah. How about others?
345  UseSoftFloat = false;
347  GatherOverhead = 1024;
348  ScatterOverhead = 1024;
349 }
351 X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
352  StringRef FS) {
353  initializeEnvironment();
354  initSubtargetFeatures(CPU, FS);
355  return *this;
356 }
359  const X86TargetMachine &TM,
360  unsigned StackAlignOverride)
361  : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
362  PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
363  StackAlignOverride(StackAlignOverride),
364  In64BitMode(TargetTriple.getArch() == Triple::x86_64),
365  In32BitMode(TargetTriple.getArch() == Triple::x86 &&
366  TargetTriple.getEnvironment() != Triple::CODE16),
367  In16BitMode(TargetTriple.getArch() == Triple::x86 &&
368  TargetTriple.getEnvironment() == Triple::CODE16),
369  InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
370  FrameLowering(*this, getStackAlignment()) {
371  // Determine the PICStyle based on the target selected.
372  if (!isPositionIndependent())
374  else if (is64Bit())
376  else if (isTargetCOFF())
378  else if (isTargetDarwin())
380  else if (isTargetELF())
384  Legalizer.reset(new X86LegalizerInfo(*this, TM));
386  auto *RBI = new X86RegisterBankInfo(*getRegisterInfo());
387  RegBankInfo.reset(RBI);
388  InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI));
389 }
392  return CallLoweringInfo.get();
393 }
396  return InstSelector.get();
397 }
400  return Legalizer.get();
401 }
404  return RegBankInfo.get();
405 }
408  return hasCMov() && X86EarlyIfConv;
409 }
