X86FastISel.cpp
1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the X86-specific support for the FastISel class. Much
11 // of the target-specific code is generated by tablegen in the file
12 // X86GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "X86.h"
17 #include "X86CallingConv.h"
18 #include "X86InstrBuilder.h"
19 #include "X86InstrInfo.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86RegisterInfo.h"
22 #include "X86Subtarget.h"
23 #include "X86TargetMachine.h"
24 #include "llvm/Analysis/BranchProbabilityInfo.h"
25 #include "llvm/CodeGen/FastISel.h"
26 #include "llvm/CodeGen/FunctionLoweringInfo.h"
27 #include "llvm/CodeGen/MachineConstantPool.h"
28 #include "llvm/CodeGen/MachineFrameInfo.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/CallingConv.h"
32 #include "llvm/IR/DebugInfo.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GetElementPtrTypeIterator.h"
35 #include "llvm/IR/GlobalAlias.h"
36 #include "llvm/IR/GlobalVariable.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/IntrinsicInst.h"
39 #include "llvm/IR/Operator.h"
40 #include "llvm/MC/MCAsmInfo.h"
41 #include "llvm/MC/MCSymbol.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Target/TargetOptions.h"
44 using namespace llvm;
45 
46 namespace {
47 
48 class X86FastISel final : public FastISel {
49  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
50  /// make the right decision when generating code for different targets.
51  const X86Subtarget *Subtarget;
52 
53  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
54  /// floating point ops.
55  /// When SSE is available, use it for f32 operations.
56  /// When SSE2 is available, use it for f64 operations.
57  bool X86ScalarSSEf64;
58  bool X86ScalarSSEf32;
59 
60 public:
61  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
62  const TargetLibraryInfo *libInfo)
63  : FastISel(funcInfo, libInfo) {
64  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
65  X86ScalarSSEf64 = Subtarget->hasSSE2();
66  X86ScalarSSEf32 = Subtarget->hasSSE1();
67  }
68 
69  bool fastSelectInstruction(const Instruction *I) override;
70 
71  /// \brief The specified machine instr operand is a vreg, and that
72  /// vreg is being provided by the specified load instruction. If possible,
73  /// try to fold the load as an operand to the instruction, returning true on
74  /// success.
75  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
76  const LoadInst *LI) override;
77 
78  bool fastLowerArguments() override;
79  bool fastLowerCall(CallLoweringInfo &CLI) override;
80  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
81 
82 #include "X86GenFastISel.inc"
83 
84 private:
85  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
86  const DebugLoc &DL);
87 
88  bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
89  unsigned &ResultReg, unsigned Alignment = 1);
90 
91  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
92  MachineMemOperand *MMO = nullptr, bool Aligned = false);
93  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
94  X86AddressMode &AM,
95  MachineMemOperand *MMO = nullptr, bool Aligned = false);
96 
97  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
98  unsigned &ResultReg);
99 
100  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
101  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
102 
103  bool X86SelectLoad(const Instruction *I);
104 
105  bool X86SelectStore(const Instruction *I);
106 
107  bool X86SelectRet(const Instruction *I);
108 
109  bool X86SelectCmp(const Instruction *I);
110 
111  bool X86SelectZExt(const Instruction *I);
112 
113  bool X86SelectBranch(const Instruction *I);
114 
115  bool X86SelectShift(const Instruction *I);
116 
117  bool X86SelectDivRem(const Instruction *I);
118 
119  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
120 
121  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
122 
123  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
124 
125  bool X86SelectSelect(const Instruction *I);
126 
127  bool X86SelectTrunc(const Instruction *I);
128 
129  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
130  const TargetRegisterClass *RC);
131 
132  bool X86SelectFPExt(const Instruction *I);
133  bool X86SelectFPTrunc(const Instruction *I);
134  bool X86SelectSIToFP(const Instruction *I);
135 
136  const X86InstrInfo *getInstrInfo() const {
137  return Subtarget->getInstrInfo();
138  }
139  const X86TargetMachine *getTargetMachine() const {
140  return static_cast<const X86TargetMachine *>(&TM);
141  }
142 
143  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
144 
145  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
146  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
147  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
148  unsigned fastMaterializeConstant(const Constant *C) override;
149 
150  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
151 
152  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
153 
154  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
155  /// computed in an SSE register, not on the X87 floating point stack.
156  bool isScalarFPTypeInSSEReg(EVT VT) const {
157  return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
158  (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
159  }
160 
161  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
162 
163  bool IsMemcpySmall(uint64_t Len);
164 
165  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
166  X86AddressMode SrcAM, uint64_t Len);
167 
168  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
169  const Value *Cond);
170 
171  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
172  X86AddressMode &AM);
173 
174  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
175  const TargetRegisterClass *RC, unsigned Op0,
176  bool Op0IsKill, unsigned Op1, bool Op1IsKill,
177  unsigned Op2, bool Op2IsKill, unsigned Op3,
178  bool Op3IsKill);
179 };
180 
181 } // end anonymous namespace.
182 
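/// Map an IR comparison predicate to an X86 condition code, plus a flag saying
/// whether the compare operands must be swapped first. For example, ICMP_SGT maps
/// directly to COND_G, while FCMP_OLT is handled by swapping the operands and then
/// using COND_A, because UCOMISS/UCOMISD report their result in CF/ZF like an
/// unsigned integer compare. Predicates that need two flag checks (FCMP_OEQ,
/// FCMP_UNE) come back as COND_INVALID.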
183 static std::pair<X86::CondCode, bool>
184 getX86ConditionCode(CmpInst::Predicate Predicate) {
185  X86::CondCode CC = X86::COND_INVALID;
186  bool NeedSwap = false;
187  switch (Predicate) {
188  default: break;
189  // Floating-point Predicates
190  case CmpInst::FCMP_UEQ: CC = X86::COND_E; break;
191  case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH;
192  case CmpInst::FCMP_OGT: CC = X86::COND_A; break;
193  case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH;
194  case CmpInst::FCMP_OGE: CC = X86::COND_AE; break;
195  case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH;
196  case CmpInst::FCMP_ULT: CC = X86::COND_B; break;
197  case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH;
198  case CmpInst::FCMP_ULE: CC = X86::COND_BE; break;
199  case CmpInst::FCMP_ONE: CC = X86::COND_NE; break;
200  case CmpInst::FCMP_UNO: CC = X86::COND_P; break;
201  case CmpInst::FCMP_ORD: CC = X86::COND_NP; break;
202  case CmpInst::FCMP_OEQ:
203  case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
204 
205  // Integer Predicates
206  case CmpInst::ICMP_EQ: CC = X86::COND_E; break;
207  case CmpInst::ICMP_NE: CC = X86::COND_NE; break;
208  case CmpInst::ICMP_UGT: CC = X86::COND_A; break;
209  case CmpInst::ICMP_UGE: CC = X86::COND_AE; break;
210  case CmpInst::ICMP_ULT: CC = X86::COND_B; break;
211  case CmpInst::ICMP_ULE: CC = X86::COND_BE; break;
212  case CmpInst::ICMP_SGT: CC = X86::COND_G; break;
213  case CmpInst::ICMP_SGE: CC = X86::COND_GE; break;
214  case CmpInst::ICMP_SLT: CC = X86::COND_L; break;
215  case CmpInst::ICMP_SLE: CC = X86::COND_LE; break;
216  }
217 
218  return std::make_pair(CC, NeedSwap);
219 }
220 
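/// Map an FP comparison predicate to an SSE CMPSS/CMPSD immediate (see the table
/// below), plus an operand-swap flag. For example, FCMP_OGT swaps the operands and
/// uses immediate 1 (LT). FCMP_UEQ and FCMP_ONE cannot be expressed in the 0-7
/// range and are returned as the out-of-range value 8 so that callers can reject
/// or special-case them.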
221 static std::pair<unsigned, bool>
222 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
223  unsigned CC;
224  bool NeedSwap = false;
225 
226  // SSE Condition code mapping:
227  // 0 - EQ
228  // 1 - LT
229  // 2 - LE
230  // 3 - UNORD
231  // 4 - NEQ
232  // 5 - NLT
233  // 6 - NLE
234  // 7 - ORD
235  switch (Predicate) {
236  default: llvm_unreachable("Unexpected predicate");
237  case CmpInst::FCMP_OEQ: CC = 0; break;
238  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
239  case CmpInst::FCMP_OLT: CC = 1; break;
240  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
241  case CmpInst::FCMP_OLE: CC = 2; break;
242  case CmpInst::FCMP_UNO: CC = 3; break;
243  case CmpInst::FCMP_UNE: CC = 4; break;
244  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
245  case CmpInst::FCMP_UGE: CC = 5; break;
246  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
247  case CmpInst::FCMP_UGT: CC = 6; break;
248  case CmpInst::FCMP_ORD: CC = 7; break;
249  case CmpInst::FCMP_UEQ:
250  case CmpInst::FCMP_ONE: CC = 8; break;
251  }
252 
253  return std::make_pair(CC, NeedSwap);
254 }
255 
256 /// \brief Adds a complex addressing mode to the given machine instr builder.
257 /// Note, this will constrain the index register. If it's not possible to
258 /// constrain the given index register, then a new one will be created. The
259 /// IndexReg field of the addressing mode will be updated to match in this case.
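/// A typical use (sketch) is addFullAddress(BuildMI(MBB, I, DL, TII.get(Opc), DstReg), AM),
/// which appends the five X86 memory operands (base register, scale, index
/// register, displacement, segment) described by AM to the instruction under
/// construction.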
260 const MachineInstrBuilder &
261 X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
262  X86AddressMode &AM) {
263  // First constrain the index register. It needs to be a GR64_NOSP.
264  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
265  MIB->getNumOperands() +
266  X86::AddrIndexReg);
267  return ::addFullAddress(MIB, AM);
268 }
269 
270 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
271 /// into the user. The condition code will only be updated on success.
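/// For example, for (hypothetical) IR of the form
///   %s  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %ov = extractvalue { i32, i1 } %s, 1
///   br i1 %ov, label %overflow, label %cont
/// the branch can test the OF flag (COND_O) already set by the add instead of
/// first materializing %ov into a register.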
272 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
273  const Value *Cond) {
274  if (!isa<ExtractValueInst>(Cond))
275  return false;
276 
277  const auto *EV = cast<ExtractValueInst>(Cond);
278  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
279  return false;
280 
281  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
282  MVT RetVT;
283  const Function *Callee = II->getCalledFunction();
284  Type *RetTy =
285  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
286  if (!isTypeLegal(RetTy, RetVT))
287  return false;
288 
289  if (RetVT != MVT::i32 && RetVT != MVT::i64)
290  return false;
291 
292  X86::CondCode TmpCC;
293  switch (II->getIntrinsicID()) {
294  default: return false;
295  case Intrinsic::sadd_with_overflow:
296  case Intrinsic::ssub_with_overflow:
297  case Intrinsic::smul_with_overflow:
298  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
299  case Intrinsic::uadd_with_overflow:
300  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
301  }
302 
303  // Check if both instructions are in the same basic block.
304  if (II->getParent() != I->getParent())
305  return false;
306 
307  // Make sure nothing is in the way
308  BasicBlock::const_iterator Start(I);
309  BasicBlock::const_iterator End(II);
310  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
311  // We only expect extractvalue instructions between the intrinsic and the
312  // instruction to be selected.
313  if (!isa<ExtractValueInst>(Itr))
314  return false;
315 
316  // Check that the extractvalue operand comes from the intrinsic.
317  const auto *EVI = cast<ExtractValueInst>(Itr);
318  if (EVI->getAggregateOperand() != II)
319  return false;
320  }
321 
322  CC = TmpCC;
323  return true;
324 }
325 
326 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
327  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
328  if (evt == MVT::Other || !evt.isSimple())
329  // Unhandled type. Halt "fast" selection and bail.
330  return false;
331 
332  VT = evt.getSimpleVT();
333  // For now, require SSE/SSE2 for performing floating-point operations,
334  // since x87 requires additional work.
335  if (VT == MVT::f64 && !X86ScalarSSEf64)
336  return false;
337  if (VT == MVT::f32 && !X86ScalarSSEf32)
338  return false;
339  // Similarly, no f80 support yet.
340  if (VT == MVT::f80)
341  return false;
342  // We only handle legal types. For example, on x86-32 the instruction
343  // selector contains all of the 64-bit instructions from x86-64,
344  // under the assumption that i64 won't be used if the target doesn't
345  // support it.
346  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
347 }
348 
349 #include "X86GenCallingConv.inc"
350 
351 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
352 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
353 /// Return true and the result register by reference if it is possible.
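/// As an illustration of the opcode selection below: an aligned, non-temporal
/// load of a v4f32 value on a plain SSE4.1 target selects MOVNTDQArm, while the
/// same load on an AVX-512VL target selects VMOVNTDQAZ128rm.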
354 bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
355  MachineMemOperand *MMO, unsigned &ResultReg,
356  unsigned Alignment) {
357  bool HasSSE41 = Subtarget->hasSSE41();
358  bool HasAVX = Subtarget->hasAVX();
359  bool HasAVX2 = Subtarget->hasAVX2();
360  bool HasAVX512 = Subtarget->hasAVX512();
361  bool HasVLX = Subtarget->hasVLX();
362  bool IsNonTemporal = MMO && MMO->isNonTemporal();
363 
364  // Get opcode and regclass of the output for the given load instruction.
365  unsigned Opc = 0;
366  const TargetRegisterClass *RC = nullptr;
367  switch (VT.getSimpleVT().SimpleTy) {
368  default: return false;
369  case MVT::i1:
370  case MVT::i8:
371  Opc = X86::MOV8rm;
372  RC = &X86::GR8RegClass;
373  break;
374  case MVT::i16:
375  Opc = X86::MOV16rm;
376  RC = &X86::GR16RegClass;
377  break;
378  case MVT::i32:
379  Opc = X86::MOV32rm;
380  RC = &X86::GR32RegClass;
381  break;
382  case MVT::i64:
383  // Must be in x86-64 mode.
384  Opc = X86::MOV64rm;
385  RC = &X86::GR64RegClass;
386  break;
387  case MVT::f32:
388  if (X86ScalarSSEf32) {
389  Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
390  RC = &X86::FR32RegClass;
391  } else {
392  Opc = X86::LD_Fp32m;
393  RC = &X86::RFP32RegClass;
394  }
395  break;
396  case MVT::f64:
397  if (X86ScalarSSEf64) {
398  Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
399  RC = &X86::FR64RegClass;
400  } else {
401  Opc = X86::LD_Fp64m;
402  RC = &X86::RFP64RegClass;
403  }
404  break;
405  case MVT::f80:
406  // No f80 support yet.
407  return false;
408  case MVT::v4f32:
409  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
410  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
411  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
412  else if (Alignment >= 16)
413  Opc = HasVLX ? X86::VMOVAPSZ128rm :
414  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
415  else
416  Opc = HasVLX ? X86::VMOVUPSZ128rm :
417  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
418  RC = &X86::VR128RegClass;
419  break;
420  case MVT::v2f64:
421  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
422  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
423  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
424  else if (Alignment >= 16)
425  Opc = HasVLX ? X86::VMOVAPDZ128rm :
426  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
427  else
428  Opc = HasVLX ? X86::VMOVUPDZ128rm :
429  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
430  RC = &X86::VR128RegClass;
431  break;
432  case MVT::v4i32:
433  case MVT::v2i64:
434  case MVT::v8i16:
435  case MVT::v16i8:
436  if (IsNonTemporal && Alignment >= 16)
437  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
438  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
439  else if (Alignment >= 16)
440  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
441  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
442  else
443  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
444  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
445  RC = &X86::VR128RegClass;
446  break;
447  case MVT::v8f32:
448  assert(HasAVX);
449  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
450  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
451  else if (Alignment >= 32)
452  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
453  else
454  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
455  RC = &X86::VR256RegClass;
456  break;
457  case MVT::v4f64:
458  assert(HasAVX);
459  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
460  Opc = X86::VMOVNTDQAYrm;
461  else if (Alignment >= 32)
462  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
463  else
464  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
465  RC = &X86::VR256RegClass;
466  break;
467  case MVT::v8i32:
468  case MVT::v4i64:
469  case MVT::v16i16:
470  case MVT::v32i8:
471  assert(HasAVX);
472  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
473  Opc = X86::VMOVNTDQAYrm;
474  else if (Alignment >= 32)
475  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
476  else
477  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
478  RC = &X86::VR256RegClass;
479  break;
480  case MVT::v16f32:
481  assert(HasAVX512);
482  if (IsNonTemporal && Alignment >= 64)
483  Opc = X86::VMOVNTDQAZrm;
484  else
485  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
486  RC = &X86::VR512RegClass;
487  break;
488  case MVT::v8f64:
489  assert(HasAVX512);
490  if (IsNonTemporal && Alignment >= 64)
491  Opc = X86::VMOVNTDQAZrm;
492  else
493  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
494  RC = &X86::VR512RegClass;
495  break;
496  case MVT::v8i64:
497  case MVT::v16i32:
498  case MVT::v32i16:
499  case MVT::v64i8:
500  assert(HasAVX512);
501  // Note: There are a lot more choices based on type with AVX-512, but
502  // there's really no advantage when the load isn't masked.
503  if (IsNonTemporal && Alignment >= 64)
504  Opc = X86::VMOVNTDQAZrm;
505  else
506  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
507  RC = &X86::VR512RegClass;
508  break;
509  }
510 
511  ResultReg = createResultReg(RC);
512  MachineInstrBuilder MIB =
513  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
514  addFullAddress(MIB, AM);
515  if (MMO)
516  MIB->addMemOperand(*FuncInfo.MF, MMO);
517  return true;
518 }
519 
520 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
521 /// type VT. The address is either pre-computed, consisting of a base ptr, Ptr,
522 /// and a displacement offset, or a GlobalAddress, i.e. V.
523 /// Return true if it is possible.
524 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
525  X86AddressMode &AM,
526  MachineMemOperand *MMO, bool Aligned) {
527  bool HasSSE2 = Subtarget->hasSSE2();
528  bool HasSSE4A = Subtarget->hasSSE4A();
529  bool HasAVX = Subtarget->hasAVX();
530  bool HasAVX512 = Subtarget->hasAVX512();
531  bool HasVLX = Subtarget->hasVLX();
532  bool IsNonTemporal = MMO && MMO->isNonTemporal();
533 
534  // Get opcode and regclass of the output for the given store instruction.
535  unsigned Opc = 0;
536  switch (VT.getSimpleVT().SimpleTy) {
537  case MVT::f80: // No f80 support yet.
538  default: return false;
539  case MVT::i1: {
540  // Mask out all but lowest bit.
541  unsigned AndResult = createResultReg(&X86::GR8RegClass);
542  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
543  TII.get(X86::AND8ri), AndResult)
544  .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
545  ValReg = AndResult;
546  LLVM_FALLTHROUGH; // handle i1 as i8.
547  }
548  case MVT::i8: Opc = X86::MOV8mr; break;
549  case MVT::i16: Opc = X86::MOV16mr; break;
550  case MVT::i32:
551  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
552  break;
553  case MVT::i64:
554  // Must be in x86-64 mode.
555  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
556  break;
557  case MVT::f32:
558  if (X86ScalarSSEf32) {
559  if (IsNonTemporal && HasSSE4A)
560  Opc = X86::MOVNTSS;
561  else
562  Opc = HasAVX512 ? X86::VMOVSSZmr :
563  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
564  } else
565  Opc = X86::ST_Fp32m;
566  break;
567  case MVT::f64:
568  if (X86ScalarSSEf64) {
569  if (IsNonTemporal && HasSSE4A)
570  Opc = X86::MOVNTSD;
571  else
572  Opc = HasAVX512 ? X86::VMOVSDZmr :
573  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
574  } else
575  Opc = X86::ST_Fp64m;
576  break;
577  case MVT::v4f32:
578  if (Aligned) {
579  if (IsNonTemporal)
580  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
581  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
582  else
583  Opc = HasVLX ? X86::VMOVAPSZ128mr :
584  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
585  } else
586  Opc = HasVLX ? X86::VMOVUPSZ128mr :
587  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
588  break;
589  case MVT::v2f64:
590  if (Aligned) {
591  if (IsNonTemporal)
592  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
593  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
594  else
595  Opc = HasVLX ? X86::VMOVAPDZ128mr :
596  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
597  } else
598  Opc = HasVLX ? X86::VMOVUPDZ128mr :
599  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
600  break;
601  case MVT::v4i32:
602  case MVT::v2i64:
603  case MVT::v8i16:
604  case MVT::v16i8:
605  if (Aligned) {
606  if (IsNonTemporal)
607  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
608  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
609  else
610  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
611  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
612  } else
613  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
614  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
615  break;
616  case MVT::v8f32:
617  assert(HasAVX);
618  if (Aligned) {
619  if (IsNonTemporal)
620  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
621  else
622  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
623  } else
624  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
625  break;
626  case MVT::v4f64:
627  assert(HasAVX);
628  if (Aligned) {
629  if (IsNonTemporal)
630  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
631  else
632  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
633  } else
634  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
635  break;
636  case MVT::v8i32:
637  case MVT::v4i64:
638  case MVT::v16i16:
639  case MVT::v32i8:
640  assert(HasAVX);
641  if (Aligned) {
642  if (IsNonTemporal)
643  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
644  else
645  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
646  } else
647  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
648  break;
649  case MVT::v16f32:
650  assert(HasAVX512);
651  if (Aligned)
652  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
653  else
654  Opc = X86::VMOVUPSZmr;
655  break;
656  case MVT::v8f64:
657  assert(HasAVX512);
658  if (Aligned) {
659  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
660  } else
661  Opc = X86::VMOVUPDZmr;
662  break;
663  case MVT::v8i64:
664  case MVT::v16i32:
665  case MVT::v32i16:
666  case MVT::v64i8:
667  assert(HasAVX512);
668  // Note: There are a lot more choices based on type with AVX-512, but
669  // there's really no advantage when the store isn't masked.
670  if (Aligned)
671  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
672  else
673  Opc = X86::VMOVDQU64Zmr;
674  break;
675  }
676 
677  const MCInstrDesc &Desc = TII.get(Opc);
678  // Some of the instructions in the previous switch use FR128 instead
679  // of FR32 for ValReg. Make sure the register we feed the instruction
680  // matches its register class constraints.
681  // Note: This is fine to do a copy from FR32 to FR128, this is the
682  // same registers behind the scene and actually why it did not trigger
683  // any bugs before.
684  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
685  MachineInstrBuilder MIB =
686  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
687  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
688  if (MMO)
689  MIB->addMemOperand(*FuncInfo.MF, MMO);
690 
691  return true;
692 }
693 
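/// X86FastEmitStore - Convenience overload taking an IR value. It folds simple
/// integer constants (and null pointers) directly into an immediate-form store
/// such as MOV32mi; otherwise it materializes Val into a register and defers to
/// the register-based overload above.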
694 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
695  X86AddressMode &AM,
696  MachineMemOperand *MMO, bool Aligned) {
697  // Handle 'null' like i32/i64 0.
698  if (isa<ConstantPointerNull>(Val))
699  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
700 
701  // If this is a store of a simple constant, fold the constant into the store.
702  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
703  unsigned Opc = 0;
704  bool Signed = true;
705  switch (VT.getSimpleVT().SimpleTy) {
706  default: break;
707  case MVT::i1:
708  Signed = false;
709  LLVM_FALLTHROUGH; // Handle as i8.
710  case MVT::i8: Opc = X86::MOV8mi; break;
711  case MVT::i16: Opc = X86::MOV16mi; break;
712  case MVT::i32: Opc = X86::MOV32mi; break;
713  case MVT::i64:
714  // Must be a 32-bit sign extended value.
715  if (isInt<32>(CI->getSExtValue()))
716  Opc = X86::MOV64mi32;
717  break;
718  }
719 
720  if (Opc) {
721  MachineInstrBuilder MIB =
722  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
723  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
724  : CI->getZExtValue());
725  if (MMO)
726  MIB->addMemOperand(*FuncInfo.MF, MMO);
727  return true;
728  }
729  }
730 
731  unsigned ValReg = getRegForValue(Val);
732  if (ValReg == 0)
733  return false;
734 
735  bool ValKill = hasTrivialKill(Val);
736  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
737 }
738 
739 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
740 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
741 /// ISD::SIGN_EXTEND).
742 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
743  unsigned Src, EVT SrcVT,
744  unsigned &ResultReg) {
745  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
746  Src, /*TODO: Kill=*/false);
747  if (RR == 0)
748  return false;
749 
750  ResultReg = RR;
751  return true;
752 }
753 
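/// handleConstantAddresses - Try to fold a constant address (typically a
/// GlobalValue) into AM. Only the small code model and non-TLS globals are
/// handled; when the ABI requires an extra load, the stub pointer is loaded in
/// the local-value area and cached in LocalValueMap. As a last resort the value
/// is materialized into the base or index register.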
754 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
755  // Handle constant address.
756  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
757  // Can't handle alternate code models yet.
758  if (TM.getCodeModel() != CodeModel::Small)
759  return false;
760 
761  // Can't handle TLS yet.
762  if (GV->isThreadLocal())
763  return false;
764 
765  // RIP-relative addresses can't have additional register operands, so if
766  // we've already folded stuff into the addressing mode, just force the
767  // global value into its own register, which we can use as the basereg.
768  if (!Subtarget->isPICStyleRIPRel() ||
769  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
770  // Okay, we've committed to selecting this global. Set up the address.
771  AM.GV = GV;
772 
773  // Allow the subtarget to classify the global.
774  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
775 
776  // If this reference is relative to the pic base, set it now.
777  if (isGlobalRelativeToPICBase(GVFlags)) {
778  // FIXME: How do we know Base.Reg is free??
779  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
780  }
781 
782  // Unless the ABI requires an extra load, return a direct reference to
783  // the global.
784  if (!isGlobalStubReference(GVFlags)) {
785  if (Subtarget->isPICStyleRIPRel()) {
786  // Use rip-relative addressing if we can. Above we verified that the
787  // base and index registers are unused.
788  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
789  AM.Base.Reg = X86::RIP;
790  }
791  AM.GVOpFlags = GVFlags;
792  return true;
793  }
794 
795  // Ok, we need to do a load from a stub. If we've already loaded from
796  // this stub, reuse the loaded pointer, otherwise emit the load now.
797  DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
798  unsigned LoadReg;
799  if (I != LocalValueMap.end() && I->second != 0) {
800  LoadReg = I->second;
801  } else {
802  // Issue load from stub.
803  unsigned Opc = 0;
804  const TargetRegisterClass *RC = nullptr;
805  X86AddressMode StubAM;
806  StubAM.Base.Reg = AM.Base.Reg;
807  StubAM.GV = GV;
808  StubAM.GVOpFlags = GVFlags;
809 
810  // Prepare for inserting code in the local-value area.
811  SavePoint SaveInsertPt = enterLocalValueArea();
812 
813  if (TLI.getPointerTy(DL) == MVT::i64) {
814  Opc = X86::MOV64rm;
815  RC = &X86::GR64RegClass;
816 
817  if (Subtarget->isPICStyleRIPRel())
818  StubAM.Base.Reg = X86::RIP;
819  } else {
820  Opc = X86::MOV32rm;
821  RC = &X86::GR32RegClass;
822  }
823 
824  LoadReg = createResultReg(RC);
825  MachineInstrBuilder LoadMI =
826  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
827  addFullAddress(LoadMI, StubAM);
828 
829  // Ok, back to normal mode.
830  leaveLocalValueArea(SaveInsertPt);
831 
832  // Prevent loading GV stub multiple times in same MBB.
833  LocalValueMap[V] = LoadReg;
834  }
835 
836  // Now construct the final address. Note that the Disp, Scale,
837  // and Index values may already be set here.
838  AM.Base.Reg = LoadReg;
839  AM.GV = nullptr;
840  return true;
841  }
842  }
843 
844  // If all else fails, try to materialize the value in a register.
845  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
846  if (AM.Base.Reg == 0) {
847  AM.Base.Reg = getRegForValue(V);
848  return AM.Base.Reg != 0;
849  }
850  if (AM.IndexReg == 0) {
851  assert(AM.Scale == 1 && "Scale with no index!");
852  AM.IndexReg = getRegForValue(V);
853  return AM.IndexReg != 0;
854  }
855  }
856 
857  return false;
858 }
859 
860 /// X86SelectAddress - Attempt to fill in an address from the given value.
861 ///
862 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
863  SmallVector<const Value *, 32> GEPs;
864 redo_gep:
865  const User *U = nullptr;
866  unsigned Opcode = Instruction::UserOp1;
867  if (const Instruction *I = dyn_cast<Instruction>(V)) {
868  // Don't walk into other basic blocks; it's possible we haven't
869  // visited them yet, so the instructions may not yet be assigned
870  // virtual registers.
871  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
872  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
873  Opcode = I->getOpcode();
874  U = I;
875  }
876  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
877  Opcode = C->getOpcode();
878  U = C;
879  }
880 
881  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
882  if (Ty->getAddressSpace() > 255)
883  // Fast instruction selection doesn't support the special
884  // address spaces.
885  return false;
886 
887  switch (Opcode) {
888  default: break;
889  case Instruction::BitCast:
890  // Look past bitcasts.
891  return X86SelectAddress(U->getOperand(0), AM);
892 
893  case Instruction::IntToPtr:
894  // Look past no-op inttoptrs.
895  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
896  TLI.getPointerTy(DL))
897  return X86SelectAddress(U->getOperand(0), AM);
898  break;
899 
900  case Instruction::PtrToInt:
901  // Look past no-op ptrtoints.
902  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
903  return X86SelectAddress(U->getOperand(0), AM);
904  break;
905 
906  case Instruction::Alloca: {
907  // Do static allocas.
908  const AllocaInst *A = cast<AllocaInst>(V);
909  DenseMap<const AllocaInst *, int>::iterator SI =
910  FuncInfo.StaticAllocaMap.find(A);
911  if (SI != FuncInfo.StaticAllocaMap.end()) {
912  AM.BaseType = X86AddressMode::FrameIndexBase;
913  AM.Base.FrameIndex = SI->second;
914  return true;
915  }
916  break;
917  }
918 
919  case Instruction::Add: {
920  // Adds of constants are common and easy enough.
921  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
922  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
923  // They have to fit in the 32-bit signed displacement field though.
924  if (isInt<32>(Disp)) {
925  AM.Disp = (uint32_t)Disp;
926  return X86SelectAddress(U->getOperand(0), AM);
927  }
928  }
929  break;
930  }
931 
932  case Instruction::GetElementPtr: {
933  X86AddressMode SavedAM = AM;
934 
935  // Pattern-match simple GEPs.
936  uint64_t Disp = (int32_t)AM.Disp;
937  unsigned IndexReg = AM.IndexReg;
938  unsigned Scale = AM.Scale;
939  gep_type_iterator GTI = gep_type_begin(U);
940  // Iterate through the indices, folding what we can. Constants can be
941  // folded, and one dynamic index can be handled, if the scale is supported.
942  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
943  i != e; ++i, ++GTI) {
944  const Value *Op = *i;
945  if (StructType *STy = GTI.getStructTypeOrNull()) {
946  const StructLayout *SL = DL.getStructLayout(STy);
947  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
948  continue;
949  }
950 
951  // An array/variable index is always of the form i*S where S is the
952  // constant scale size. See if we can push the scale into immediates.
953  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
954  for (;;) {
955  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
956  // Constant-offset addressing.
957  Disp += CI->getSExtValue() * S;
958  break;
959  }
960  if (canFoldAddIntoGEP(U, Op)) {
961  // A compatible add with a constant operand. Fold the constant.
962  ConstantInt *CI =
963  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
964  Disp += CI->getSExtValue() * S;
965  // Iterate on the other operand.
966  Op = cast<AddOperator>(Op)->getOperand(0);
967  continue;
968  }
969  if (IndexReg == 0 &&
970  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
971  (S == 1 || S == 2 || S == 4 || S == 8)) {
972  // Scaled-index addressing.
973  Scale = S;
974  IndexReg = getRegForGEPIndex(Op).first;
975  if (IndexReg == 0)
976  return false;
977  break;
978  }
979  // Unsupported.
980  goto unsupported_gep;
981  }
982  }
983 
984  // Check for displacement overflow.
985  if (!isInt<32>(Disp))
986  break;
987 
988  AM.IndexReg = IndexReg;
989  AM.Scale = Scale;
990  AM.Disp = (uint32_t)Disp;
991  GEPs.push_back(V);
992 
993  if (const GetElementPtrInst *GEP =
994  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
995  // Ok, the GEP indices were covered by constant-offset and scaled-index
996  // addressing. Update the address state and move on to examining the base.
997  V = GEP;
998  goto redo_gep;
999  } else if (X86SelectAddress(U->getOperand(0), AM)) {
1000  return true;
1001  }
1002 
1003  // If we couldn't merge the gep value into this addr mode, revert back to
1004  // our address and just match the value instead of completely failing.
1005  AM = SavedAM;
1006 
1007  for (const Value *I : reverse(GEPs))
1008  if (handleConstantAddresses(I, AM))
1009  return true;
1010 
1011  return false;
1012  unsupported_gep:
1013  // Ok, the GEP indices weren't all covered.
1014  break;
1015  }
1016  }
1017 
1018  return handleConstantAddresses(V, AM);
1019 }
1020 
1021 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
1022 ///
1023 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
1024  const User *U = nullptr;
1025  unsigned Opcode = Instruction::UserOp1;
1026  const Instruction *I = dyn_cast<Instruction>(V);
1027  // Record if the value is defined in the same basic block.
1028  //
1029  // This information is crucial to know whether or not folding an
1030  // operand is valid.
1031  // Indeed, FastISel generates or reuses a virtual register for all
1032  // operands of all instructions it selects. Obviously, the definition and
1033  // its uses must use the same virtual register otherwise the produced
1034  // code is incorrect.
1035  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
1036  // registers for values that are alive across basic blocks. This ensures
1037  // that the values are consistently set across basic blocks, even
1038  // if different instruction selection mechanisms are used (e.g., a mix of
1039  // SDISel and FastISel).
1040  // For values local to a basic block, the instruction selection process
1041  // generates these virtual registers with whatever method is appropriate
1042  // for its needs. In particular, FastISel and SDISel do not share the way
1043  // local virtual registers are set.
1044  // Therefore, it is impossible (or at least unsafe) to share values
1045  // between basic blocks unless they use the same instruction selection
1046  // method, which is not guaranteed for X86.
1047  // Moreover, things like hasOneUse could not be used accurately if we
1048  // allowed referencing values across basic blocks when they are not
1049  // originally live across basic blocks.
1050  bool InMBB = true;
1051  if (I) {
1052  Opcode = I->getOpcode();
1053  U = I;
1054  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1055  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1056  Opcode = C->getOpcode();
1057  U = C;
1058  }
1059 
1060  switch (Opcode) {
1061  default: break;
1062  case Instruction::BitCast:
1063  // Look past bitcasts if its operand is in the same BB.
1064  if (InMBB)
1065  return X86SelectCallAddress(U->getOperand(0), AM);
1066  break;
1067 
1068  case Instruction::IntToPtr:
1069  // Look past no-op inttoptrs if its operand is in the same BB.
1070  if (InMBB &&
1071  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1072  TLI.getPointerTy(DL))
1073  return X86SelectCallAddress(U->getOperand(0), AM);
1074  break;
1075 
1076  case Instruction::PtrToInt:
1077  // Look past no-op ptrtoints if its operand is in the same BB.
1078  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1079  return X86SelectCallAddress(U->getOperand(0), AM);
1080  break;
1081  }
1082 
1083  // Handle constant address.
1084  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1085  // Can't handle alternate code models yet.
1086  if (TM.getCodeModel() != CodeModel::Small)
1087  return false;
1088 
1089  // RIP-relative addresses can't have additional register operands.
1090  if (Subtarget->isPICStyleRIPRel() &&
1091  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1092  return false;
1093 
1094  // Can't handle DLL Import.
1095  if (GV->hasDLLImportStorageClass())
1096  return false;
1097 
1098  // Can't handle TLS.
1099  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1100  if (GVar->isThreadLocal())
1101  return false;
1102 
1103  // Okay, we've committed to selecting this global. Set up the basic address.
1104  AM.GV = GV;
1105 
1106  // No ABI requires an extra load for anything other than DLLImport, which
1107  // we rejected above. Return a direct reference to the global.
1108  if (Subtarget->isPICStyleRIPRel()) {
1109  // Use rip-relative addressing if we can. Above we verified that the
1110  // base and index registers are unused.
1111  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1112  AM.Base.Reg = X86::RIP;
1113  } else {
1114  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1115  }
1116 
1117  return true;
1118  }
1119 
1120  // If all else fails, try to materialize the value in a register.
1121  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1122  if (AM.Base.Reg == 0) {
1123  AM.Base.Reg = getRegForValue(V);
1124  return AM.Base.Reg != 0;
1125  }
1126  if (AM.IndexReg == 0) {
1127  assert(AM.Scale == 1 && "Scale with no index!");
1128  AM.IndexReg = getRegForValue(V);
1129  return AM.IndexReg != 0;
1130  }
1131  }
1132 
1133  return false;
1134 }
1135 
1136 
1137 /// X86SelectStore - Select and emit code to implement store instructions.
1138 bool X86FastISel::X86SelectStore(const Instruction *I) {
1139  // Atomic stores need special handling.
1140  const StoreInst *S = cast<StoreInst>(I);
1141 
1142  if (S->isAtomic())
1143  return false;
1144 
1145  const Value *PtrV = I->getOperand(1);
1146  if (TLI.supportSwiftError()) {
1147  // Swifterror values can come from either a function parameter with
1148  // swifterror attribute or an alloca with swifterror attribute.
1149  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1150  if (Arg->hasSwiftErrorAttr())
1151  return false;
1152  }
1153 
1154  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1155  if (Alloca->isSwiftError())
1156  return false;
1157  }
1158  }
1159 
1160  const Value *Val = S->getValueOperand();
1161  const Value *Ptr = S->getPointerOperand();
1162 
1163  MVT VT;
1164  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1165  return false;
1166 
1167  unsigned Alignment = S->getAlignment();
1168  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
1169  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1170  Alignment = ABIAlignment;
1171  bool Aligned = Alignment >= ABIAlignment;
1172 
1173  X86AddressMode AM;
1174  if (!X86SelectAddress(Ptr, AM))
1175  return false;
1176 
1177  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1178 }
1179 
1180 /// X86SelectRet - Select and emit code to implement ret instructions.
1181 bool X86FastISel::X86SelectRet(const Instruction *I) {
1182  const ReturnInst *Ret = cast<ReturnInst>(I);
1183  const Function &F = *I->getParent()->getParent();
1184  const X86MachineFunctionInfo *X86MFInfo =
1185  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1186 
1187  if (!FuncInfo.CanLowerReturn)
1188  return false;
1189 
1190  if (TLI.supportSwiftError() &&
1191  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1192  return false;
1193 
1194  if (TLI.supportSplitCSR(FuncInfo.MF))
1195  return false;
1196 
1197  CallingConv::ID CC = F.getCallingConv();
1198  if (CC != CallingConv::C &&
1199  CC != CallingConv::Fast &&
1200  CC != CallingConv::X86_FastCall &&
1201  CC != CallingConv::X86_StdCall &&
1202  CC != CallingConv::X86_ThisCall &&
1203  CC != CallingConv::X86_64_SysV &&
1204  CC != CallingConv::Swift)
1205  return false;
1206 
1207  // Don't handle popping bytes if they don't fit the ret's immediate.
1208  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1209  return false;
1210 
1211  // fastcc with -tailcallopt is intended to provide a guaranteed
1212  // tail call optimization. Fastisel doesn't know how to do that.
1213  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
1214  return false;
1215 
1216  // Let SDISel handle vararg functions.
1217  if (F.isVarArg())
1218  return false;
1219 
1220  // Build a list of return value registers.
1221  SmallVector<unsigned, 4> RetRegs;
1222 
1223  if (Ret->getNumOperands() > 0) {
1224  SmallVector<ISD::OutputArg, 4> Outs;
1225  GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1226 
1227  // Analyze operands of the call, assigning locations to each operand.
1228  SmallVector<CCValAssign, 16> ValLocs;
1229  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1230  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1231 
1232  const Value *RV = Ret->getOperand(0);
1233  unsigned Reg = getRegForValue(RV);
1234  if (Reg == 0)
1235  return false;
1236 
1237  // Only handle a single return value for now.
1238  if (ValLocs.size() != 1)
1239  return false;
1240 
1241  CCValAssign &VA = ValLocs[0];
1242 
1243  // Don't bother handling odd stuff for now.
1244  if (VA.getLocInfo() != CCValAssign::Full)
1245  return false;
1246  // Only handle register returns for now.
1247  if (!VA.isRegLoc())
1248  return false;
1249 
1250  // The calling-convention tables for x87 returns don't tell
1251  // the whole story.
1252  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1253  return false;
1254 
1255  unsigned SrcReg = Reg + VA.getValNo();
1256  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1257  EVT DstVT = VA.getValVT();
1258  // Special handling for extended integers.
1259  if (SrcVT != DstVT) {
1260  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1261  return false;
1262 
1263  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1264  return false;
1265 
1266  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1267 
1268  if (SrcVT == MVT::i1) {
1269  if (Outs[0].Flags.isSExt())
1270  return false;
1271  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
1272  SrcVT = MVT::i8;
1273  }
1274  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1275  ISD::SIGN_EXTEND;
1276  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
1277  SrcReg, /*TODO: Kill=*/false);
1278  }
1279 
1280  // Make the copy.
1281  unsigned DstReg = VA.getLocReg();
1282  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1283  // Avoid a cross-class copy. This is very unlikely.
1284  if (!SrcRC->contains(DstReg))
1285  return false;
1286  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1287  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1288 
1289  // Add register to return instruction.
1290  RetRegs.push_back(VA.getLocReg());
1291  }
1292 
1293  // Swift calling convention does not require we copy the sret argument
1294  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1295 
1296  // All x86 ABIs require that for returning structs by value we copy
1297  // the sret argument into %rax/%eax (depending on ABI) for the return.
1298  // We saved the argument into a virtual register in the entry block,
1299  // so now we copy the value out and into %rax/%eax.
1300  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
1301  unsigned Reg = X86MFInfo->getSRetReturnReg();
1302  assert(Reg &&
1303  "SRetReturnReg should have been set in LowerFormalArguments()!");
1304  unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
1305  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1306  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1307  RetRegs.push_back(RetReg);
1308  }
1309 
1310  // Now emit the RET.
1311  MachineInstrBuilder MIB;
1312  if (X86MFInfo->getBytesToPopOnReturn()) {
1313  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1314  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
1315  .addImm(X86MFInfo->getBytesToPopOnReturn());
1316  } else {
1317  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1318  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
1319  }
1320  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1321  MIB.addReg(RetRegs[i], RegState::Implicit);
1322  return true;
1323 }
1324 
1325 /// X86SelectLoad - Select and emit code to implement load instructions.
1326 ///
1327 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1328  const LoadInst *LI = cast<LoadInst>(I);
1329 
1330  // Atomic loads need special handling.
1331  if (LI->isAtomic())
1332  return false;
1333 
1334  const Value *SV = I->getOperand(0);
1335  if (TLI.supportSwiftError()) {
1336  // Swifterror values can come from either a function parameter with
1337  // swifterror attribute or an alloca with swifterror attribute.
1338  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1339  if (Arg->hasSwiftErrorAttr())
1340  return false;
1341  }
1342 
1343  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1344  if (Alloca->isSwiftError())
1345  return false;
1346  }
1347  }
1348 
1349  MVT VT;
1350  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1351  return false;
1352 
1353  const Value *Ptr = LI->getPointerOperand();
1354 
1355  X86AddressMode AM;
1356  if (!X86SelectAddress(Ptr, AM))
1357  return false;
1358 
1359  unsigned Alignment = LI->getAlignment();
1360  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
1361  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1362  Alignment = ABIAlignment;
1363 
1364  unsigned ResultReg = 0;
1365  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1366  Alignment))
1367  return false;
1368 
1369  updateValueMap(I, ResultReg);
1370  return true;
1371 }
1372 
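/// X86ChooseCmpOpcode - Return the register-register compare opcode for VT:
/// CMPrr for the integer types, UCOMISS/UCOMISD (or their VEX forms with AVX)
/// for scalar FP when SSE is available, and 0 if the type cannot be compared
/// directly.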
1373 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1374  bool HasAVX = Subtarget->hasAVX();
1375  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
1376  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
1377 
1378  switch (VT.getSimpleVT().SimpleTy) {
1379  default: return 0;
1380  case MVT::i8: return X86::CMP8rr;
1381  case MVT::i16: return X86::CMP16rr;
1382  case MVT::i32: return X86::CMP32rr;
1383  case MVT::i64: return X86::CMP64rr;
1384  case MVT::f32:
1385  return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
1386  case MVT::f64:
1387  return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
1388  }
1389 }
1390 
1391 /// If RHSC is the right-hand side of a comparison, return a compare opcode
1392 /// that can fold it as an immediate (e.g. CMP32ri); otherwise return 0.
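/// For example, 'icmp ult i32 %x, 42' can use CMP32ri8, since 42 fits in a
/// sign-extended 8-bit immediate, which is three bytes shorter than CMP32ri.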
1393 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1394  int64_t Val = RHSC->getSExtValue();
1395  switch (VT.getSimpleVT().SimpleTy) {
1396  // Otherwise, we can't fold the immediate into this comparison.
1397  default:
1398  return 0;
1399  case MVT::i8:
1400  return X86::CMP8ri;
1401  case MVT::i16:
1402  if (isInt<8>(Val))
1403  return X86::CMP16ri8;
1404  return X86::CMP16ri;
1405  case MVT::i32:
1406  if (isInt<8>(Val))
1407  return X86::CMP32ri8;
1408  return X86::CMP32ri;
1409  case MVT::i64:
1410  if (isInt<8>(Val))
1411  return X86::CMP64ri8;
1412  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1413  // field.
1414  if (isInt<32>(Val))
1415  return X86::CMP64ri32;
1416  return 0;
1417  }
1418 }
1419 
1420 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1421  const DebugLoc &CurDbgLoc) {
1422  unsigned Op0Reg = getRegForValue(Op0);
1423  if (Op0Reg == 0) return false;
1424 
1425  // Handle 'null' like i32/i64 0.
1426  if (isa<ConstantPointerNull>(Op1))
1427  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1428 
1429  // We have two options: compare with register or immediate. If the RHS of
1430  // the compare is an immediate that we can fold into this compare, use
1431  // CMPri, otherwise use CMPrr.
1432  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1433  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1434  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1435  .addReg(Op0Reg)
1436  .addImm(Op1C->getSExtValue());
1437  return true;
1438  }
1439  }
1440 
1441  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1442  if (CompareOpc == 0) return false;
1443 
1444  unsigned Op1Reg = getRegForValue(Op1);
1445  if (Op1Reg == 0) return false;
1446  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1447  .addReg(Op0Reg)
1448  .addReg(Op1Reg);
1449 
1450  return true;
1451 }
1452 
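/// X86SelectCmp - Select and emit code for integer and FP compare instructions.
/// FCMP_FALSE/FCMP_TRUE are folded to constants, FCMP_OEQ/FCMP_UNE need a pair
/// of SETcc instructions combined with AND8rr/OR8rr, and everything else becomes
/// a single compare followed by a SETcc into an 8-bit result register.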
1453 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1454  const CmpInst *CI = cast<CmpInst>(I);
1455 
1456  MVT VT;
1457  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1458  return false;
1459 
1460  if (I->getType()->isIntegerTy(1) && Subtarget->hasAVX512())
1461  return false;
1462 
1463  // Try to optimize or fold the cmp.
1464  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1465  unsigned ResultReg = 0;
1466  switch (Predicate) {
1467  default: break;
1468  case CmpInst::FCMP_FALSE: {
1469  ResultReg = createResultReg(&X86::GR32RegClass);
1470  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1471  ResultReg);
1472  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
1473  X86::sub_8bit);
1474  if (!ResultReg)
1475  return false;
1476  break;
1477  }
1478  case CmpInst::FCMP_TRUE: {
1479  ResultReg = createResultReg(&X86::GR8RegClass);
1480  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1481  ResultReg).addImm(1);
1482  break;
1483  }
1484  }
1485 
1486  if (ResultReg) {
1487  updateValueMap(I, ResultReg);
1488  return true;
1489  }
1490 
1491  const Value *LHS = CI->getOperand(0);
1492  const Value *RHS = CI->getOperand(1);
1493 
1494  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1495  // We don't have to materialize a zero constant for this case and can just use
1496  // %x again on the RHS.
1497  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1498  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1499  if (RHSC && RHSC->isNullValue())
1500  RHS = LHS;
1501  }
1502 
1503  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1504  static const uint16_t SETFOpcTable[2][3] = {
1505  { X86::SETEr, X86::SETNPr, X86::AND8rr },
1506  { X86::SETNEr, X86::SETPr, X86::OR8rr }
1507  };
1508  const uint16_t *SETFOpc = nullptr;
1509  switch (Predicate) {
1510  default: break;
1511  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1512  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1513  }
1514 
1515  ResultReg = createResultReg(&X86::GR8RegClass);
1516  if (SETFOpc) {
1517  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1518  return false;
1519 
1520  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1521  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1522  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
1523  FlagReg1);
1524  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
1525  FlagReg2);
1526  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1527  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1528  updateValueMap(I, ResultReg);
1529  return true;
1530  }
1531 
1532  X86::CondCode CC;
1533  bool SwapArgs;
1534  std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
1535  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1536  unsigned Opc = X86::getSETFromCond(CC);
1537 
1538  if (SwapArgs)
1539  std::swap(LHS, RHS);
1540 
1541  // Emit a compare of LHS/RHS.
1542  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1543  return false;
1544 
1545  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
1546  updateValueMap(I, ResultReg);
1547  return true;
1548 }
1549 
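/// X86SelectZExt - Select and emit code for zext. An i1 source is first
/// zero-extended to i8; extension to i64 is done with a 32-bit zero-extending
/// move plus SUBREG_TO_REG, relying on the fact that 32-bit operations clear the
/// upper 32 bits of the destination register.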
1550 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1551  EVT DstVT = TLI.getValueType(DL, I->getType());
1552  if (!TLI.isTypeLegal(DstVT))
1553  return false;
1554 
1555  unsigned ResultReg = getRegForValue(I->getOperand(0));
1556  if (ResultReg == 0)
1557  return false;
1558 
1559  // Handle zero-extension from i1 to i8, which is common.
1560  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1561  if (SrcVT == MVT::i1) {
1562  // Set the high bits to zero.
1563  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1564  SrcVT = MVT::i8;
1565 
1566  if (ResultReg == 0)
1567  return false;
1568  }
1569 
1570  if (DstVT == MVT::i64) {
1571  // Handle extension to 64-bits via sub-register shenanigans.
1572  unsigned MovInst;
1573 
1574  switch (SrcVT.SimpleTy) {
1575  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1576  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1577  case MVT::i32: MovInst = X86::MOV32rr; break;
1578  default: llvm_unreachable("Unexpected zext to i64 source type");
1579  }
1580 
1581  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1582  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1583  .addReg(ResultReg);
1584 
1585  ResultReg = createResultReg(&X86::GR64RegClass);
1586  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1587  ResultReg)
1588  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1589  } else if (DstVT != MVT::i8) {
1590  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1591  ResultReg, /*Kill=*/true);
1592  if (ResultReg == 0)
1593  return false;
1594  }
1595 
1596  updateValueMap(I, ResultReg);
1597  return true;
1598 }
1599 
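/// X86SelectBranch - Select and emit code for conditional branches. A compare,
/// a trunc-to-i1, or an overflow intrinsic defined in the same basic block is
/// folded directly into the branch; otherwise the i1 condition is materialized
/// and re-tested before emitting a JCC.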
1600 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1601  // Unconditional branches are selected by tablegen-generated code.
1602  // Handle a conditional branch.
1603  const BranchInst *BI = cast<BranchInst>(I);
1604  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1605  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1606 
1607  // Fold the common case of a conditional branch with a comparison
1608  // in the same block (values defined on other blocks may not have
1609  // initialized registers).
1610  X86::CondCode CC;
1611  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1612  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1613  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1614 
1615  // Try to optimize or fold the cmp.
1616  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1617  switch (Predicate) {
1618  default: break;
1619  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1620  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1621  }
1622 
1623  const Value *CmpLHS = CI->getOperand(0);
1624  const Value *CmpRHS = CI->getOperand(1);
1625 
1626  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1627  // 0.0.
1628  // We don't have to materialize a zero constant for this case and can just
1629  // use %x again on the RHS.
1630  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1631  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1632  if (CmpRHSC && CmpRHSC->isNullValue())
1633  CmpRHS = CmpLHS;
1634  }
1635 
1636  // Try to take advantage of fallthrough opportunities.
1637  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1638  std::swap(TrueMBB, FalseMBB);
1639  Predicate = CmpInst::getInversePredicate(Predicate);
1640  }
1641 
1642  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1643  // code check. Instead two branch instructions are required to check all
1644  // the flags. First we change the predicate to a supported condition code,
1645  // which will be the first branch. Later on we will emit the second
1646  // branch.
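      // Roughly, "br (fcmp une %a, %b)" ends up as the following sketch
      // (registers illustrative):
      //   ucomiss %xmm1, %xmm0
      //   jne  TrueMBB          ; ordered-and-not-equal half
      //   jp   TrueMBB          ; unordered half (the extra branch below)
      //   jmp  FalseMBB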
1647  bool NeedExtraBranch = false;
1648  switch (Predicate) {
1649  default: break;
1650  case CmpInst::FCMP_OEQ:
1651  std::swap(TrueMBB, FalseMBB);
1652  LLVM_FALLTHROUGH;
1653  case CmpInst::FCMP_UNE:
1654  NeedExtraBranch = true;
1655  Predicate = CmpInst::FCMP_ONE;
1656  break;
1657  }
1658 
1659  bool SwapArgs;
1660  unsigned BranchOpc;
1661  std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
1662  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1663 
1664  BranchOpc = X86::GetCondBranchFromCond(CC);
1665  if (SwapArgs)
1666  std::swap(CmpLHS, CmpRHS);
1667 
1668  // Emit a compare of the LHS and RHS, setting the flags.
1669  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1670  return false;
1671 
1672  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1673  .addMBB(TrueMBB);
1674 
1675  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1676  // to UNE above).
1677  if (NeedExtraBranch) {
1678  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
1679  .addMBB(TrueMBB);
1680  }
1681 
1682  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1683  return true;
1684  }
1685  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1686  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1687  // typically happen for _Bool and C++ bools.
1688  MVT SourceVT;
1689  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1690  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1691  unsigned TestOpc = 0;
1692  switch (SourceVT.SimpleTy) {
1693  default: break;
1694  case MVT::i8: TestOpc = X86::TEST8ri; break;
1695  case MVT::i16: TestOpc = X86::TEST16ri; break;
1696  case MVT::i32: TestOpc = X86::TEST32ri; break;
1697  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1698  }
1699  if (TestOpc) {
1700  unsigned OpReg = getRegForValue(TI->getOperand(0));
1701  if (OpReg == 0) return false;
1702 
1703  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1704  .addReg(OpReg).addImm(1);
1705 
1706  unsigned JmpOpc = X86::JNE_1;
1707  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1708  std::swap(TrueMBB, FalseMBB);
1709  JmpOpc = X86::JE_1;
1710  }
1711 
1712  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
1713  .addMBB(TrueMBB);
1714 
1715  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1716  return true;
1717  }
1718  }
1719  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1720  // Fake-request the condition; otherwise the intrinsic might be completely
1721  // optimized away.
1722  unsigned TmpReg = getRegForValue(BI->getCondition());
1723  if (TmpReg == 0)
1724  return false;
1725 
1726  unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
1727 
1728  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1729  .addMBB(TrueMBB);
1730  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1731  return true;
1732  }
1733 
1734  // Otherwise do a clumsy setcc and re-test it.
1735  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1736  // in an explicit cast, so make sure to handle that correctly.
1737  unsigned OpReg = getRegForValue(BI->getCondition());
1738  if (OpReg == 0) return false;
1739 
1740  // In case OpReg is a K register, COPY to a GPR
1741  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1742  unsigned KOpReg = OpReg;
1743  OpReg = createResultReg(&X86::GR8RegClass);
1744  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1745  TII.get(TargetOpcode::COPY), OpReg)
1746  .addReg(KOpReg);
1747  }
1748  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1749  .addReg(OpReg)
1750  .addImm(1);
1751  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
1752  .addMBB(TrueMBB);
1753  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1754  return true;
1755 }
1756 
1757 bool X86FastISel::X86SelectShift(const Instruction *I) {
1758  unsigned CReg = 0, OpReg = 0;
1759  const TargetRegisterClass *RC = nullptr;
1760  if (I->getType()->isIntegerTy(8)) {
1761  CReg = X86::CL;
1762  RC = &X86::GR8RegClass;
1763  switch (I->getOpcode()) {
1764  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1765  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1766  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1767  default: return false;
1768  }
1769  } else if (I->getType()->isIntegerTy(16)) {
1770  CReg = X86::CX;
1771  RC = &X86::GR16RegClass;
1772  switch (I->getOpcode()) {
1773  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1774  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1775  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1776  default: return false;
1777  }
1778  } else if (I->getType()->isIntegerTy(32)) {
1779  CReg = X86::ECX;
1780  RC = &X86::GR32RegClass;
1781  switch (I->getOpcode()) {
1782  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1783  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1784  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1785  default: return false;
1786  }
1787  } else if (I->getType()->isIntegerTy(64)) {
1788  CReg = X86::RCX;
1789  RC = &X86::GR64RegClass;
1790  switch (I->getOpcode()) {
1791  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1792  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1793  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1794  default: return false;
1795  }
1796  } else {
1797  return false;
1798  }
1799 
1800  MVT VT;
1801  if (!isTypeLegal(I->getType(), VT))
1802  return false;
1803 
1804  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1805  if (Op0Reg == 0) return false;
1806 
1807  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1808  if (Op1Reg == 0) return false;
1809  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1810  CReg).addReg(Op1Reg);
1811 
1812  // The shift instruction uses X86::CL. If we defined a super-register
1813  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1814  if (CReg != X86::CL)
1815  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1816  TII.get(TargetOpcode::KILL), X86::CL)
1817  .addReg(CReg, RegState::Kill);
1818 
1819  unsigned ResultReg = createResultReg(RC);
1820  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1821  .addReg(Op0Reg);
1822  updateValueMap(I, ResultReg);
1823  return true;
1824 }
1825 
1826 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1827  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1828  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1829  const static bool S = true; // IsSigned
1830  const static bool U = false; // !IsSigned
1831  const static unsigned Copy = TargetOpcode::COPY;
1832  // For the X86 DIV/IDIV instruction, in most cases the dividend
1833  // (numerator) must be in a specific register pair highreg:lowreg,
1834  // producing the quotient in lowreg and the remainder in highreg.
1835  // For most data types, to set up the instruction, the dividend is
1836  // copied into lowreg, and lowreg is sign-extended or zero-extended
1837  // into highreg. The exception is i8, where the dividend is defined
1838  // as a single register rather than a register pair, and we
1839  // therefore directly sign-extend or zero-extend the dividend into
1840  // lowreg, instead of copying, and ignore the highreg.
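  // As a sketch (registers illustrative), a 32-bit "sdiv %a, %b" selected
  // here becomes:
  //   copy %a -> EAX
  //   cdq                  ; sign-extend EAX into EDX
  //   idivl %b             ; quotient in EAX, remainder in EDX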
1841  const static struct DivRemEntry {
1842  // The following portion depends only on the data type.
1843  const TargetRegisterClass *RC;
1844  unsigned LowInReg; // low part of the register pair
1845  unsigned HighInReg; // high part of the register pair
1846  // The following portion depends on both the data type and the operation.
1847  struct DivRemResult {
1848  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1849  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1850  // highreg, or copying a zero into highreg.
1851  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1852  // zero/sign-extending into lowreg for i8.
1853  unsigned DivRemResultReg; // Register containing the desired result.
1854  bool IsOpSigned; // Whether to use signed or unsigned form.
1855  } ResultTable[NumOps];
1856  } OpTable[NumTypes] = {
1857  { &X86::GR8RegClass, X86::AX, 0, {
1858  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1859  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1860  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1861  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1862  }
1863  }, // i8
1864  { &X86::GR16RegClass, X86::AX, X86::DX, {
1865  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1866  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1867  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1868  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1869  }
1870  }, // i16
1871  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1872  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1873  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1874  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1875  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1876  }
1877  }, // i32
1878  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1879  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1880  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1881  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1882  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1883  }
1884  }, // i64
1885  };
1886 
1887  MVT VT;
1888  if (!isTypeLegal(I->getType(), VT))
1889  return false;
1890 
1891  unsigned TypeIndex, OpIndex;
1892  switch (VT.SimpleTy) {
1893  default: return false;
1894  case MVT::i8: TypeIndex = 0; break;
1895  case MVT::i16: TypeIndex = 1; break;
1896  case MVT::i32: TypeIndex = 2; break;
1897  case MVT::i64: TypeIndex = 3;
1898  if (!Subtarget->is64Bit())
1899  return false;
1900  break;
1901  }
1902 
1903  switch (I->getOpcode()) {
1904  default: llvm_unreachable("Unexpected div/rem opcode");
1905  case Instruction::SDiv: OpIndex = 0; break;
1906  case Instruction::SRem: OpIndex = 1; break;
1907  case Instruction::UDiv: OpIndex = 2; break;
1908  case Instruction::URem: OpIndex = 3; break;
1909  }
1910 
1911  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1912  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1913  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1914  if (Op0Reg == 0)
1915  return false;
1916  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1917  if (Op1Reg == 0)
1918  return false;
1919 
1920  // Move op0 into low-order input register.
1921  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1922  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1923  // Zero-extend or sign-extend into high-order input register.
1924  if (OpEntry.OpSignExtend) {
1925  if (OpEntry.IsOpSigned)
1926  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1927  TII.get(OpEntry.OpSignExtend));
1928  else {
1929  unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1930  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1931  TII.get(X86::MOV32r0), Zero32);
1932 
1933  // Copy the zero into the appropriate sub/super/identical physical
1934  // register. Unfortunately the operations needed are not uniform enough
1935  // to fit neatly into the table above.
1936  if (VT == MVT::i16) {
1937  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1938  TII.get(Copy), TypeEntry.HighInReg)
1939  .addReg(Zero32, 0, X86::sub_16bit);
1940  } else if (VT == MVT::i32) {
1941  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1942  TII.get(Copy), TypeEntry.HighInReg)
1943  .addReg(Zero32);
1944  } else if (VT == MVT::i64) {
1945  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1946  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1947  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1948  }
1949  }
1950  }
1951  // Generate the DIV/IDIV instruction.
1952  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1953  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1954  // For i8 remainder, we can't reference AH directly, as we'll end
1955  // up with bogus copies like %R9B = COPY %AH. Reference AX
1956  // instead to prevent AH references in a REX instruction.
1957  //
1958  // The current assumption of the fast register allocator is that isel
1959  // won't generate explicit references to the GPR8_NOREX registers. If
1960  // the allocator and/or the backend get enhanced to be more robust in
1961  // that regard, this can be, and should be, removed.
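  // The workaround below is roughly: copy AX into a fresh GR16, shift it
  // right by 8, and read the sub_8bit of that register, so the remainder is
  // taken from a low byte instead of AH (a sketch of the emitted sequence).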
1962  unsigned ResultReg = 0;
1963  if ((I->getOpcode() == Instruction::SRem ||
1964  I->getOpcode() == Instruction::URem) &&
1965  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
1966  unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
1967  unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
1968  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1969  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
1970 
1971  // Shift AX right by 8 bits instead of using AH.
1972  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
1973  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
1974 
1975  // Now reference the 8-bit subreg of the result.
1976  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
1977  /*Kill=*/true, X86::sub_8bit);
1978  }
1979  // Copy the result out of the physreg if we haven't already.
1980  if (!ResultReg) {
1981  ResultReg = createResultReg(TypeEntry.RC);
1982  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
1983  .addReg(OpEntry.DivRemResultReg);
1984  }
1985  updateValueMap(I, ResultReg);
1986 
1987  return true;
1988 }
1989 
1990 /// \brief Emit a conditional move instruction (if they are supported) to lower
1991 /// the select.
1992 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
1993  // Check if the subtarget supports these instructions.
1994  if (!Subtarget->hasCMov())
1995  return false;
1996 
1997  // FIXME: Add support for i8.
1998  if (RetVT < MVT::i16 || RetVT > MVT::i64)
1999  return false;
2000 
2001  const Value *Cond = I->getOperand(0);
2002  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2003  bool NeedTest = true;
2004  X86::CondCode CC = X86::COND_NE;
2005 
2006  // Optimize conditions coming from a compare if both instructions are in the
2007  // same basic block (values defined in other basic blocks may not have
2008  // initialized registers).
2009  const auto *CI = dyn_cast<CmpInst>(Cond);
2010  if (CI && (CI->getParent() == I->getParent())) {
2011  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2012 
2013  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2014  static const uint16_t SETFOpcTable[2][3] = {
2015  { X86::SETNPr, X86::SETEr , X86::TEST8rr },
2016  { X86::SETPr, X86::SETNEr, X86::OR8rr }
2017  };
2018  const uint16_t *SETFOpc = nullptr;
2019  switch (Predicate) {
2020  default: break;
2021  case CmpInst::FCMP_OEQ:
2022  SETFOpc = &SETFOpcTable[0][0];
2023  Predicate = CmpInst::ICMP_NE;
2024  break;
2025  case CmpInst::FCMP_UNE:
2026  SETFOpc = &SETFOpcTable[1][0];
2027  Predicate = CmpInst::ICMP_NE;
2028  break;
2029  }
2030 
2031  bool NeedSwap;
2032  std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
2033  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2034 
2035  const Value *CmpLHS = CI->getOperand(0);
2036  const Value *CmpRHS = CI->getOperand(1);
2037  if (NeedSwap)
2038  std::swap(CmpLHS, CmpRHS);
2039 
2040  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2041  // Emit a compare of the LHS and RHS, setting the flags.
2042  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2043  return false;
2044 
2045  if (SETFOpc) {
2046  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
2047  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
2048  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
2049  FlagReg1);
2050  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
2051  FlagReg2);
2052  auto const &II = TII.get(SETFOpc[2]);
2053  if (II.getNumDefs()) {
2054  unsigned TmpReg = createResultReg(&X86::GR8RegClass);
2055  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2056  .addReg(FlagReg2).addReg(FlagReg1);
2057  } else {
2058  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2059  .addReg(FlagReg2).addReg(FlagReg1);
2060  }
2061  }
2062  NeedTest = false;
2063  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2064  // Fake-request the condition; otherwise the intrinsic might be completely
2065  // optimized away.
2066  unsigned TmpReg = getRegForValue(Cond);
2067  if (TmpReg == 0)
2068  return false;
2069 
2070  NeedTest = false;
2071  }
2072 
2073  if (NeedTest) {
2074  // Selects operate on i1; however, CondReg is 8 bits wide and may contain
2075  // garbage. Indeed, only the least significant bit is supposed to be
2076  // accurate. If we read more than the lsb, we may see non-zero values
2077  // where the lsb is zero. Therefore, we have to truncate CondReg to i1 for
2078  // the select. This is achieved by performing a TEST against 1.
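    // Concretely this emits something like "testb $1, %cond8" (register name
    // illustrative); the CMOVcc emitted below then consumes the resulting
    // EFLAGS.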
2079  unsigned CondReg = getRegForValue(Cond);
2080  if (CondReg == 0)
2081  return false;
2082  bool CondIsKill = hasTrivialKill(Cond);
2083 
2084  // In case OpReg is a K register, COPY to a GPR
2085  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2086  unsigned KCondReg = CondReg;
2087  CondReg = createResultReg(&X86::GR8RegClass);
2088  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2089  TII.get(TargetOpcode::COPY), CondReg)
2090  .addReg(KCondReg, getKillRegState(CondIsKill));
2091  }
2092  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2093  .addReg(CondReg, getKillRegState(CondIsKill))
2094  .addImm(1);
2095  }
2096 
2097  const Value *LHS = I->getOperand(1);
2098  const Value *RHS = I->getOperand(2);
2099 
2100  unsigned RHSReg = getRegForValue(RHS);
2101  bool RHSIsKill = hasTrivialKill(RHS);
2102 
2103  unsigned LHSReg = getRegForValue(LHS);
2104  bool LHSIsKill = hasTrivialKill(LHS);
2105 
2106  if (!LHSReg || !RHSReg)
2107  return false;
2108 
2109  unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
2110  unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
2111  LHSReg, LHSIsKill);
2112  updateValueMap(I, ResultReg);
2113  return true;
2114 }
2115 
2116 /// \brief Emit SSE or AVX instructions to lower the select.
2117 ///
2118 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2119 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2120 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
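///
/// As a sketch, the non-AVX sequence computes
///   Mask   = CMPcc CmpLHS, CmpRHS            ; all-ones or all-zeros
///   Result = (Mask & TrueVal) | (~Mask & FalseVal)
/// which is what the AND/ANDN/OR emission below implements.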
2121 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2122  // Optimize conditions coming from a compare if both instructions are in the
2123  // same basic block (values defined in other basic blocks may not have
2124  // initialized registers).
2125  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2126  if (!CI || (CI->getParent() != I->getParent()))
2127  return false;
2128 
2129  if (I->getType() != CI->getOperand(0)->getType() ||
2130  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2131  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2132  return false;
2133 
2134  const Value *CmpLHS = CI->getOperand(0);
2135  const Value *CmpRHS = CI->getOperand(1);
2136  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2137 
2138  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2139  // We don't have to materialize a zero constant for this case and can just use
2140  // %x again on the RHS.
2141  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2142  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2143  if (CmpRHSC && CmpRHSC->isNullValue())
2144  CmpRHS = CmpLHS;
2145  }
2146 
2147  unsigned CC;
2148  bool NeedSwap;
2149  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2150  if (CC > 7)
2151  return false;
2152 
2153  if (NeedSwap)
2154  std::swap(CmpLHS, CmpRHS);
2155 
2156  // Choose the SSE instruction sequence based on data type (float or double).
2157  static const uint16_t OpcTable[2][4] = {
2158  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2159  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2160  };
2161 
2162  const uint16_t *Opc = nullptr;
2163  switch (RetVT.SimpleTy) {
2164  default: return false;
2165  case MVT::f32: Opc = &OpcTable[0][0]; break;
2166  case MVT::f64: Opc = &OpcTable[1][0]; break;
2167  }
2168 
2169  const Value *LHS = I->getOperand(1);
2170  const Value *RHS = I->getOperand(2);
2171 
2172  unsigned LHSReg = getRegForValue(LHS);
2173  bool LHSIsKill = hasTrivialKill(LHS);
2174 
2175  unsigned RHSReg = getRegForValue(RHS);
2176  bool RHSIsKill = hasTrivialKill(RHS);
2177 
2178  unsigned CmpLHSReg = getRegForValue(CmpLHS);
2179  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
2180 
2181  unsigned CmpRHSReg = getRegForValue(CmpRHS);
2182  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
2183 
2184  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2185  return false;
2186 
2187  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2188  unsigned ResultReg;
2189 
2190  if (Subtarget->hasAVX512()) {
2191  // If we have AVX512 we can use a mask compare and masked movss/sd.
2192  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2193  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2194 
2195  unsigned CmpOpcode =
2196  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2197  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
2198  CmpRHSReg, CmpRHSIsKill, CC);
2199 
2200  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2201  // bits of the result register since it's not based on any of the inputs.
2202  unsigned ImplicitDefReg = createResultReg(VR128X);
2203  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2204  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2205 
2206  // Place RHSReg in the passthru operand of the masked movss/sd operation
2207  // and put LHSReg in the input. The mask input comes from the compare.
2208  unsigned MovOpcode =
2209  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2210  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
2211  CmpReg, true, ImplicitDefReg, true,
2212  LHSReg, LHSIsKill);
2213 
2214  ResultReg = createResultReg(RC);
2215  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2216  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2217 
2218  } else if (Subtarget->hasAVX()) {
2219  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2220 
2221  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2222  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2223  // uses XMM0 as the selection register. That may need just as many
2224  // instructions as the AND/ANDN/OR sequence due to register moves, so
2225  // don't bother.
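    // With AVX the same select becomes a single vblendvps/vblendvpd that
    // picks the true value wherever the compare mask's sign bit is set and
    // the false value elsewhere (a sketch of the intent, not literal asm).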
2226  unsigned CmpOpcode =
2227  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2228  unsigned BlendOpcode =
2229  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2230 
2231  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
2232  CmpRHSReg, CmpRHSIsKill, CC);
2233  unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
2234  LHSReg, LHSIsKill, CmpReg, true);
2235  ResultReg = createResultReg(RC);
2236  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2237  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2238  } else {
2239  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2240  unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
2241  CmpRHSReg, CmpRHSIsKill, CC);
2242  unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
2243  LHSReg, LHSIsKill);
2244  unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
2245  RHSReg, RHSIsKill);
2246  unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
2247  AndReg, /*IsKill=*/true);
2248  ResultReg = createResultReg(RC);
2249  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2250  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2251  }
2252  updateValueMap(I, ResultReg);
2253  return true;
2254 }
2255 
2256 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2257  // These are pseudo CMOV instructions and will be later expanded into control-
2258  // flow.
2259  unsigned Opc;
2260  switch (RetVT.SimpleTy) {
2261  default: return false;
2262  case MVT::i8: Opc = X86::CMOV_GR8; break;
2263  case MVT::i16: Opc = X86::CMOV_GR16; break;
2264  case MVT::i32: Opc = X86::CMOV_GR32; break;
2265  case MVT::f32: Opc = X86::CMOV_FR32; break;
2266  case MVT::f64: Opc = X86::CMOV_FR64; break;
2267  }
2268 
2269  const Value *Cond = I->getOperand(0);
2270  X86::CondCode CC = X86::COND_NE;
2271 
2272  // Optimize conditions coming from a compare if both instructions are in the
2273  // same basic block (values defined in other basic blocks may not have
2274  // initialized registers).
2275  const auto *CI = dyn_cast<CmpInst>(Cond);
2276  if (CI && (CI->getParent() == I->getParent())) {
2277  bool NeedSwap;
2278  std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate());
2279  if (CC > X86::LAST_VALID_COND)
2280  return false;
2281 
2282  const Value *CmpLHS = CI->getOperand(0);
2283  const Value *CmpRHS = CI->getOperand(1);
2284 
2285  if (NeedSwap)
2286  std::swap(CmpLHS, CmpRHS);
2287 
2288  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2289  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2290  return false;
2291  } else {
2292  unsigned CondReg = getRegForValue(Cond);
2293  if (CondReg == 0)
2294  return false;
2295  bool CondIsKill = hasTrivialKill(Cond);
2296 
2297  // In case OpReg is a K register, COPY to a GPR
2298  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2299  unsigned KCondReg = CondReg;
2300  CondReg = createResultReg(&X86::GR8RegClass);
2301  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2302  TII.get(TargetOpcode::COPY), CondReg)
2303  .addReg(KCondReg, getKillRegState(CondIsKill));
2304  }
2305  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2306  .addReg(CondReg, getKillRegState(CondIsKill))
2307  .addImm(1);
2308  }
2309 
2310  const Value *LHS = I->getOperand(1);
2311  const Value *RHS = I->getOperand(2);
2312 
2313  unsigned LHSReg = getRegForValue(LHS);
2314  bool LHSIsKill = hasTrivialKill(LHS);
2315 
2316  unsigned RHSReg = getRegForValue(RHS);
2317  bool RHSIsKill = hasTrivialKill(RHS);
2318 
2319  if (!LHSReg || !RHSReg)
2320  return false;
2321 
2322  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2323 
2324  unsigned ResultReg =
2325  fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
2326  updateValueMap(I, ResultReg);
2327  return true;
2328 }
2329 
2330 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2331  MVT RetVT;
2332  if (!isTypeLegal(I->getType(), RetVT))
2333  return false;
2334 
2335  // Check if we can fold the select.
2336  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2337  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2338  const Value *Opnd = nullptr;
2339  switch (Predicate) {
2340  default: break;
2341  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2342  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2343  }
2344  // No need for a select anymore - this is an unconditional move.
2345  if (Opnd) {
2346  unsigned OpReg = getRegForValue(Opnd);
2347  if (OpReg == 0)
2348  return false;
2349  bool OpIsKill = hasTrivialKill(Opnd);
2350  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2351  unsigned ResultReg = createResultReg(RC);
2352  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2353  TII.get(TargetOpcode::COPY), ResultReg)
2354  .addReg(OpReg, getKillRegState(OpIsKill));
2355  updateValueMap(I, ResultReg);
2356  return true;
2357  }
2358  }
2359 
2360  // First try to use real conditional move instructions.
2361  if (X86FastEmitCMoveSelect(RetVT, I))
2362  return true;
2363 
2364  // Try to use a sequence of SSE instructions to simulate a conditional move.
2365  if (X86FastEmitSSESelect(RetVT, I))
2366  return true;
2367 
2368  // Fall-back to pseudo conditional move instructions, which will be later
2369  // converted to control-flow.
2370  if (X86FastEmitPseudoSelect(RetVT, I))
2371  return true;
2372 
2373  return false;
2374 }
2375 
2376 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2377  // The target-independent selection algorithm in FastISel already knows how
2378  // to select a SINT_TO_FP if the target is SSE but not AVX.
2379  // Early exit if the subtarget doesn't have AVX.
2380  if (!Subtarget->hasAVX())
2381  return false;
2382 
2383  if (!I->getOperand(0)->getType()->isIntegerTy(32))
2384  return false;
2385 
2386  // Select integer to float/double conversion.
2387  unsigned OpReg = getRegForValue(I->getOperand(0));
2388  if (OpReg == 0)
2389  return false;
2390 
2391  const TargetRegisterClass *RC = nullptr;
2392  unsigned Opcode;
2393 
2394  if (I->getType()->isDoubleTy()) {
2395  // sitofp int -> double
2396  Opcode = X86::VCVTSI2SDrr;
2397  RC = &X86::FR64RegClass;
2398  } else if (I->getType()->isFloatTy()) {
2399  // sitofp int -> float
2400  Opcode = X86::VCVTSI2SSrr;
2401  RC = &X86::FR32RegClass;
2402  } else
2403  return false;
2404 
2405  unsigned ImplicitDefReg = createResultReg(RC);
2406  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2407  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2408  unsigned ResultReg =
2409  fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
2410  updateValueMap(I, ResultReg);
2411  return true;
2412 }
2413 
2414 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2415 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2416  unsigned TargetOpc,
2417  const TargetRegisterClass *RC) {
2418  assert((I->getOpcode() == Instruction::FPExt ||
2419  I->getOpcode() == Instruction::FPTrunc) &&
2420  "Instruction must be an FPExt or FPTrunc!");
2421 
2422  unsigned OpReg = getRegForValue(I->getOperand(0));
2423  if (OpReg == 0)
2424  return false;
2425 
2426  unsigned ResultReg = createResultReg(RC);
2427  MachineInstrBuilder MIB;
2428  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2429  ResultReg);
2430  if (Subtarget->hasAVX())
2431  MIB.addReg(OpReg);
2432  MIB.addReg(OpReg);
2433  updateValueMap(I, ResultReg);
2434  return true;
2435 }
2436 
2437 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2438  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2439  I->getOperand(0)->getType()->isFloatTy()) {
2440  // fpext from float to double.
2441  unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2442  return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR64RegClass);
2443  }
2444 
2445  return false;
2446 }
2447 
2448 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2449  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2450  I->getOperand(0)->getType()->isDoubleTy()) {
2451  // fptrunc from double to float.
2452  unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2453  return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR32RegClass);
2454  }
2455 
2456  return false;
2457 }
2458 
2459 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2460  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2461  EVT DstVT = TLI.getValueType(DL, I->getType());
2462 
2463  // This code only handles truncation to byte.
2464  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2465  return false;
2466  if (!TLI.isTypeLegal(SrcVT))
2467  return false;
2468 
2469  unsigned InputReg = getRegForValue(I->getOperand(0));
2470  if (!InputReg)
2471  // Unhandled operand. Halt "fast" selection and bail.
2472  return false;
2473 
2474  if (SrcVT == MVT::i8) {
2475  // Truncate from i8 to i1; no code needed.
2476  updateValueMap(I, InputReg);
2477  return true;
2478  }
2479 
2480  bool KillInputReg = false;
2481  if (!Subtarget->is64Bit()) {
2482  // If we're on x86-32, we can't extract an i8 from a general register.
2483  // First issue a copy to GR16_ABCD or GR32_ABCD.
2484  const TargetRegisterClass *CopyRC =
2485  (SrcVT == MVT::i16) ? &X86::GR16_ABCDRegClass : &X86::GR32_ABCDRegClass;
2486  unsigned CopyReg = createResultReg(CopyRC);
2487  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2488  TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg);
2489  InputReg = CopyReg;
2490  KillInputReg = true;
2491  }
2492 
2493  // Issue an extract_subreg.
2494  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2495  InputReg, KillInputReg,
2496  X86::sub_8bit);
2497  if (!ResultReg)
2498  return false;
2499 
2500  updateValueMap(I, ResultReg);
2501  return true;
2502 }
2503 
2504 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2505  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2506 }
2507 
2508 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2509  X86AddressMode SrcAM, uint64_t Len) {
2510 
2511  // Make sure we don't bloat code by inlining very large memcpy's.
2512  if (!IsMemcpySmall(Len))
2513  return false;
2514 
2515  bool i64Legal = Subtarget->is64Bit();
2516 
2517  // We don't care about alignment here since we just emit integer accesses.
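  // For example, a hypothetical 13-byte copy on x86-64 is emitted as an i64
  // access, then an i32 access, then an i8 access.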
2518  while (Len) {
2519  MVT VT;
2520  if (Len >= 8 && i64Legal)
2521  VT = MVT::i64;
2522  else if (Len >= 4)
2523  VT = MVT::i32;
2524  else if (Len >= 2)
2525  VT = MVT::i16;
2526  else
2527  VT = MVT::i8;
2528 
2529  unsigned Reg;
2530  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2531  RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2532  assert(RV && "Failed to emit load or store??");
2533 
2534  unsigned Size = VT.getSizeInBits()/8;
2535  Len -= Size;
2536  DestAM.Disp += Size;
2537  SrcAM.Disp += Size;
2538  }
2539 
2540  return true;
2541 }
2542 
2543 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2544  // FIXME: Handle more intrinsics.
2545  switch (II->getIntrinsicID()) {
2546  default: return false;
2547  case Intrinsic::convert_from_fp16:
2548  case Intrinsic::convert_to_fp16: {
2549  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2550  return false;
2551 
2552  const Value *Op = II->getArgOperand(0);
2553  unsigned InputReg = getRegForValue(Op);
2554  if (InputReg == 0)
2555  return false;
2556 
2557  // F16C only allows converting from float to half and from half to float.
2558  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2559  if (IsFloatToHalf) {
2560  if (!Op->getType()->isFloatTy())
2561  return false;
2562  } else {
2563  if (!II->getType()->isFloatTy())
2564  return false;
2565  }
2566 
2567  unsigned ResultReg = 0;
2568  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2569  if (IsFloatToHalf) {
2570  // 'InputReg' is implicitly promoted from register class FR32 to
2571  // register class VR128 by method 'constrainOperandRegClass' which is
2572  // directly called by 'fastEmitInst_ri'.
2573  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2574  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2575  // It's consistent with the other FP instructions, which are usually
2576  // controlled by MXCSR.
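      // Roughly, with assumed registers: "vcvtps2ph $4, %xmm0, %xmm0", then
      // "vmovd %xmm0, %eax" below, and finally the low 16 bits of that GR32
      // are extracted as the i16 result.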
2577  InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
2578 
2579  // Move the lower 32 bits of InputReg to another register of class GR32.
2580  ResultReg = createResultReg(&X86::GR32RegClass);
2581  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2582  TII.get(X86::VMOVPDI2DIrr), ResultReg)
2583  .addReg(InputReg, RegState::Kill);
2584 
2585  // The result value is in the lower 16-bits of ResultReg.
2586  unsigned RegIdx = X86::sub_16bit;
2587  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
2588  } else {
2589  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2590  // Explicitly sign-extend the input to 32-bit.
2591  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
2592  /*Kill=*/false);
2593 
2594  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2595  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2596  InputReg, /*Kill=*/true);
2597 
2598  InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
2599 
2600  // The result value is in the lower 32-bits of ResultReg.
2601  // Emit an explicit copy from register class VR128 to register class FR32.
2602  ResultReg = createResultReg(&X86::FR32RegClass);
2603  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2604  TII.get(TargetOpcode::COPY), ResultReg)
2605  .addReg(InputReg, RegState::Kill);
2606  }
2607 
2608  updateValueMap(II, ResultReg);
2609  return true;
2610  }
2611  case Intrinsic::frameaddress: {
2612  MachineFunction *MF = FuncInfo.MF;
2613  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2614  return false;
2615 
2616  Type *RetTy = II->getCalledFunction()->getReturnType();
2617 
2618  MVT VT;
2619  if (!isTypeLegal(RetTy, VT))
2620  return false;
2621 
2622  unsigned Opc;
2623  const TargetRegisterClass *RC = nullptr;
2624 
2625  switch (VT.SimpleTy) {
2626  default: llvm_unreachable("Invalid result type for frameaddress.");
2627  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2628  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2629  }
2630 
2631  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2632  // we get the wrong frame register.
2633  MachineFrameInfo &MFI = MF->getFrameInfo();
2634  MFI.setFrameAddressIsTaken(true);
2635 
2636  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2637  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2638  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2639  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2640  "Invalid Frame Register!");
2641 
2642  // Always make a copy of the frame register to a vreg first, so that we
2643  // never directly reference the frame register (the TwoAddressInstruction-
2644  // Pass doesn't like that).
2645  unsigned SrcReg = createResultReg(RC);
2646  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2647  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2648 
2649  // Now recursively load from the frame address.
2650  // movq (%rbp), %rax
2651  // movq (%rax), %rax
2652  // movq (%rax), %rax
2653  // ...
2654  unsigned DestReg;
2655  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2656  while (Depth--) {
2657  DestReg = createResultReg(RC);
2658  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2659  TII.get(Opc), DestReg), SrcReg);
2660  SrcReg = DestReg;
2661  }
2662 
2663  updateValueMap(II, SrcReg);
2664  return true;
2665  }
2666  case Intrinsic::memcpy: {
2667  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2668  // Don't handle volatile or variable length memcpys.
2669  if (MCI->isVolatile())
2670  return false;
2671 
2672  if (isa<ConstantInt>(MCI->getLength())) {
2673  // Small memcpy's are common enough that we want to do them
2674  // without a call if possible.
2675  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2676  if (IsMemcpySmall(Len)) {
2677  X86AddressMode DestAM, SrcAM;
2678  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2679  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2680  return false;
2681  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2682  return true;
2683  }
2684  }
2685 
2686  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2687  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2688  return false;
2689 
2690  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2691  return false;
2692 
2693  return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2);
2694  }
2695  case Intrinsic::memset: {
2696  const MemSetInst *MSI = cast<MemSetInst>(II);
2697 
2698  if (MSI->isVolatile())
2699  return false;
2700 
2701  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2702  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2703  return false;
2704 
2705  if (MSI->getDestAddressSpace() > 255)
2706  return false;
2707 
2708  return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
2709  }
2710  case Intrinsic::stackprotector: {
2711  // Emit code to store the stack guard onto the stack.
2712  EVT PtrTy = TLI.getPointerTy(DL);
2713 
2714  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2715  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2716 
2717  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2718 
2719  // Grab the frame index.
2720  X86AddressMode AM;
2721  if (!X86SelectAddress(Slot, AM)) return false;
2722  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2723  return true;
2724  }
2725  case Intrinsic::dbg_declare: {
2726  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2727  X86AddressMode AM;
2728  assert(DI->getAddress() && "Null address should be checked earlier!");
2729  if (!X86SelectAddress(DI->getAddress(), AM))
2730  return false;
2731  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2732  // FIXME may need to add RegState::Debug to any registers produced,
2733  // although ESP/EBP should be the only ones at the moment.
2735  "Expected inlined-at fields to agree");
2736  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2737  .addImm(0)
2738  .addMetadata(DI->getVariable())
2739  .addMetadata(DI->getExpression());
2740  return true;
2741  }
2742  case Intrinsic::trap: {
2743  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2744  return true;
2745  }
2746  case Intrinsic::sqrt: {
2747  if (!Subtarget->hasSSE1())
2748  return false;
2749 
2750  Type *RetTy = II->getCalledFunction()->getReturnType();
2751 
2752  MVT VT;
2753  if (!isTypeLegal(RetTy, VT))
2754  return false;
2755 
2756  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2757  // is not generated by FastISel yet.
2758  // FIXME: Update this code once tablegen can handle it.
2759  static const uint16_t SqrtOpc[2][2] = {
2760  {X86::SQRTSSr, X86::VSQRTSSr},
2761  {X86::SQRTSDr, X86::VSQRTSDr}
2762  };
2763  bool HasAVX = Subtarget->hasAVX();
2764  unsigned Opc;
2765  const TargetRegisterClass *RC;
2766  switch (VT.SimpleTy) {
2767  default: return false;
2768  case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
2769  case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
2770  }
2771 
2772  const Value *SrcVal = II->getArgOperand(0);
2773  unsigned SrcReg = getRegForValue(SrcVal);
2774 
2775  if (SrcReg == 0)
2776  return false;
2777 
2778  unsigned ImplicitDefReg = 0;
2779  if (HasAVX) {
2780  ImplicitDefReg = createResultReg(RC);
2781  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2782  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2783  }
2784 
2785  unsigned ResultReg = createResultReg(RC);
2786  MachineInstrBuilder MIB;
2787  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2788  ResultReg);
2789 
2790  if (ImplicitDefReg)
2791  MIB.addReg(ImplicitDefReg);
2792 
2793  MIB.addReg(SrcReg);
2794 
2795  updateValueMap(II, ResultReg);
2796  return true;
2797  }
2798  case Intrinsic::sadd_with_overflow:
2799  case Intrinsic::uadd_with_overflow:
2800  case Intrinsic::ssub_with_overflow:
2801  case Intrinsic::usub_with_overflow:
2802  case Intrinsic::smul_with_overflow:
2803  case Intrinsic::umul_with_overflow: {
2804  // This implements the basic lowering of the xalu with overflow intrinsics
2805  // into add/sub/mul followed by either seto or setb.
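    // For example, a 32-bit uadd.with.overflow is selected roughly as
    // (registers illustrative):
    //   addl %rhs, %lhs      ; the value result
    //   setb %ovf8           ; carry flag = unsigned-add overflow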
2806  const Function *Callee = II->getCalledFunction();
2807  auto *Ty = cast<StructType>(Callee->getReturnType());
2808  Type *RetTy = Ty->getTypeAtIndex(0U);
2809  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2810  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2811  "Overflow value expected to be an i1");
2812 
2813  MVT VT;
2814  if (!isTypeLegal(RetTy, VT))
2815  return false;
2816 
2817  if (VT < MVT::i8 || VT > MVT::i64)
2818  return false;
2819 
2820  const Value *LHS = II->getArgOperand(0);
2821  const Value *RHS = II->getArgOperand(1);
2822 
2823  // Canonicalize immediate to the RHS.
2824  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2825  isCommutativeIntrinsic(II))
2826  std::swap(LHS, RHS);
2827 
2828  bool UseIncDec = false;
2829  if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
2830  UseIncDec = true;
2831 
2832  unsigned BaseOpc, CondOpc;
2833  switch (II->getIntrinsicID()) {
2834  default: llvm_unreachable("Unexpected intrinsic!");
2835  case Intrinsic::sadd_with_overflow:
2836  BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
2837  CondOpc = X86::SETOr;
2838  break;
2839  case Intrinsic::uadd_with_overflow:
2840  BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
2841  case Intrinsic::ssub_with_overflow:
2842  BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
2843  CondOpc = X86::SETOr;
2844  break;
2845  case Intrinsic::usub_with_overflow:
2846  BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
2847  case Intrinsic::smul_with_overflow:
2848  BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
2849  case Intrinsic::umul_with_overflow:
2850  BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
2851  }
2852 
2853  unsigned LHSReg = getRegForValue(LHS);
2854  if (LHSReg == 0)
2855  return false;
2856  bool LHSIsKill = hasTrivialKill(LHS);
2857 
2858  unsigned ResultReg = 0;
2859  // Check if we have an immediate version.
2860  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2861  static const uint16_t Opc[2][4] = {
2862  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2863  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2864  };
2865 
2866  if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
2867  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2868  bool IsDec = BaseOpc == X86ISD::DEC;
2869  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2870  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2871  .addReg(LHSReg, getKillRegState(LHSIsKill));
2872  } else
2873  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2874  CI->getZExtValue());
2875  }
2876 
2877  unsigned RHSReg;
2878  bool RHSIsKill;
2879  if (!ResultReg) {
2880  RHSReg = getRegForValue(RHS);
2881  if (RHSReg == 0)
2882  return false;
2883  RHSIsKill = hasTrivialKill(RHS);
2884  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2885  RHSIsKill);
2886  }
2887 
2888  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2889  // it manually.
2890  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2891  static const uint16_t MULOpc[] =
2892  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2893  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2894  // First copy the first operand into RAX, which is an implicit input to
2895  // the X86::MUL*r instruction.
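    // e.g. for i32 (sketch): copy the LHS into EAX, then "mull %rhs" leaves
    // the low half of the product in EAX; the SETcc emitted later reads the
    // overflow flag.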
2896  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2897  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2898  .addReg(LHSReg, getKillRegState(LHSIsKill));
2899  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2900  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2901  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2902  static const uint16_t MULOpc[] =
2903  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2904  if (VT == MVT::i8) {
2905  // Copy the first operand into AL, which is an implicit input to the
2906  // X86::IMUL8r instruction.
2907  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2908  TII.get(TargetOpcode::COPY), X86::AL)
2909  .addReg(LHSReg, getKillRegState(LHSIsKill));
2910  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2911  RHSIsKill);
2912  } else
2913  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2914  TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2915  RHSReg, RHSIsKill);
2916  }
2917 
2918  if (!ResultReg)
2919  return false;
2920 
2921  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2922  unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
2923  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2924  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
2925  ResultReg2);
2926 
2927  updateValueMap(II, ResultReg, 2);
2928  return true;
2929  }
2930  case Intrinsic::x86_sse_cvttss2si:
2931  case Intrinsic::x86_sse_cvttss2si64:
2932  case Intrinsic::x86_sse2_cvttsd2si:
2933  case Intrinsic::x86_sse2_cvttsd2si64: {
2934  bool IsInputDouble;
2935  switch (II->getIntrinsicID()) {
2936  default: llvm_unreachable("Unexpected intrinsic.");
2937  case Intrinsic::x86_sse_cvttss2si:
2938  case Intrinsic::x86_sse_cvttss2si64:
2939  if (!Subtarget->hasSSE1())
2940  return false;
2941  IsInputDouble = false;
2942  break;
2943  case Intrinsic::x86_sse2_cvttsd2si:
2944  case Intrinsic::x86_sse2_cvttsd2si64:
2945  if (!Subtarget->hasSSE2())
2946  return false;
2947  IsInputDouble = true;
2948  break;
2949  }
2950 
2951  Type *RetTy = II->getCalledFunction()->getReturnType();
2952  MVT VT;
2953  if (!isTypeLegal(RetTy, VT))
2954  return false;
2955 
2956  static const uint16_t CvtOpc[2][2][2] = {
2957  { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr },
2958  { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } },
2959  { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr },
2960  { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } }
2961  };
2962  bool HasAVX = Subtarget->hasAVX();
2963  unsigned Opc;
2964  switch (VT.SimpleTy) {
2965  default: llvm_unreachable("Unexpected result type.");
2966  case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
2967  case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
2968  }
2969 
2970  // Check if we can fold insertelement instructions into the convert.
2971  const Value *Op = II->getArgOperand(0);
2972  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
2973  const Value *Index = IE->getOperand(2);
2974  if (!isa<ConstantInt>(Index))
2975  break;
2976  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
2977 
2978  if (Idx == 0) {
2979  Op = IE->getOperand(1);
2980  break;
2981  }
2982  Op = IE->getOperand(0);
2983  }
2984 
2985  unsigned Reg = getRegForValue(Op);
2986  if (Reg == 0)
2987  return false;
2988 
2989  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
2990  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2991  .addReg(Reg);
2992 
2993  updateValueMap(II, ResultReg);
2994  return true;
2995  }
2996  }
2997 }
2998 
2999 bool X86FastISel::fastLowerArguments() {
3000  if (!FuncInfo.CanLowerReturn)
3001  return false;
3002 
3003  const Function *F = FuncInfo.Fn;
3004  if (F->isVarArg())
3005  return false;
3006 
3007  CallingConv::ID CC = F->getCallingConv();
3008  if (CC != CallingConv::C)
3009  return false;
3010 
3011  if (Subtarget->isCallingConvWin64(CC))
3012  return false;
3013 
3014  if (!Subtarget->is64Bit())
3015  return false;
3016 
3017  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
3018  unsigned GPRCnt = 0;
3019  unsigned FPRCnt = 0;
3020  unsigned Idx = 0;
3021  for (auto const &Arg : F->args()) {
3022  // The first argument is at index 1.
3023  ++Idx;
3024  if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
3025  F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
3026  F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
3027  F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) ||
3028  F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) ||
3029  F->getAttributes().hasAttribute(Idx, Attribute::Nest))
3030  return false;
3031 
3032  Type *ArgTy = Arg.getType();
3033  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3034  return false;
3035 
3036  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3037  if (!ArgVT.isSimple()) return false;
3038  switch (ArgVT.getSimpleVT().SimpleTy) {
3039  default: return false;
3040  case MVT::i32:
3041  case MVT::i64:
3042  ++GPRCnt;
3043  break;
3044  case MVT::f32:
3045  case MVT::f64:
3046  if (!Subtarget->hasSSE1())
3047  return false;
3048  ++FPRCnt;
3049  break;
3050  }
3051 
3052  if (GPRCnt > 6)
3053  return false;
3054 
3055  if (FPRCnt > 8)
3056  return false;
3057  }
3058 
3059  static const MCPhysReg GPR32ArgRegs[] = {
3060  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3061  };
3062  static const MCPhysReg GPR64ArgRegs[] = {
3063  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3064  };
3065  static const MCPhysReg XMMArgRegs[] = {
3066  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3067  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3068  };
3069 
3070  unsigned GPRIdx = 0;
3071  unsigned FPRIdx = 0;
3072  for (auto const &Arg : F->args()) {
3073  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3074  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3075  unsigned SrcReg;
3076  switch (VT.SimpleTy) {
3077  default: llvm_unreachable("Unexpected value type.");
3078  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3079  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3080  case MVT::f32: LLVM_FALLTHROUGH;
3081  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3082  }
3083  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3084  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3085  // Without this, EmitLiveInCopies may eliminate the livein if its only
3086  // use is a bitcast (which isn't turned into an instruction).
3087  unsigned ResultReg = createResultReg(RC);
3088  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3089  TII.get(TargetOpcode::COPY), ResultReg)
3090  .addReg(DstReg, getKillRegState(true));
3091  updateValueMap(&Arg, ResultReg);
3092  }
3093  return true;
3094 }
3095 
3096 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3097  CallingConv::ID CC,
3098  ImmutableCallSite *CS) {
3099  if (Subtarget->is64Bit())
3100  return 0;
3101  if (Subtarget->getTargetTriple().isOSMSVCRT())
3102  return 0;
3103  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3104  CC == CallingConv::HiPE)
3105  return 0;
3106 
3107  if (CS)
3108  if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
3109  CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
3110  return 0;
3111 
3112  return 4;
3113 }
3114 
3115 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3116  auto &OutVals = CLI.OutVals;
3117  auto &OutFlags = CLI.OutFlags;
3118  auto &OutRegs = CLI.OutRegs;
3119  auto &Ins = CLI.Ins;
3120  auto &InRegs = CLI.InRegs;
3121  CallingConv::ID CC = CLI.CallConv;
3122  bool &IsTailCall = CLI.IsTailCall;
3123  bool IsVarArg = CLI.IsVarArg;
3124  const Value *Callee = CLI.Callee;
3125  MCSymbol *Symbol = CLI.Symbol;
3126 
3127  bool Is64Bit = Subtarget->is64Bit();
3128  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3129 
3130  // Handle only C, fastcc, and webkit_js calling conventions for now.
3131  switch (CC) {
3132  default: return false;
3133  case CallingConv::C:
3134  case CallingConv::Fast:
3135  case CallingConv::WebKit_JS:
3136  case CallingConv::Swift:
3137  case CallingConv::X86_FastCall:
3138  case CallingConv::X86_StdCall:
3139  case CallingConv::X86_ThisCall:
3140  case CallingConv::X86_64_Win64:
3141  case CallingConv::X86_64_SysV:
3142  break;
3143  }
3144 
3145  // Allow SelectionDAG isel to handle tail calls.
3146  if (IsTailCall)
3147  return false;
3148 
3149  // fastcc with -tailcallopt is intended to provide a guaranteed
3150  // tail call optimization. Fastisel doesn't know how to do that.
3151  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
3152  return false;
3153 
3154  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3155  // x86-32. Special handling for x86-64 is implemented.
3156  if (IsVarArg && IsWin64)
3157  return false;
3158 
3159  // Don't know about inalloca yet.
3160  if (CLI.CS && CLI.CS->hasInAllocaArgument())
3161  return false;
3162 
3163  for (auto Flag : CLI.OutFlags)
3164  if (Flag.isSwiftError())
3165  return false;
3166 
3167  SmallVector<MVT, 16> OutVTs;
3168  SmallVector<unsigned, 16> ArgRegs;
3169 
3170  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3171  // instruction. This is safe because it is common to all FastISel supported
3172  // calling conventions on x86.
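  // For example, an i8 constant argument 5 would be rewritten here to the
  // i32 constant 5, sign- or zero-extended according to the argument's
  // extension flag (a sketch; the actual constants come from the call site).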
3173  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3174  Value *&Val = OutVals[i];
3175  ISD::ArgFlagsTy Flags = OutFlags[i];
3176  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3177  if (CI->getBitWidth() < 32) {
3178  if (Flags.isSExt())
3179  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
3180  else
3181  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
3182  }
3183  }
3184 
3185  // Passing bools around ends up doing a trunc to i1 and passing it.
3186  // Codegen this as an argument + "and 1".
3187  MVT VT;
3188  auto *TI = dyn_cast<TruncInst>(Val);
3189  unsigned ResultReg;
3190  if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
3191  (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
3192  TI->hasOneUse()) {
3193  Value *PrevVal = TI->getOperand(0);
3194  ResultReg = getRegForValue(PrevVal);
3195 
3196  if (!ResultReg)
3197  return false;
3198 
3199  if (!isTypeLegal(PrevVal->getType(), VT))
3200  return false;
3201 
3202  ResultReg =
3203  fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
3204  } else {
3205  if (!isTypeLegal(Val->getType(), VT))
3206  return false;
3207  ResultReg = getRegForValue(Val);
3208  }
3209 
3210  if (!ResultReg)
3211  return false;
3212 
3213  ArgRegs.push_back(ResultReg);
3214  OutVTs.push_back(VT);
3215  }
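 // Illustrative example (hypothetical IR): for
 //   %b = trunc i32 %x to i1
 //   call void @use(i1 zeroext %b)
 // the loop above avoids materializing the trunc and instead emits an
 // "and $1" of %x's register, so that only bit 0, the meaningful part of
 // the i1, is passed.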
3216 
3217  // Analyze operands of the call, assigning locations to each operand.
3218  SmallVector<CCValAssign, 16> ArgLocs;
3219  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3220 
3221  // Allocate shadow area for Win64
3222  if (IsWin64)
3223  CCInfo.AllocateStack(32, 8);
3224 
3225  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3226 
3227  // Get a count of how many bytes are to be pushed on the stack.
3228  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3229 
3230  // Issue CALLSEQ_START
3231  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3232  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3233  .addImm(NumBytes).addImm(0);
3234 
3235  // Walk the register/memloc assignments, inserting copies/loads.
3236  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3237  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3238  CCValAssign const &VA = ArgLocs[i];
3239  const Value *ArgVal = OutVals[VA.getValNo()];
3240  MVT ArgVT = OutVTs[VA.getValNo()];
3241 
3242  if (ArgVT == MVT::x86mmx)
3243  return false;
3244 
3245  unsigned ArgReg = ArgRegs[VA.getValNo()];
3246 
3247  // Promote the value if needed.
3248  switch (VA.getLocInfo()) {
3249  case CCValAssign::Full: break;
3250  case CCValAssign::SExt: {
3251  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3252  "Unexpected extend");
3253 
3254  if (ArgVT == MVT::i1)
3255  return false;
3256 
3257  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3258  ArgVT, ArgReg);
3259  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3260  ArgVT = VA.getLocVT();
3261  break;
3262  }
3263  case CCValAssign::ZExt: {
3264  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3265  "Unexpected extend");
3266 
3267  // Handle zero-extension from i1 to i8, which is common.
3268  if (ArgVT == MVT::i1) {
3269  // Set the high bits to zero.
3270  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
3271  ArgVT = MVT::i8;
3272 
3273  if (ArgReg == 0)
3274  return false;
3275  }
3276 
3277  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3278  ArgVT, ArgReg);
3279  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3280  ArgVT = VA.getLocVT();
3281  break;
3282  }
3283  case CCValAssign::AExt: {
3284  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3285  "Unexpected extend");
3286  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3287  ArgVT, ArgReg);
3288  if (!Emitted)
3289  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3290  ArgVT, ArgReg);
3291  if (!Emitted)
3292  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3293  ArgVT, ArgReg);
3294 
3295  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3296  ArgVT = VA.getLocVT();
3297  break;
3298  }
3299  case CCValAssign::BCvt: {
3300  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
3301  /*TODO: Kill=*/false);
3302  assert(ArgReg && "Failed to emit a bitcast!");
3303  ArgVT = VA.getLocVT();
3304  break;
3305  }
3306  case CCValAssign::VExt:
3307  // VExt has not been implemented, so this should be impossible to reach
3308  // for now. However, fall back to SelectionDAG isel once it is implemented.
3309  return false;
3310  case CCValAssign::AExtUpper:
3311  case CCValAssign::SExtUpper:
3312  case CCValAssign::ZExtUpper:
3313  case CCValAssign::FPExt:
3314  llvm_unreachable("Unexpected loc info!");
3315  case CCValAssign::Indirect:
3316  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3317  // support this.
3318  return false;
3319  }
3320 
3321  if (VA.isRegLoc()) {
3322  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3323  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3324  OutRegs.push_back(VA.getLocReg());
3325  } else {
3326  assert(VA.isMemLoc());
3327 
3328  // Don't emit stores for undef values.
3329  if (isa<UndefValue>(ArgVal))
3330  continue;
3331 
3332  unsigned LocMemOffset = VA.getLocMemOffset();
3333  X86AddressMode AM;
3334  AM.Base.Reg = RegInfo->getStackRegister();
3335  AM.Disp = LocMemOffset;
3336  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3337  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3338  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3339  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3340  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3341  if (Flags.isByVal()) {
3342  X86AddressMode SrcAM;
3343  SrcAM.Base.Reg = ArgReg;
3344  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3345  return false;
3346  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3347  // If this is a really simple value, emit this with the Value* version
3348  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3349  // as it can cause us to reevaluate the argument.
3350  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3351  return false;
3352  } else {
3353  bool ValIsKill = hasTrivialKill(ArgVal);
3354  if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
3355  return false;
3356  }
3357  }
3358  }
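 // Summary of the loop above: register-assigned arguments become COPYs into
 // their fixed physical registers (recorded in OutRegs so they can be added
 // as implicit uses of the call), stack-assigned arguments become stores at
 // [stack pointer + LocMemOffset], byval arguments are copied with
 // TryEmitSmallMemcpy, and undef values are skipped entirely.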
3359 
3360  // ELF / PIC requires the GOT pointer to be in the EBX register before
3361  // function calls made via the PLT.
3362  if (Subtarget->isPICStyleGOT()) {
3363  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3364  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3365  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3366  }
3367 
3368  if (Is64Bit && IsVarArg && !IsWin64) {
3369  // From AMD64 ABI document:
3370  // For calls that may call functions that use varargs or stdargs
3371  // (prototype-less calls or calls to functions containing ellipsis (...) in
3372  // the declaration) %al is used as a hidden argument to specify the number
3373  // of SSE registers used. The contents of %al do not need to match exactly
3374  // the number of registers, but must be an upper bound on the number of SSE
3375  // registers used and must be in the range 0 - 8 inclusive.
3376 
3377  // Count the number of XMM registers allocated.
3378  static const MCPhysReg XMMArgRegs[] = {
3379  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3380  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3381  };
3382  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3383  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3384  && "SSE registers cannot be used when SSE is disabled");
3385  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3386  X86::AL).addImm(NumXMMRegs);
3387  }
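 // Illustrative example: for a varargs call such as printf("%f\n", d) on
 // x86-64 SysV, the double travels in %xmm0, so NumXMMRegs is 1 and a
 // "movb $1, %al" is emitted immediately before the call.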
3388 
3389  // Materialize callee address in a register. FIXME: GV address can be
3390  // handled with a CALLpcrel32 instead.
3391  X86AddressMode CalleeAM;
3392  if (!X86SelectCallAddress(Callee, CalleeAM))
3393  return false;
3394 
3395  unsigned CalleeOp = 0;
3396  const GlobalValue *GV = nullptr;
3397  if (CalleeAM.GV != nullptr) {
3398  GV = CalleeAM.GV;
3399  } else if (CalleeAM.Base.Reg != 0) {
3400  CalleeOp = CalleeAM.Base.Reg;
3401  } else
3402  return false;
3403 
3404  // Issue the call.
3405  MachineInstrBuilder MIB;
3406  if (CalleeOp) {
3407  // Register-indirect call.
3408  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3409  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3410  .addReg(CalleeOp);
3411  } else {
3412  // Direct call.
3413  assert(GV && "Not a direct call");
3414  unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
3415 
3416  // See if we need any target-specific flags on the GV operand.
3417  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3418  // Ignore NonLazyBind attribute in FastISel
3419  if (OpFlags == X86II::MO_GOTPCREL)
3420  OpFlags = 0;
3421 
3422  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3423  if (Symbol)
3424  MIB.addSym(Symbol, OpFlags);
3425  else
3426  MIB.addGlobalAddress(GV, 0, OpFlags);
3427  }
3428 
3429  // Add a register mask operand representing the call-preserved registers.
3430  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3431  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3432 
3433  // Add an implicit use GOT pointer in EBX.
3434  if (Subtarget->isPICStyleGOT())
3435  MIB.addReg(X86::EBX, RegState::Implicit);
3436 
3437  if (Is64Bit && IsVarArg && !IsWin64)
3438  MIB.addReg(X86::AL, RegState::Implicit);
3439 
3440  // Add implicit physical register uses to the call.
3441  for (auto Reg : OutRegs)
3442  MIB.addReg(Reg, RegState::Implicit);
3443 
3444  // Issue CALLSEQ_END
3445  unsigned NumBytesForCalleeToPop =
3446  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3447  TM.Options.GuaranteedTailCallOpt)
3448  ? NumBytes // Callee pops everything.
3449  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
3450  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3451  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3452  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3453 
3454  // Now handle call return values.
3455  SmallVector<CCValAssign, 16> RVLocs;
3456  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3457  CLI.RetTy->getContext());
3458  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3459 
3460  // Copy all of the result registers out of their specified physreg.
3461  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3462  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3463  CCValAssign &VA = RVLocs[i];
3464  EVT CopyVT = VA.getValVT();
3465  unsigned CopyReg = ResultReg + i;
3466 
3467  // If this is x86-64 (or an inreg FP return on x86-32) and SSE is disabled, we can't return FP values.
3468  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3469  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3470  report_fatal_error("SSE register return with SSE disabled");
3471  }
3472 
3473  // If we prefer to use the value in xmm registers, copy it out as f80 and
3474  // use a truncate to move it from fp stack reg to xmm reg.
3475  if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3476  isScalarFPTypeInSSEReg(VA.getValVT())) {
3477  CopyVT = MVT::f80;
3478  CopyReg = createResultReg(&X86::RFP80RegClass);
3479  }
3480 
3481  // Copy out the result.
3482  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3483  TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
3484  InRegs.push_back(VA.getLocReg());
3485 
3486  // Round the f80 to the right size, which also moves it to the appropriate
3487  // xmm register. This is accomplished by storing the f80 value in memory
3488  // and then loading it back.
3489  if (CopyVT != VA.getValVT()) {
3490  EVT ResVT = VA.getValVT();
3491  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3492  unsigned MemSize = ResVT.getSizeInBits()/8;
3493  int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3494  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3495  TII.get(Opc)), FI)
3496  .addReg(CopyReg);
3497  Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
3498  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3499  TII.get(Opc), ResultReg + i), FI);
3500  }
3501  }
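 // Illustration of the f80 round-trip above, assuming an f32 value returned
 // in %fp0 that we want in an SSE register: it is copied out as f80, stored
 // to a fresh stack slot with ST_Fp80m32, and reloaded with MOVSSrm, i.e.
 // roughly "fstps <slot>; movss <slot>, %xmmN" in the final assembly.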
3502 
3503  CLI.ResultReg = ResultReg;
3504  CLI.NumResultRegs = RVLocs.size();
3505  CLI.Call = MIB;
3506 
3507  return true;
3508 }
3509 
3510 bool
3511 X86FastISel::fastSelectInstruction(const Instruction *I) {
3512  switch (I->getOpcode()) {
3513  default: break;
3514  case Instruction::Load:
3515  return X86SelectLoad(I);
3516  case Instruction::Store:
3517  return X86SelectStore(I);
3518  case Instruction::Ret:
3519  return X86SelectRet(I);
3520  case Instruction::ICmp:
3521  case Instruction::FCmp:
3522  return X86SelectCmp(I);
3523  case Instruction::ZExt:
3524  return X86SelectZExt(I);
3525  case Instruction::Br:
3526  return X86SelectBranch(I);
3527  case Instruction::LShr:
3528  case Instruction::AShr:
3529  case Instruction::Shl:
3530  return X86SelectShift(I);
3531  case Instruction::SDiv:
3532  case Instruction::UDiv:
3533  case Instruction::SRem:
3534  case Instruction::URem:
3535  return X86SelectDivRem(I);
3536  case Instruction::Select:
3537  return X86SelectSelect(I);
3538  case Instruction::Trunc:
3539  return X86SelectTrunc(I);
3540  case Instruction::FPExt:
3541  return X86SelectFPExt(I);
3542  case Instruction::FPTrunc:
3543  return X86SelectFPTrunc(I);
3544  case Instruction::SIToFP:
3545  return X86SelectSIToFP(I);
3546  case Instruction::IntToPtr: // Deliberate fall-through.
3547  case Instruction::PtrToInt: {
3548  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3549  EVT DstVT = TLI.getValueType(DL, I->getType());
3550  if (DstVT.bitsGT(SrcVT))
3551  return X86SelectZExt(I);
3552  if (DstVT.bitsLT(SrcVT))
3553  return X86SelectTrunc(I);
3554  unsigned Reg = getRegForValue(I->getOperand(0));
3555  if (Reg == 0) return false;
3556  updateValueMap(I, Reg);
3557  return true;
3558  }
3559  case Instruction::BitCast: {
3560  // Select SSE2/AVX/AVX-512 bitcasts between 128/256/512-bit vector types.
3561  if (!Subtarget->hasSSE2())
3562  return false;
3563 
3564  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3565  EVT DstVT = TLI.getValueType(DL, I->getType());
3566 
3567  if (!SrcVT.isSimple() || !DstVT.isSimple())
3568  return false;
3569 
3570  MVT SVT = SrcVT.getSimpleVT();
3571  MVT DVT = DstVT.getSimpleVT();
3572 
3573  if (!SVT.is128BitVector() &&
3574  !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
3575  !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
3576  (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
3577  DVT.getScalarSizeInBits() >= 32))))
3578  return false;
3579 
3580  unsigned Reg = getRegForValue(I->getOperand(0));
3581  if (Reg == 0)
3582  return false;
3583 
3584  // No instruction is needed for conversion. Reuse the register used by
3585  // the first operand.
3586  updateValueMap(I, Reg);
3587  return true;
3588  }
3589  }
3590 
3591  return false;
3592 }
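// Note: returning false here (or from any of the X86Select* helpers above)
// makes FastISel fall back to the slower SelectionDAG path for that
// instruction.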
3593 
3594 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3595  if (VT > MVT::i64)
3596  return 0;
3597 
3598  uint64_t Imm = CI->getZExtValue();
3599  if (Imm == 0) {
3600  unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3601  switch (VT.SimpleTy) {
3602  default: llvm_unreachable("Unexpected value type");
3603  case MVT::i1:
3604  case MVT::i8:
3605  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
3606  X86::sub_8bit);
3607  case MVT::i16:
3608  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
3609  X86::sub_16bit);
3610  case MVT::i32:
3611  return SrcReg;
3612  case MVT::i64: {
3613  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3614  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3615  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3616  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3617  return ResultReg;
3618  }
3619  }
3620  }
3621 
3622  unsigned Opc = 0;
3623  switch (VT.SimpleTy) {
3624  default: llvm_unreachable("Unexpected value type");
3625  case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH;
3626  case MVT::i8: Opc = X86::MOV8ri; break;
3627  case MVT::i16: Opc = X86::MOV16ri; break;
3628  case MVT::i32: Opc = X86::MOV32ri; break;
3629  case MVT::i64: {
3630  if (isUInt<32>(Imm))
3631  Opc = X86::MOV32ri;
3632  else if (isInt<32>(Imm))
3633  Opc = X86::MOV64ri32;
3634  else
3635  Opc = X86::MOV64ri;
3636  break;
3637  }
3638  }
3639  if (VT == MVT::i64 && Opc == X86::MOV32ri) {
3640  unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
3641  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3642  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3643  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3644  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3645  return ResultReg;
3646  }
3647  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3648 }
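// Notes on the materialization above: MOV32r0 is a pseudo that is later
// expanded to a register self-xor, and the i64 cases that go through a
// 32-bit move rely on x86-64 implicitly zeroing the upper 32 bits of the
// destination, which is what the SUBREG_TO_REG wrapper expresses.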
3649 
3650 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3651  if (CFP->isNullValue())
3652  return fastMaterializeFloatZero(CFP);
3653 
3654  // Can't handle alternate code models yet.
3655  CodeModel::Model CM = TM.getCodeModel();
3656  if (CM != CodeModel::Small && CM != CodeModel::Large)
3657  return 0;
3658 
3659  // Get opcode and regclass of the output for the given load instruction.
3660  unsigned Opc = 0;
3661  const TargetRegisterClass *RC = nullptr;
3662  switch (VT.SimpleTy) {
3663  default: return 0;
3664  case MVT::f32:
3665  if (X86ScalarSSEf32) {
3666  Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
3667  RC = &X86::FR32RegClass;
3668  } else {
3669  Opc = X86::LD_Fp32m;
3670  RC = &X86::RFP32RegClass;
3671  }
3672  break;
3673  case MVT::f64:
3674  if (X86ScalarSSEf64) {
3675  Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
3676  RC = &X86::FR64RegClass;
3677  } else {
3678  Opc = X86::LD_Fp64m;
3679  RC = &X86::RFP64RegClass;
3680  }
3681  break;
3682  case MVT::f80:
3683  // No f80 support yet.
3684  return 0;
3685  }
3686 
3687  // MachineConstantPool wants an explicit alignment.
3688  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
3689  if (Align == 0) {
3690  // Alignment of vector types. FIXME!
3691  Align = DL.getTypeAllocSize(CFP->getType());
3692  }
3693 
3694  // x86-32 PIC requires a PIC base register for constant pools.
3695  unsigned PICBase = 0;
3696  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3697  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3698  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3699  else if (OpFlag == X86II::MO_GOTOFF)
3700  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3701  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3702  PICBase = X86::RIP;
3703 
3704  // Create the load from the constant pool.
3705  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
3706  unsigned ResultReg = createResultReg(RC);
3707 
3708  if (CM == CodeModel::Large) {
3709  unsigned AddrReg = createResultReg(&X86::GR64RegClass);
3710  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3711  AddrReg)
3712  .addConstantPoolIndex(CPI, 0, OpFlag);
3713  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3714  TII.get(Opc), ResultReg);
3715  addDirectMem(MIB, AddrReg);
3716  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3717  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3718  MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
3719  MIB->addMemOperand(*FuncInfo.MF, MMO);
3720  return ResultReg;
3721  }
3722 
3723  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3724  TII.get(Opc), ResultReg),
3725  CPI, PICBase, OpFlag);
3726  return ResultReg;
3727 }
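// Note: non-zero FP constants are loaded from the constant pool; under
// x86-32 PIC the load is addressed relative to the PIC base register, on
// x86-64 with the small code model relative to %rip, and for the large code
// model the pool address is first materialized into a 64-bit register and
// then dereferenced.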
3728 
3729 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3730  // Can't handle alternate code models yet.
3731  if (TM.getCodeModel() != CodeModel::Small)
3732  return 0;
3733 
3734  // Materialize addresses with LEA/MOV instructions.
3735  X86AddressMode AM;
3736  if (X86SelectAddress(GV, AM)) {
3737  // If the expression is just a basereg, then we're done, otherwise we need
3738  // to emit an LEA.
3739  if (AM.BaseType == X86AddressMode::RegBase &&
3740  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3741  return AM.Base.Reg;
3742 
3743  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3744  if (TM.getRelocationModel() == Reloc::Static &&
3745  TLI.getPointerTy(DL) == MVT::i64) {
3746  // The displacement could be outside the 32-bit range, so we need to use
3747  // an instruction with a 64-bit immediate.
3748  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3749  ResultReg)
3750  .addGlobalAddress(GV);
3751  } else {
3752  unsigned Opc =
3753  TLI.getPointerTy(DL) == MVT::i32
3754  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3755  : X86::LEA64r;
3756  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3757  TII.get(Opc), ResultReg), AM);
3758  }
3759  return ResultReg;
3760  }
3761  return 0;
3762 }
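// Note: global addresses are normally materialized with an LEA of the
// selected address mode; the MOV64ri path above handles static relocation
// with 64-bit pointers, where the address may not fit in a 32-bit
// displacement.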
3763 
3764 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3765  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3766 
3767  // Only handle simple types.
3768  if (!CEVT.isSimple())
3769  return 0;
3770  MVT VT = CEVT.getSimpleVT();
3771 
3772  if (const auto *CI = dyn_cast<ConstantInt>(C))
3773  return X86MaterializeInt(CI, VT);
3774  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
3775  return X86MaterializeFP(CFP, VT);
3776  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
3777  return X86MaterializeGV(GV, VT);
3778 
3779  return 0;
3780 }
3781 
3782 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3783  // Fail on dynamic allocas. At this point, getRegForValue has already
3784  // checked its CSE maps, so if we're here trying to handle a dynamic
3785  // alloca, we're not going to succeed. X86SelectAddress has a
3786  // check for dynamic allocas, because it's called directly from
3787  // various places, but fastMaterializeAlloca also needs a check
3788  // in order to avoid recursion between getRegForValue,
3789  // X86SelectAddress, and fastMaterializeAlloca.
3790  if (!FuncInfo.StaticAllocaMap.count(C))
3791  return 0;
3792  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3793 
3794  X86AddressMode AM;
3795  if (!X86SelectAddress(C, AM))
3796  return 0;
3797  unsigned Opc =
3798  TLI.getPointerTy(DL) == MVT::i32
3799  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3800  : X86::LEA64r;
3801  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3802  unsigned ResultReg = createResultReg(RC);
3803  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3804  TII.get(Opc), ResultReg), AM);
3805  return ResultReg;
3806 }
3807 
3808 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3809  MVT VT;
3810  if (!isTypeLegal(CF->getType(), VT))
3811  return 0;
3812 
3813  // Get opcode and regclass for the given zero.
3814  unsigned Opc = 0;
3815  const TargetRegisterClass *RC = nullptr;
3816  switch (VT.SimpleTy) {
3817  default: return 0;
3818  case MVT::f32:
3819  if (X86ScalarSSEf32) {
3820  Opc = X86::FsFLD0SS;
3821  RC = &X86::FR32RegClass;
3822  } else {
3823  Opc = X86::LD_Fp032;
3824  RC = &X86::RFP32RegClass;
3825  }
3826  break;
3827  case MVT::f64:
3828  if (X86ScalarSSEf64) {
3829  Opc = X86::FsFLD0SD;
3830  RC = &X86::FR64RegClass;
3831  } else {
3832  Opc = X86::LD_Fp064;
3833  RC = &X86::RFP64RegClass;
3834  }
3835  break;
3836  case MVT::f80:
3837  // No f80 support yet.
3838  return 0;
3839  }
3840 
3841  unsigned ResultReg = createResultReg(RC);
3842  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3843  return ResultReg;
3844 }
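// Note: FsFLD0SS/FsFLD0SD are pseudo-instructions that are typically
// expanded later to a register self-xor (e.g. xorps), while the x87 path
// (LD_Fp032/LD_Fp064) emits an fldz.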
3845 
3846 
3847 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3848  const LoadInst *LI) {
3849  const Value *Ptr = LI->getPointerOperand();
3850  X86AddressMode AM;
3851  if (!X86SelectAddress(Ptr, AM))
3852  return false;
3853 
3854  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3855 
3856  unsigned Size = DL.getTypeAllocSize(LI->getType());
3857  unsigned Alignment = LI->getAlignment();
3858 
3859  if (Alignment == 0) // Ensure that codegen never sees alignment 0
3860  Alignment = DL.getABITypeAlignment(LI->getType());
3861 
3862  SmallVector<MachineOperand, 8> AddrOps;
3863  AM.getFullAddress(AddrOps);
3864 
3865  MachineInstr *Result = XII.foldMemoryOperandImpl(
3866  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
3867  /*AllowCommute=*/true);
3868  if (!Result)
3869  return false;
3870 
3871  // The index register could be in the wrong register class. Unfortunately,
3872  // foldMemoryOperandImpl could have commuted the instruction, so it's not
3873  // enough to just look at OpNo + the offset to the index reg. We actually
3874  // need to scan the instruction to find the index reg and check that it is
3875  // in the correct register class.
3876  unsigned OperandNo = 0;
3877  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3878  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3879  MachineOperand &MO = *I;
3880  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3881  continue;
3882  // Found the index reg, now try to rewrite it.
3883  unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
3884  MO.getReg(), OperandNo);
3885  if (IndexReg == MO.getReg())
3886  continue;
3887  MO.setReg(IndexReg);
3888  }
3889 
3890  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3891  MI->eraseFromParent();
3892  return true;
3893 }
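// Illustrative example: if a CMP32rr operand is defined only by a simple
// load, foldMemoryOperandImpl can rewrite it to CMP32rm using the load's
// address mode, merging the load and compare into one instruction; the
// fix-up loop above then re-constrains the folded index register's class
// if necessary.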
3894 
3895 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3896  const TargetRegisterClass *RC,
3897  unsigned Op0, bool Op0IsKill,
3898  unsigned Op1, bool Op1IsKill,
3899  unsigned Op2, bool Op2IsKill,
3900  unsigned Op3, bool Op3IsKill) {
3901  const MCInstrDesc &II = TII.get(MachineInstOpcode);
3902 
3903  unsigned ResultReg = createResultReg(RC);
3904  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3905  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3906  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3907  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3908 
3909  if (II.getNumDefs() >= 1)
3910  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
3911  .addReg(Op0, getKillRegState(Op0IsKill))
3912  .addReg(Op1, getKillRegState(Op1IsKill))
3913  .addReg(Op2, getKillRegState(Op2IsKill))
3914  .addReg(Op3, getKillRegState(Op3IsKill));
3915  else {
3916  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
3917  .addReg(Op0, getKillRegState(Op0IsKill))
3918  .addReg(Op1, getKillRegState(Op1IsKill))
3919  .addReg(Op2, getKillRegState(Op2IsKill))
3920  .addReg(Op3, getKillRegState(Op3IsKill));
3921  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3922  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
3923  }
3924  return ResultReg;
3925 }
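// Note: this mirrors the generic FastISel::fastEmitInst_rr/_rrr helpers for
// opcodes that take four register operands; when the opcode has no explicit
// def, the result is copied out of its first implicit def.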
3926 
3927 
3928 namespace llvm {
3929  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
3930  const TargetLibraryInfo *libInfo) {
3931  return new X86FastISel(funcInfo, libInfo);
3932  }
3933 }