//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
15 
16 #include "X86.h"
17 #include "X86CallingConv.h"
18 #include "X86InstrBuilder.h"
19 #include "X86InstrInfo.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86RegisterInfo.h"
22 #include "X86Subtarget.h"
23 #include "X86TargetMachine.h"
25 #include "llvm/CodeGen/FastISel.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/CallingConv.h"
32 #include "llvm/IR/DebugInfo.h"
33 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalAlias.h"
36 #include "llvm/IR/GlobalVariable.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/IntrinsicInst.h"
39 #include "llvm/IR/Operator.h"
40 #include "llvm/MC/MCAsmInfo.h"
41 #include "llvm/MC/MCSymbol.h"
44 using namespace llvm;
45 
46 namespace {
47 
class X86FastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo) {
    Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  bool fastSelectInstruction(const Instruction *I) override;

  /// The specified machine instr operand is a vreg, and that vreg is being
  /// provided by the specified load instruction. If possible, try to fold
  /// the load as an operand to the instruction, returning true on success.
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
                          const DebugLoc &DL);

  bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
                       unsigned &ResultReg, unsigned Alignment = 1);

  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);
  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                        X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectSExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectDivRem(const Instruction *I);

  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
                               const TargetRegisterClass *RC);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);
  bool X86SelectSIToFP(const Instruction *I);

  const X86InstrInfo *getInstrInfo() const {
    return Subtarget->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);

  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned fastMaterializeConstant(const Constant *C) override;

  unsigned fastMaterializeAlloca(const AllocaInst *C) override;

  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
  }

  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);

  bool IsMemcpySmall(uint64_t Len);

  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
                          X86AddressMode SrcAM, uint64_t Len);

  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                            const Value *Cond);

  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
                                            X86AddressMode &AM);

  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, unsigned Op0,
                             bool Op0IsKill, unsigned Op1, bool Op1IsKill,
                             unsigned Op2, bool Op2IsKill, unsigned Op3,
                             bool Op3IsKill);
};

} // end anonymous namespace.

static std::pair<unsigned, bool>
getX86SSEConditionCode(CmpInst::Predicate Predicate) {
  unsigned CC;
  bool NeedSwap = false;

  // SSE Condition code mapping:
  //  0 - EQ
  //  1 - LT
  //  2 - LE
  //  3 - UNORD
  //  4 - NEQ
  //  5 - NLT
  //  6 - NLE
  //  7 - ORD
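  // For example, FCMP_OGT has no direct SSE encoding: the operands are
  // swapped and the LT (1) encoding is used instead, so callers must honor
  // the returned NeedSwap flag.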
  switch (Predicate) {
  default: llvm_unreachable("Unexpected predicate");
  case CmpInst::FCMP_OEQ: CC = 0;          break;
  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
  case CmpInst::FCMP_OLT: CC = 1;          break;
  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
  case CmpInst::FCMP_OLE: CC = 2;          break;
  case CmpInst::FCMP_UNO: CC = 3;          break;
  case CmpInst::FCMP_UNE: CC = 4;          break;
  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
  case CmpInst::FCMP_UGE: CC = 5;          break;
  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
  case CmpInst::FCMP_UGT: CC = 6;          break;
  case CmpInst::FCMP_ORD: CC = 7;          break;
  case CmpInst::FCMP_UEQ: CC = 8;          break;
  case CmpInst::FCMP_ONE: CC = 12;         break;
  }

  return std::make_pair(CC, NeedSwap);
}

/// Adds a complex addressing mode to the given machine instr builder.
/// Note, this will constrain the index register. If it's not possible to
/// constrain the given index register, then a new one will be created. The
/// IndexReg field of the addressing mode will be updated to match in this case.
const MachineInstrBuilder &
X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
                            X86AddressMode &AM) {
  // First constrain the index register. It needs to be a GR64_NOSP.
  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
                                         MIB->getNumOperands() +
                                             X86::AddrIndexReg);
  return ::addFullAddress(MIB, AM);
}

/// Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
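/// For example, this folds the common pattern
///   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %ovf = extractvalue { i32, i1 } %res, 1
///   br i1 %ovf, ...
/// so the user can test the flags set by the intrinsic's arithmetic directly.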
bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                                       const Value *Cond) {
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  X86::CondCode TmpCC;
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
  }

  // Check if both instructions are in the same basic block.
  if (II->getParent() != I->getParent())
    return false;

  // Make sure nothing is in the way
  BasicBlock::const_iterator Start(I);
  BasicBlock::const_iterator End(II);
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  CC = TmpCC;
  return true;
}

bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
  if (evt == MVT::Other || !evt.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  VT = evt.getSimpleVT();
  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

#include "X86GenCallingConv.inc"

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
                                  MachineMemOperand *MMO, unsigned &ResultReg,
                                  unsigned Alignment) {
  bool HasSSE41 = Subtarget->hasSSE41();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX2 = Subtarget->hasAVX2();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC = &X86::GR8RegClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC = &X86::GR16RegClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC = &X86::GR32RegClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC = &X86::GR64RegClass;
    break;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
      RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
      RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  case MVT::v4f32:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPSZ128rm :
            HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
    else
      Opc = HasVLX ? X86::VMOVUPSZ128rm :
            HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
    RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
    break;
  case MVT::v2f64:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPDZ128rm :
            HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
    else
      Opc = HasVLX ? X86::VMOVUPDZ128rm :
            HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
    RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (IsNonTemporal && Alignment >= 16)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVDQA64Z128rm :
            HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
    else
      Opc = HasVLX ? X86::VMOVDQU64Z128rm :
            HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
    RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
    break;
  case MVT::v8f32:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
    else
      Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
    RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
    break;
  case MVT::v4f64:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
    else
      Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
    RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
    break;
  case MVT::v8i32:
  case MVT::v4i64:
  case MVT::v16i16:
  case MVT::v32i8:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
    else
      Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
    RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
    break;
  case MVT::v16f32:
    assert(HasAVX512);
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
    RC = &X86::VR512RegClass;
    break;
  case MVT::v8f64:
    assert(HasAVX512);
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
    RC = &X86::VR512RegClass;
    break;
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8:
    assert(HasAVX512);
    // Note: There are a lot more choices based on type with AVX-512, but
    // there's really no advantage when the load isn't masked.
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
    RC = &X86::VR512RegClass;
    break;
  }

  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  addFullAddress(MIB, AM);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base ptr Ptr
/// and a displacement offset, or a GlobalAddress, i.e. V. Return true if it
/// is possible.
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                                   X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE4A = Subtarget->hasSSE4A();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    unsigned AndResult = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(X86::AND8ri), AndResult)
        .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
    ValReg = AndResult;
    LLVM_FALLTHROUGH; // handle i1 as i8.
  }
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32:
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
    break;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSS;
      else
        Opc = HasAVX512 ? X86::VMOVSSZmr :
              HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
    } else
      Opc = X86::ST_Fp32m;
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSD;
      else
        Opc = HasAVX512 ? X86::VMOVSDZmr :
              HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
    } else
      Opc = X86::ST_Fp64m;
    break;
  case MVT::x86mmx:
    Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
    break;
  case MVT::v4f32:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPSZ128mr :
              HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
      else
        Opc = HasVLX ? X86::VMOVAPSZ128mr :
              HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
    } else
      Opc = HasVLX ? X86::VMOVUPSZ128mr :
            HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
    break;
  case MVT::v2f64:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPDZ128mr :
              HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
      else
        Opc = HasVLX ? X86::VMOVAPDZ128mr :
              HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
    } else
      Opc = HasVLX ? X86::VMOVUPDZ128mr :
            HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTDQZ128mr :
              HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
      else
        Opc = HasVLX ? X86::VMOVDQA64Z128mr :
              HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
    } else
      Opc = HasVLX ? X86::VMOVDQU64Z128mr :
            HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
    break;
  case MVT::v8f32:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
      else
        Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
    } else
      Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
    break;
  case MVT::v4f64:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
      else
        Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
    } else
      Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
    break;
  case MVT::v8i32:
  case MVT::v4i64:
  case MVT::v16i16:
  case MVT::v32i8:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
      else
        Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
    } else
      Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
    break;
  case MVT::v16f32:
    assert(HasAVX512);
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
    else
      Opc = X86::VMOVUPSZmr;
    break;
  case MVT::v8f64:
    assert(HasAVX512);
    if (Aligned) {
      Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
    } else
      Opc = X86::VMOVUPDZmr;
    break;
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8:
    assert(HasAVX512);
    // Note: There are a lot more choices based on type with AVX-512, but
    // there's really no advantage when the store isn't masked.
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
    else
      Opc = X86::VMOVDQU64Zmr;
    break;
  }

  const MCInstrDesc &Desc = TII.get(Opc);
  // Some of the instructions in the previous switch use FR128 instead
  // of FR32 for ValReg. Make sure the register we feed the instruction
  // matches its register class constraints.
  // Note: It is fine to copy from FR32 to FR128; they are the same registers
  // behind the scenes, which is why this never triggered any bugs before.
  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);

  return true;
}

bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
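  // For example, "store i32 42, i32* %p" becomes a single MOV32mi with an
  // immediate operand instead of materializing 42 in a register first.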
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1:
      Signed = false;
      LLVM_FALLTHROUGH; // Handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if (isInt<32>(CI->getSExtValue()))
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      MachineInstrBuilder MIB =
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
      addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
                                            : CI->getZExtValue());
      if (MMO)
        MIB->addMemOperand(*FuncInfo.MF, MMO);
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  bool ValKill = hasTrivialKill(Val);
  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);
  if (RR == 0)
    return false;

  ResultReg = RR;
  return true;
}

bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // Can't handle TLS yet.
    if (GV->isThreadLocal())
      return false;

    // RIP-relative addresses can't have additional register operands, so if
    // we've already folded stuff into the addressing mode, just force the
    // global value into its own register, which we can use as the basereg.
    if (!Subtarget->isPICStyleRIPRel() ||
        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
      // Okay, we've committed to selecting this global. Set up the address.
      AM.GV = GV;

      // Allow the subtarget to classify the global.
      unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);

      // If this reference is relative to the pic base, set it now.
      if (isGlobalRelativeToPICBase(GVFlags)) {
        // FIXME: How do we know Base.Reg is free??
        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
      }

      // Unless the ABI requires an extra load, return a direct reference to
      // the global.
      if (!isGlobalStubReference(GVFlags)) {
        if (Subtarget->isPICStyleRIPRel()) {
          // Use rip-relative addressing if we can. Above we verified that the
          // base and index registers are unused.
          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
          AM.Base.Reg = X86::RIP;
        }
        AM.GVOpFlags = GVFlags;
        return true;
      }

      // Ok, we need to do a load from a stub. If we've already loaded from
      // this stub, reuse the loaded pointer, otherwise emit the load now.
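      // On x86-64 PIC builds this is typically a load through the GOT
      // (e.g. movq foo@GOTPCREL(%rip), %reg); the loaded pointer then becomes
      // the base register of the final address below.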
      DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
      unsigned LoadReg;
      if (I != LocalValueMap.end() && I->second != 0) {
        LoadReg = I->second;
      } else {
        // Issue load from stub.
        unsigned Opc = 0;
        const TargetRegisterClass *RC = nullptr;
        X86AddressMode StubAM;
        StubAM.Base.Reg = AM.Base.Reg;
        StubAM.GV = GV;
        StubAM.GVOpFlags = GVFlags;

        // Prepare for inserting code in the local-value area.
        SavePoint SaveInsertPt = enterLocalValueArea();

        if (TLI.getPointerTy(DL) == MVT::i64) {
          Opc = X86::MOV64rm;
          RC = &X86::GR64RegClass;

          if (Subtarget->isPICStyleRIPRel())
            StubAM.Base.Reg = X86::RIP;
        } else {
          Opc = X86::MOV32rm;
          RC = &X86::GR32RegClass;
        }

        LoadReg = createResultReg(RC);
        MachineInstrBuilder LoadMI =
            BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
        addFullAddress(LoadMI, StubAM);

        // Ok, back to normal mode.
        leaveLocalValueArea(SaveInsertPt);

        // Prevent loading GV stub multiple times in same MBB.
        LocalValueMap[V] = LoadReg;
      }

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = LoadReg;
      AM.GV = nullptr;
      return true;
    }
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  SmallVector<const Value *, 32> GEPs;
redo_gep:
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
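    // For example, "getelementptr i32, i32* %p, i64 %i" becomes base = %p,
    // index = %i, scale = 4, which fits the x86 [base + index*scale + disp]
    // addressing form directly.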
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
        continue;
      }

      // An array/variable index is always of the form i*S where S is the
      // constant scale size. See if we can push the scale into immediates.
      uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
      for (;;) {
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
          break;
        }
        if (canFoldAddIntoGEP(U, Op)) {
          // A compatible add with a constant operand. Fold the constant.
          ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
          Disp += CI->getSExtValue() * S;
          // Iterate on the other operand.
          Op = cast<AddOperator>(Op)->getOperand(0);
          continue;
        }
        if (IndexReg == 0 &&
            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
            (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(Op).first;
          if (IndexReg == 0)
            return false;
          break;
        }
        // Unsupported.
        goto unsupported_gep;
      }
    }

    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;

    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    GEPs.push_back(V);

    if (const GetElementPtrInst *GEP =
            dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
      // Ok, the GEP indices were covered by constant-offset and scaled-index
      // addressing. Update the address state and move on to examining the base.
      V = GEP;
      goto redo_gep;
    } else if (X86SelectAddress(U->getOperand(0), AM)) {
      return true;
    }

    // If we couldn't merge the gep value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;

    for (const Value *I : reverse(GEPs))
      if (handleConstantAddresses(I, AM))
        return true;

    return false;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  return handleConstantAddresses(V, AM);
}

/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  const Instruction *I = dyn_cast<Instruction>(V);
  // Record if the value is defined in the same basic block.
  //
  // This information is crucial to know whether or not folding an
  // operand is valid.
  // Indeed, FastISel generates or reuses a virtual register for all
  // operands of all instructions it selects. Obviously, the definition and
  // its uses must use the same virtual register otherwise the produced
  // code is incorrect.
  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
  // registers for values that are alive across basic blocks. This ensures
  // that the values are consistently set across basic blocks, even if
  // different instruction selection mechanisms are used (e.g., a mix of
  // SDISel and FastISel).
  // For values local to a basic block, the instruction selection process
  // generates these virtual registers with whatever method is appropriate
  // for its needs. In particular, FastISel and SDISel do not share the way
  // local virtual registers are set.
  // Therefore, it is impossible (or at least unsafe) to share values
  // between basic blocks unless they use the same instruction selection
  // method, which is not guaranteed for X86.
  // Moreover, things like hasOneUse could not be used accurately if we
  // allowed references to values across basic blocks whereas they are not
  // alive across basic blocks initially.
  bool InMBB = true;
  if (I) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // Return a direct reference to the global. Fastisel can handle calls to
    // functions that require loads, such as dllimport and nonlazybind
    // functions.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can. Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else {
      AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
  // Atomic stores need special handling.
  const StoreInst *S = cast<StoreInst>(I);

  if (S->isAtomic())
    return false;

  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  const Value *Val = S->getValueOperand();
  const Value *Ptr = S->getPointerOperand();

  MVT VT;
  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
    return false;

  unsigned Alignment = S->getAlignment();
  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
  if (Alignment == 0) // Ensure that codegen never sees alignment 0
    Alignment = ABIAlignment;
  bool Aligned = Alignment >= ABIAlignment;

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
}

/// X86SelectRet - Select and emit code to implement ret instructions.
bool X86FastISel::X86SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();
  const X86MachineFunctionInfo *X86MFInfo =
      FuncInfo.MF->getInfo<X86MachineFunctionInfo>();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall &&
      CC != CallingConv::X86_StdCall &&
      CC != CallingConv::X86_ThisCall &&
      CC != CallingConv::X86_64_SysV &&
      CC != CallingConv::Win64)
    return false;

  // Don't handle popping bytes if they don't fit the ret's immediate.
  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  if (F.isVarArg())
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_X86);

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    // The calling-convention tables for x87 returns don't tell
    // the whole story.
    if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT SrcVT = TLI.getValueType(DL, RV->getType());
    EVT DstVT = VA.getValVT();
    // Special handling for extended integers.
    if (SrcVT != DstVT) {
      if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      assert(DstVT == MVT::i32 && "X86 should always ext to i32");

      if (SrcVT == MVT::i1) {
        if (Outs[0].Flags.isSExt())
          return false;
        SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
        SrcVT = MVT::i8;
      }
      unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
                                             ISD::SIGN_EXTEND;
      SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
                          SrcReg, /*TODO: Kill=*/false);
    }

    // Make the copy.
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  // Swift calling convention does not require we copy the sret argument
  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.

  // All x86 ABIs require that for returning structs by value we copy
  // the sret argument into %rax/%eax (depending on ABI) for the return.
  // We saved the argument into a virtual register in the entry block,
  // so now we copy the value out and into %rax/%eax.
  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
    unsigned Reg = X86MFInfo->getSRetReturnReg();
    assert(Reg &&
           "SRetReturnReg should have been set in LowerFormalArguments()!");
    unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
    RetRegs.push_back(RetReg);
  }

  // Now emit the RET.
  MachineInstrBuilder MIB;
  if (X86MFInfo->getBytesToPopOnReturn()) {
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
              .addImm(X86MFInfo->getBytesToPopOnReturn());
  } else {
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
  }
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);
  return true;
}

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
  const LoadInst *LI = cast<LoadInst>(I);

  // Atomic loads need special handling.
  if (LI->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  MVT VT;
  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
    return false;

  const Value *Ptr = LI->getPointerOperand();

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  unsigned Alignment = LI->getAlignment();
  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
  if (Alignment == 0) // Ensure that codegen never sees alignment 0
    Alignment = ABIAlignment;

  unsigned ResultReg = 0;
  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
                       Alignment))
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasAVX = Subtarget->hasAVX();
  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
  bool X86ScalarSSEf64 = Subtarget->hasSSE2();

  switch (VT.getSimpleVT().SimpleTy) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32:
    return X86ScalarSSEf32
               ? (HasAVX512 ? X86::VUCOMISSZrr
                            : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
               : 0;
  case MVT::f64:
    return X86ScalarSSEf64
               ? (HasAVX512 ? X86::VUCOMISDZrr
                            : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
               : 0;
  }
}

/// If we have a comparison with RHSC as the RHS of the comparison, return an
/// opcode that works for the compare (e.g. CMP32ri); otherwise return 0.
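/// For example, "icmp eq i32 %x, 4" can use CMP32ri8 because 4 fits in a
/// sign-extended 8-bit immediate, while larger constants fall back to CMP32ri.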
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
  int64_t Val = RHSC->getSExtValue();
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default:
    return 0;
  case MVT::i8:
    return X86::CMP8ri;
  case MVT::i16:
    if (isInt<8>(Val))
      return X86::CMP16ri8;
    return X86::CMP16ri;
  case MVT::i32:
    if (isInt<8>(Val))
      return X86::CMP32ri8;
    return X86::CMP32ri;
  case MVT::i64:
    if (isInt<8>(Val))
      return X86::CMP64ri8;
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if (isInt<32>(Val))
      return X86::CMP64ri32;
    return 0;
  }
}
1397 
1398 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1399  const DebugLoc &CurDbgLoc) {
1400  unsigned Op0Reg = getRegForValue(Op0);
1401  if (Op0Reg == 0) return false;
1402 
1403  // Handle 'null' like i32/i64 0.
1404  if (isa<ConstantPointerNull>(Op1))
1405  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1406 
1407  // We have two options: compare with register or immediate. If the RHS of
1408  // the compare is an immediate that we can fold into this compare, use
1409  // CMPri, otherwise use CMPrr.
1410  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1411  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1412  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1413  .addReg(Op0Reg)
1414  .addImm(Op1C->getSExtValue());
1415  return true;
1416  }
1417  }
1418 
1419  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1420  if (CompareOpc == 0) return false;
1421 
1422  unsigned Op1Reg = getRegForValue(Op1);
1423  if (Op1Reg == 0) return false;
1424  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1425  .addReg(Op0Reg)
1426  .addReg(Op1Reg);
1427 
1428  return true;
1429 }

bool X86FastISel::X86SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Try to optimize or fold the cmp.
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  unsigned ResultReg = 0;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_FALSE: {
    ResultReg = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
            ResultReg);
    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
                                           X86::sub_8bit);
    if (!ResultReg)
      return false;
    break;
  }
  case CmpInst::FCMP_TRUE: {
    ResultReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
            ResultReg).addImm(1);
    break;
  }
  }

  if (ResultReg) {
    updateValueMap(I, ResultReg);
    return true;
  }

  const Value *LHS = CI->getOperand(0);
  const Value *RHS = CI->getOperand(1);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just use
  // %x again on the RHS.
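  // That is, "fcmp ord double %x, 0.0" can be checked by comparing %x with
  // itself (e.g. a single UCOMISD), which is unordered exactly when %x is NaN.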
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *RHSC = dyn_cast<ConstantFP>(RHS);
    if (RHSC && RHSC->isNullValue())
      RHS = LHS;
  }

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
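  // After a UCOMISS/UCOMISD, OEQ is "ZF set and PF clear", so the SETE and
  // SETNP results are ANDed together; UNE is the complement and ORs SETNE
  // with SETP.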
  static const uint16_t SETFOpcTable[2][3] = {
    { X86::SETEr,  X86::SETNPr, X86::AND8rr },
    { X86::SETNEr, X86::SETPr,  X86::OR8rr }
  };
  const uint16_t *SETFOpc = nullptr;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
  }

  ResultReg = createResultReg(&X86::GR8RegClass);
  if (SETFOpc) {
    if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
      return false;

    unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
    unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
            FlagReg1);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
            FlagReg2);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
            ResultReg).addReg(FlagReg1).addReg(FlagReg2);
    updateValueMap(I, ResultReg);
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
  unsigned Opc = X86::getSETFromCond(CC);

  if (SwapArgs)
    std::swap(LHS, RHS);

  // Emit a compare of LHS/RHS.
  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
    return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectZExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(DL, I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  unsigned ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Handle zero-extension from i1 to i8, which is common.
  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
  if (SrcVT == MVT::i1) {
    // Set the high bits to zero.
    ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
    SrcVT = MVT::i8;

    if (ResultReg == 0)
      return false;
  }

  if (DstVT == MVT::i64) {
    // Handle extension to 64-bits via sub-register shenanigans.
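    // Writing a 32-bit register implicitly zeroes the upper 32 bits, so a
    // 32-bit zero-extending move followed by SUBREG_TO_REG yields the i64
    // result without any extra instructions.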
    unsigned MovInst;

    switch (SrcVT.SimpleTy) {
    case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
    case MVT::i16: MovInst = X86::MOVZX32rr16; break;
    case MVT::i32: MovInst = X86::MOV32rr;     break;
    default: llvm_unreachable("Unexpected zext to i64 source type");
    }

    unsigned Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
        .addReg(ResultReg);

    ResultReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
            ResultReg)
        .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
  } else if (DstVT == MVT::i16) {
    // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
    // extend to 32-bits and then extract down to 16-bits.
    unsigned Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
            Result32).addReg(ResultReg);

    ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
                                           X86::sub_16bit);
  } else if (DstVT != MVT::i8) {
    ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
                           ResultReg, /*Kill=*/true);
    if (ResultReg == 0)
      return false;
  }

  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectSExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(DL, I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  unsigned ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Handle sign-extension from i1 to i8.
  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
  if (SrcVT == MVT::i1) {
    // Set the high bits to zero.
    unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
                                          /*TODO: Kill=*/false);
    if (ZExtReg == 0)
      return false;

    // Negate the result to make an 8-bit sign extended value.
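    // NEG turns an i1 value of 1 into 0xFF (-1) and leaves 0 unchanged, which
    // is exactly the sign-extended form.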
    ResultReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
            ResultReg).addReg(ZExtReg);

    SrcVT = MVT::i8;
  }

  if (DstVT == MVT::i16) {
    // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
    // extend to 32-bits and then extract down to 16-bits.
    unsigned Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
            Result32).addReg(ResultReg);

    ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
                                           X86::sub_16bit);
  } else if (DstVT != MVT::i8) {
    ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
                           ResultReg, /*Kill=*/true);
    if (ResultReg == 0)
      return false;
  }

  updateValueMap(I, ResultReg);
  return true;
}
1629 
1630 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1631  // Unconditional branches are selected by tablegen-generated code.
1632  // Handle a conditional branch.
1633  const BranchInst *BI = cast<BranchInst>(I);
1634  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1635  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1636 
1637  // Fold the common case of a conditional branch with a comparison
1638  // in the same block (values defined on other blocks may not have
1639  // initialized registers).
1640  X86::CondCode CC;
1641  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1642  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1643  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1644 
1645  // Try to optimize or fold the cmp.
1646  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1647  switch (Predicate) {
1648  default: break;
1649  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1650  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1651  }
1652 
1653  const Value *CmpLHS = CI->getOperand(0);
1654  const Value *CmpRHS = CI->getOperand(1);
1655 
1656  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1657  // 0.0.
1658  // We don't have to materialize a zero constant for this case and can just
1659  // use %x again on the RHS.
1660  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1661  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1662  if (CmpRHSC && CmpRHSC->isNullValue())
1663  CmpRHS = CmpLHS;
1664  }
1665 
1666  // Try to take advantage of fallthrough opportunities.
1667  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1668  std::swap(TrueMBB, FalseMBB);
1669  Predicate = CmpInst::getInversePredicate(Predicate);
1670  }
1671 
1672  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1673  // code check. Instead two branch instructions are required to check all
1674  // the flags. First we change the predicate to a supported condition code,
1675  // which will be the first branch. Later on we will emit the second
1676  // branch.
1677  bool NeedExtraBranch = false;
1678  switch (Predicate) {
1679  default: break;
1680  case CmpInst::FCMP_OEQ:
1681  std::swap(TrueMBB, FalseMBB);
1682  LLVM_FALLTHROUGH;
1683  case CmpInst::FCMP_UNE:
1684  NeedExtraBranch = true;
1685  Predicate = CmpInst::FCMP_ONE;
1686  break;
1687  }
1688 
1689  bool SwapArgs;
1690  unsigned BranchOpc;
1691  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1692  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1693 
1694  BranchOpc = X86::GetCondBranchFromCond(CC);
1695  if (SwapArgs)
1696  std::swap(CmpLHS, CmpRHS);
1697 
1698  // Emit a compare of the LHS and RHS, setting the flags.
1699  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1700  return false;
1701 
1702  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1703  .addMBB(TrueMBB);
1704 
1705  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1706  // to UNE above).
1707  if (NeedExtraBranch) {
1708  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
1709  .addMBB(TrueMBB);
1710  }
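  // Illustrative sketch of the two-branch case (registers are placeholders):
  // for 'br i1 (fcmp une float %a, %b)' the emitted sequence is roughly
  //   ucomiss %b_reg, %a_reg
  //   jne .LBB_true        ; the not-equal half of UNE
  //   jp  .LBB_true        ; the unordered (NaN) half of UNE
  //   jmp .LBB_false       ; or fall through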
1711 
1712  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1713  return true;
1714  }
1715  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1716  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1717  // typically happen for _Bool and C++ bools.
1718  MVT SourceVT;
1719  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1720  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1721  unsigned TestOpc = 0;
1722  switch (SourceVT.SimpleTy) {
1723  default: break;
1724  case MVT::i8: TestOpc = X86::TEST8ri; break;
1725  case MVT::i16: TestOpc = X86::TEST16ri; break;
1726  case MVT::i32: TestOpc = X86::TEST32ri; break;
1727  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1728  }
1729  if (TestOpc) {
1730  unsigned OpReg = getRegForValue(TI->getOperand(0));
1731  if (OpReg == 0) return false;
1732 
1733  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1734  .addReg(OpReg).addImm(1);
1735 
1736  unsigned JmpOpc = X86::JNE_1;
1737  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1738  std::swap(TrueMBB, FalseMBB);
1739  JmpOpc = X86::JE_1;
1740  }
1741 
1742  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
1743  .addMBB(TrueMBB);
1744 
1745  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1746  return true;
1747  }
1748  }
1749  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1750  // Fake request the condition, otherwise the intrinsic might be completely
1751  // optimized away.
1752  unsigned TmpReg = getRegForValue(BI->getCondition());
1753  if (TmpReg == 0)
1754  return false;
1755 
1756  unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
1757 
1758  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1759  .addMBB(TrueMBB);
1760  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1761  return true;
1762  }
1763 
1764  // Otherwise do a clumsy setcc and re-test it.
1765  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1766  // in an explicit cast, so make sure to handle that correctly.
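  // Sketch of this fallback (placeholder registers):
  //   testb $1, %cond8     ; look only at bit 0 of the i1-carrying byte
  //   jne   .LBB_true      ; otherwise fall through / jmp to .LBB_false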
1767  unsigned OpReg = getRegForValue(BI->getCondition());
1768  if (OpReg == 0) return false;
1769 
1770  // In case OpReg is a K register, COPY to a GPR
1771  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1772  unsigned KOpReg = OpReg;
1773  OpReg = createResultReg(&X86::GR32RegClass);
1774  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1775  TII.get(TargetOpcode::COPY), OpReg)
1776  .addReg(KOpReg);
1777  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
1778  X86::sub_8bit);
1779  }
1780  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1781  .addReg(OpReg)
1782  .addImm(1);
1783  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
1784  .addMBB(TrueMBB);
1785  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1786  return true;
1787 }
1788 
1789 bool X86FastISel::X86SelectShift(const Instruction *I) {
1790  unsigned CReg = 0, OpReg = 0;
1791  const TargetRegisterClass *RC = nullptr;
1792  if (I->getType()->isIntegerTy(8)) {
1793  CReg = X86::CL;
1794  RC = &X86::GR8RegClass;
1795  switch (I->getOpcode()) {
1796  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1797  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1798  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1799  default: return false;
1800  }
1801  } else if (I->getType()->isIntegerTy(16)) {
1802  CReg = X86::CX;
1803  RC = &X86::GR16RegClass;
1804  switch (I->getOpcode()) {
1805  default: llvm_unreachable("Unexpected shift opcode");
1806  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1807  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1808  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1809  }
1810  } else if (I->getType()->isIntegerTy(32)) {
1811  CReg = X86::ECX;
1812  RC = &X86::GR32RegClass;
1813  switch (I->getOpcode()) {
1814  default: llvm_unreachable("Unexpected shift opcode");
1815  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1816  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1817  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1818  }
1819  } else if (I->getType()->isIntegerTy(64)) {
1820  CReg = X86::RCX;
1821  RC = &X86::GR64RegClass;
1822  switch (I->getOpcode()) {
1823  default: llvm_unreachable("Unexpected shift opcode");
1824  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1825  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1826  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1827  }
1828  } else {
1829  return false;
1830  }
1831 
1832  MVT VT;
1833  if (!isTypeLegal(I->getType(), VT))
1834  return false;
1835 
1836  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1837  if (Op0Reg == 0) return false;
1838 
1839  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1840  if (Op1Reg == 0) return false;
1841  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1842  CReg).addReg(Op1Reg);
1843 
1844  // The shift instruction uses X86::CL. If we defined a super-register
1845  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1846  if (CReg != X86::CL)
1847  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1848  TII.get(TargetOpcode::KILL), X86::CL)
1849  .addReg(CReg, RegState::Kill);
1850 
1851  unsigned ResultReg = createResultReg(RC);
1852  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1853  .addReg(Op0Reg);
1854  updateValueMap(I, ResultReg);
1855  return true;
1856 }
1857 
1858 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1859  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1860  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1861  const static bool S = true; // IsSigned
1862  const static bool U = false; // !IsSigned
1863  const static unsigned Copy = TargetOpcode::COPY;
1864  // For the X86 DIV/IDIV instruction, in most cases the dividend
1865  // (numerator) must be in a specific register pair highreg:lowreg,
1866  // producing the quotient in lowreg and the remainder in highreg.
1867  // For most data types, to set up the instruction, the dividend is
1868  // copied into lowreg, and lowreg is sign-extended or zero-extended
1869  // into highreg. The exception is i8, where the dividend is defined
1870  // as a single register rather than a register pair, and we
1871  // therefore directly sign-extend or zero-extend the dividend into
1872  // lowreg, instead of copying, and ignore the highreg.
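  // Illustrative sketch (not part of the table below; registers are
  // placeholders): a signed i32 'sdiv %a, %b' is set up as
  //   mov  %a_reg, %eax    ; OpCopy
  //   cdq                  ; OpSignExtend: sign-extend EAX into EDX
  //   idiv %b_reg          ; OpDivRem
  // leaving the quotient in EAX and the remainder in EDX. The i8 rows instead
  // use movsx/movzx to place the dividend in AX and read AL or AH.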
1873  const static struct DivRemEntry {
1874  // The following portion depends only on the data type.
1875  const TargetRegisterClass *RC;
1876  unsigned LowInReg; // low part of the register pair
1877  unsigned HighInReg; // high part of the register pair
1878  // The following portion depends on both the data type and the operation.
1879  struct DivRemResult {
1880  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1881  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1882  // highreg, or copying a zero into highreg.
1883  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1884  // zero/sign-extending into lowreg for i8.
1885  unsigned DivRemResultReg; // Register containing the desired result.
1886  bool IsOpSigned; // Whether to use signed or unsigned form.
1887  } ResultTable[NumOps];
1888  } OpTable[NumTypes] = {
1889  { &X86::GR8RegClass, X86::AX, 0, {
1890  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1891  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1892  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1893  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1894  }
1895  }, // i8
1896  { &X86::GR16RegClass, X86::AX, X86::DX, {
1897  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1898  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1899  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1900  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1901  }
1902  }, // i16
1903  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1904  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1905  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1906  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1907  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1908  }
1909  }, // i32
1910  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1911  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1912  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1913  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1914  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1915  }
1916  }, // i64
1917  };
1918 
1919  MVT VT;
1920  if (!isTypeLegal(I->getType(), VT))
1921  return false;
1922 
1923  unsigned TypeIndex, OpIndex;
1924  switch (VT.SimpleTy) {
1925  default: return false;
1926  case MVT::i8: TypeIndex = 0; break;
1927  case MVT::i16: TypeIndex = 1; break;
1928  case MVT::i32: TypeIndex = 2; break;
1929  case MVT::i64: TypeIndex = 3;
1930  if (!Subtarget->is64Bit())
1931  return false;
1932  break;
1933  }
1934 
1935  switch (I->getOpcode()) {
1936  default: llvm_unreachable("Unexpected div/rem opcode");
1937  case Instruction::SDiv: OpIndex = 0; break;
1938  case Instruction::SRem: OpIndex = 1; break;
1939  case Instruction::UDiv: OpIndex = 2; break;
1940  case Instruction::URem: OpIndex = 3; break;
1941  }
1942 
1943  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1944  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1945  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1946  if (Op0Reg == 0)
1947  return false;
1948  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1949  if (Op1Reg == 0)
1950  return false;
1951 
1952  // Move op0 into low-order input register.
1953  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1954  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1955  // Zero-extend or sign-extend into high-order input register.
1956  if (OpEntry.OpSignExtend) {
1957  if (OpEntry.IsOpSigned)
1958  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1959  TII.get(OpEntry.OpSignExtend));
1960  else {
1961  unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1962  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1963  TII.get(X86::MOV32r0), Zero32);
1964 
1965  // Copy the zero into the appropriate sub/super/identical physical
1966  // register. Unfortunately the operations needed are not uniform enough
1967  // to fit neatly into the table above.
1968  if (VT == MVT::i16) {
1969  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1970  TII.get(Copy), TypeEntry.HighInReg)
1971  .addReg(Zero32, 0, X86::sub_16bit);
1972  } else if (VT == MVT::i32) {
1973  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1974  TII.get(Copy), TypeEntry.HighInReg)
1975  .addReg(Zero32);
1976  } else if (VT == MVT::i64) {
1977  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1978  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1979  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1980  }
1981  }
1982  }
1983  // Generate the DIV/IDIV instruction.
1984  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1985  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1986  // For i8 remainder, we can't reference ah directly, as we'll end
1987  // up with bogus copies like %r9b = COPY %ah. Reference ax
1988  // instead to prevent ah references in a rex instruction.
1989  //
1990  // The current assumption of the fast register allocator is that isel
1991  // won't generate explicit references to the GR8_NOREX registers. If
1992  // the allocator and/or the backend get enhanced to be more robust in
1993  // that regard, this can be, and should be, removed.
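  // Sketch of the workaround below for a 64-bit i8 remainder (placeholder
  // vregs): copy AX into a GR16 vreg, 'shr $8' to move the AH contents into
  // the low byte, then take the sub_8bit subregister, so AH is never
  // referenced directly.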
1994  unsigned ResultReg = 0;
1995  if ((I->getOpcode() == Instruction::SRem ||
1996  I->getOpcode() == Instruction::URem) &&
1997  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
1998  unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
1999  unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
2000  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2001  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
2002 
2003  // Shift AX right by 8 bits instead of using AH.
2004  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
2005  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
2006 
2007  // Now reference the 8-bit subreg of the result.
2008  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2009  /*Kill=*/true, X86::sub_8bit);
2010  }
2011  // Copy the result out of the physreg if we haven't already.
2012  if (!ResultReg) {
2013  ResultReg = createResultReg(TypeEntry.RC);
2014  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
2015  .addReg(OpEntry.DivRemResultReg);
2016  }
2017  updateValueMap(I, ResultReg);
2018 
2019  return true;
2020 }
2021 
2022 /// Emit a conditional move instruction (if they are supported) to lower
2023 /// the select.
2024 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2025  // Check if the subtarget supports these instructions.
2026  if (!Subtarget->hasCMov())
2027  return false;
2028 
2029  // FIXME: Add support for i8.
2030  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2031  return false;
2032 
2033  const Value *Cond = I->getOperand(0);
2034  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2035  bool NeedTest = true;
2036  X86::CondCode CC = X86::COND_NE;
2037 
2038  // Optimize conditions coming from a compare if both instructions are in the
2039  // same basic block (values defined in other basic blocks may not have
2040  // initialized registers).
2041  const auto *CI = dyn_cast<CmpInst>(Cond);
2042  if (CI && (CI->getParent() == I->getParent())) {
2043  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2044 
2045  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2046  static const uint16_t SETFOpcTable[2][3] = {
2047  { X86::SETNPr, X86::SETEr , X86::TEST8rr },
2048  { X86::SETPr, X86::SETNEr, X86::OR8rr }
2049  };
2050  const uint16_t *SETFOpc = nullptr;
2051  switch (Predicate) {
2052  default: break;
2053  case CmpInst::FCMP_OEQ:
2054  SETFOpc = &SETFOpcTable[0][0];
2055  Predicate = CmpInst::ICMP_NE;
2056  break;
2057  case CmpInst::FCMP_UNE:
2058  SETFOpc = &SETFOpcTable[1][0];
2059  Predicate = CmpInst::ICMP_NE;
2060  break;
2061  }
2062 
2063  bool NeedSwap;
2064  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2065  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2066 
2067  const Value *CmpLHS = CI->getOperand(0);
2068  const Value *CmpRHS = CI->getOperand(1);
2069  if (NeedSwap)
2070  std::swap(CmpLHS, CmpRHS);
2071 
2072  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2073  // Emit a compare of the LHS and RHS, setting the flags.
2074  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2075  return false;
2076 
2077  if (SETFOpc) {
2078  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
2079  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
2080  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
2081  FlagReg1);
2082  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
2083  FlagReg2);
2084  auto const &II = TII.get(SETFOpc[2]);
2085  if (II.getNumDefs()) {
2086  unsigned TmpReg = createResultReg(&X86::GR8RegClass);
2087  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2088  .addReg(FlagReg2).addReg(FlagReg1);
2089  } else {
2090  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2091  .addReg(FlagReg2).addReg(FlagReg1);
2092  }
2093  }
2094  NeedTest = false;
2095  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2096  // Fake request the condition, otherwise the intrinsic might be completely
2097  // optimized away.
2098  unsigned TmpReg = getRegForValue(Cond);
2099  if (TmpReg == 0)
2100  return false;
2101 
2102  NeedTest = false;
2103  }
2104 
2105  if (NeedTest) {
2106  // Selects operate on i1; however, CondReg is 8 bits wide and may contain
2107  // garbage. Only the least significant bit is meaningful: if we read more
2108  // than the lsb, we may see non-zero values even though the lsb is zero.
2109  // Therefore, we have to truncate CondReg to i1 for the select. This is
2110  // achieved by performing a TEST against 1.
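  // For instance (sketch): 'testb $1, %cond8' sets ZF from bit 0 alone, so the
  // CMOVNE emitted below picks the true operand exactly when the i1 is set.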
2111  unsigned CondReg = getRegForValue(Cond);
2112  if (CondReg == 0)
2113  return false;
2114  bool CondIsKill = hasTrivialKill(Cond);
2115 
2116  // In case OpReg is a K register, COPY to a GPR
2117  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2118  unsigned KCondReg = CondReg;
2119  CondReg = createResultReg(&X86::GR32RegClass);
2120  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2121  TII.get(TargetOpcode::COPY), CondReg)
2122  .addReg(KCondReg, getKillRegState(CondIsKill));
2123  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2124  X86::sub_8bit);
2125  }
2126  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2127  .addReg(CondReg, getKillRegState(CondIsKill))
2128  .addImm(1);
2129  }
2130 
2131  const Value *LHS = I->getOperand(1);
2132  const Value *RHS = I->getOperand(2);
2133 
2134  unsigned RHSReg = getRegForValue(RHS);
2135  bool RHSIsKill = hasTrivialKill(RHS);
2136 
2137  unsigned LHSReg = getRegForValue(LHS);
2138  bool LHSIsKill = hasTrivialKill(LHS);
2139 
2140  if (!LHSReg || !RHSReg)
2141  return false;
2142 
2143  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2144  unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
2145  unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
2146  LHSReg, LHSIsKill);
2147  updateValueMap(I, ResultReg);
2148  return true;
2149 }
2150 
2151 /// Emit SSE or AVX instructions to lower the select.
2152 ///
2153 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2154 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2155 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
2156 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2157  // Optimize conditions coming from a compare if both instructions are in the
2158  // same basic block (values defined in other basic blocks may not have
2159  // initialized registers).
2160  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2161  if (!CI || (CI->getParent() != I->getParent()))
2162  return false;
2163 
2164  if (I->getType() != CI->getOperand(0)->getType() ||
2165  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2166  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2167  return false;
2168 
2169  const Value *CmpLHS = CI->getOperand(0);
2170  const Value *CmpRHS = CI->getOperand(1);
2171  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2172 
2173  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2174  // We don't have to materialize a zero constant for this case and can just use
2175  // %x again on the RHS.
2176  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2177  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2178  if (CmpRHSC && CmpRHSC->isNullValue())
2179  CmpRHS = CmpLHS;
2180  }
2181 
2182  unsigned CC;
2183  bool NeedSwap;
2184  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2185  if (CC > 7 && !Subtarget->hasAVX())
2186  return false;
2187 
2188  if (NeedSwap)
2189  std::swap(CmpLHS, CmpRHS);
2190 
2191  // Choose the SSE instruction sequence based on data type (float or double).
2192  static const uint16_t OpcTable[2][4] = {
2193  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2194  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2195  };
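  // Illustrative sketch of the non-AVX lowering built from this table:
  //   mask   = cmp(CmpLHS, CmpRHS)              ; all-ones or all-zeros
  //   result = (mask & TrueVal) | (~mask & FalseVal)
  // i.e. CMPSS/CMPSD produces the mask and ANDPS/ANDNPS/ORPS combine it with
  // the two select operands, so no branch is needed.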
2196 
2197  const uint16_t *Opc = nullptr;
2198  switch (RetVT.SimpleTy) {
2199  default: return false;
2200  case MVT::f32: Opc = &OpcTable[0][0]; break;
2201  case MVT::f64: Opc = &OpcTable[1][0]; break;
2202  }
2203 
2204  const Value *LHS = I->getOperand(1);
2205  const Value *RHS = I->getOperand(2);
2206 
2207  unsigned LHSReg = getRegForValue(LHS);
2208  bool LHSIsKill = hasTrivialKill(LHS);
2209 
2210  unsigned RHSReg = getRegForValue(RHS);
2211  bool RHSIsKill = hasTrivialKill(RHS);
2212 
2213  unsigned CmpLHSReg = getRegForValue(CmpLHS);
2214  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
2215 
2216  unsigned CmpRHSReg = getRegForValue(CmpRHS);
2217  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
2218 
2219  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2220  return false;
2221 
2222  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2223  unsigned ResultReg;
2224 
2225  if (Subtarget->hasAVX512()) {
2226  // If we have AVX512 we can use a mask compare and masked movss/sd.
2227  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2228  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2229 
2230  unsigned CmpOpcode =
2231  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2232  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
2233  CmpRHSReg, CmpRHSIsKill, CC);
2234 
2235  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2236  // bits of the result register since it's not based on any of the inputs.
2237  unsigned ImplicitDefReg = createResultReg(VR128X);
2238  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2239  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2240 
2241  // Place RHSReg in the passthru of the masked movss/sd operation and put
2242  // LHS in the input. The mask input comes from the compare.
2243  unsigned MovOpcode =
2244  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2245  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
2246  CmpReg, true, ImplicitDefReg, true,
2247  LHSReg, LHSIsKill);
2248 
2249  ResultReg = createResultReg(RC);
2250  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2251  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2252 
2253  } else if (Subtarget->hasAVX()) {
2254  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2255 
2256  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2257  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2258  // uses XMM0 as the selection register. That may need just as many
2259  // instructions as the AND/ANDN/OR sequence due to register moves, so
2260  // don't bother.
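  // Sketch: the compare result below becomes VBLENDV's per-lane mask, so the
  // blend picks LHS where the compare held and RHS otherwise, one instruction
  // in place of the AND/ANDN/OR triple.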
2261  unsigned CmpOpcode =
2262  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2263  unsigned BlendOpcode =
2264  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2265 
2266  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
2267  CmpRHSReg, CmpRHSIsKill, CC);
2268  unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
2269  LHSReg, LHSIsKill, CmpReg, true);
2270  ResultReg = createResultReg(RC);
2271  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2272  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2273  } else {
2274  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2275  unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
2276  CmpRHSReg, CmpRHSIsKill, CC);
2277  unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
2278  LHSReg, LHSIsKill);
2279  unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
2280  RHSReg, RHSIsKill);
2281  unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
2282  AndReg, /*IsKill=*/true);
2283  ResultReg = createResultReg(RC);
2284  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2285  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2286  }
2287  updateValueMap(I, ResultReg);
2288  return true;
2289 }
2290 
2291 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2292  // These are pseudo CMOV instructions and will be later expanded into control-
2293  // flow.
2294  unsigned Opc;
2295  switch (RetVT.SimpleTy) {
2296  default: return false;
2297  case MVT::i8: Opc = X86::CMOV_GR8; break;
2298  case MVT::i16: Opc = X86::CMOV_GR16; break;
2299  case MVT::i32: Opc = X86::CMOV_GR32; break;
2300  case MVT::f32: Opc = X86::CMOV_FR32; break;
2301  case MVT::f64: Opc = X86::CMOV_FR64; break;
2302  }
2303 
2304  const Value *Cond = I->getOperand(0);
2305  X86::CondCode CC = X86::COND_NE;
2306 
2307  // Optimize conditions coming from a compare if both instructions are in the
2308  // same basic block (values defined in other basic blocks may not have
2309  // initialized registers).
2310  const auto *CI = dyn_cast<CmpInst>(Cond);
2311  if (CI && (CI->getParent() == I->getParent())) {
2312  bool NeedSwap;
2313  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2314  if (CC > X86::LAST_VALID_COND)
2315  return false;
2316 
2317  const Value *CmpLHS = CI->getOperand(0);
2318  const Value *CmpRHS = CI->getOperand(1);
2319 
2320  if (NeedSwap)
2321  std::swap(CmpLHS, CmpRHS);
2322 
2323  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2324  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2325  return false;
2326  } else {
2327  unsigned CondReg = getRegForValue(Cond);
2328  if (CondReg == 0)
2329  return false;
2330  bool CondIsKill = hasTrivialKill(Cond);
2331 
2332  // In case OpReg is a K register, COPY to a GPR
2333  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2334  unsigned KCondReg = CondReg;
2335  CondReg = createResultReg(&X86::GR32RegClass);
2336  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2337  TII.get(TargetOpcode::COPY), CondReg)
2338  .addReg(KCondReg, getKillRegState(CondIsKill));
2339  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2340  X86::sub_8bit);
2341  }
2342  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2343  .addReg(CondReg, getKillRegState(CondIsKill))
2344  .addImm(1);
2345  }
2346 
2347  const Value *LHS = I->getOperand(1);
2348  const Value *RHS = I->getOperand(2);
2349 
2350  unsigned LHSReg = getRegForValue(LHS);
2351  bool LHSIsKill = hasTrivialKill(LHS);
2352 
2353  unsigned RHSReg = getRegForValue(RHS);
2354  bool RHSIsKill = hasTrivialKill(RHS);
2355 
2356  if (!LHSReg || !RHSReg)
2357  return false;
2358 
2359  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2360 
2361  unsigned ResultReg =
2362  fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
2363  updateValueMap(I, ResultReg);
2364  return true;
2365 }
2366 
2367 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2368  MVT RetVT;
2369  if (!isTypeLegal(I->getType(), RetVT))
2370  return false;
2371 
2372  // Check if we can fold the select.
2373  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2374  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2375  const Value *Opnd = nullptr;
2376  switch (Predicate) {
2377  default: break;
2378  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2379  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2380  }
2381  // No need for a select anymore - this is an unconditional move.
2382  if (Opnd) {
2383  unsigned OpReg = getRegForValue(Opnd);
2384  if (OpReg == 0)
2385  return false;
2386  bool OpIsKill = hasTrivialKill(Opnd);
2387  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2388  unsigned ResultReg = createResultReg(RC);
2389  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2390  TII.get(TargetOpcode::COPY), ResultReg)
2391  .addReg(OpReg, getKillRegState(OpIsKill));
2392  updateValueMap(I, ResultReg);
2393  return true;
2394  }
2395  }
2396 
2397  // First try to use real conditional move instructions.
2398  if (X86FastEmitCMoveSelect(RetVT, I))
2399  return true;
2400 
2401  // Try to use a sequence of SSE instructions to simulate a conditional move.
2402  if (X86FastEmitSSESelect(RetVT, I))
2403  return true;
2404 
2405  // Fall-back to pseudo conditional move instructions, which will be later
2406  // converted to control-flow.
2407  if (X86FastEmitPseudoSelect(RetVT, I))
2408  return true;
2409 
2410  return false;
2411 }
2412 
2413 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2414  // The target-independent selection algorithm in FastISel already knows how
2415  // to select a SINT_TO_FP if the target is SSE but not AVX.
2416  // Early exit if the subtarget doesn't have AVX.
2417  if (!Subtarget->hasAVX())
2418  return false;
2419 
2420  Type *InTy = I->getOperand(0)->getType();
2421  if (!InTy->isIntegerTy(32) && !InTy->isIntegerTy(64))
2422  return false;
2423 
2424  // Select integer to float/double conversion.
2425  unsigned OpReg = getRegForValue(I->getOperand(0));
2426  if (OpReg == 0)
2427  return false;
2428 
2429  const TargetRegisterClass *RC = nullptr;
2430  unsigned Opcode;
2431 
2432  if (I->getType()->isDoubleTy()) {
2433  // sitofp int -> double
2434  Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI642SDrr : X86::VCVTSI2SDrr;
2435  RC = &X86::FR64RegClass;
2436  } else if (I->getType()->isFloatTy()) {
2437  // sitofp int -> float
2438  Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI642SSrr : X86::VCVTSI2SSrr;
2439  RC = &X86::FR32RegClass;
2440  } else
2441  return false;
2442 
2443  unsigned ImplicitDefReg = createResultReg(RC);
2444  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2445  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2446  unsigned ResultReg =
2447  fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
2448  updateValueMap(I, ResultReg);
2449  return true;
2450 }
2451 
2452 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2453 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2454  unsigned TargetOpc,
2455  const TargetRegisterClass *RC) {
2456  assert((I->getOpcode() == Instruction::FPExt ||
2457  I->getOpcode() == Instruction::FPTrunc) &&
2458  "Instruction must be an FPExt or FPTrunc!");
2459 
2460  unsigned OpReg = getRegForValue(I->getOperand(0));
2461  if (OpReg == 0)
2462  return false;
2463 
2464  unsigned ImplicitDefReg;
2465  if (Subtarget->hasAVX()) {
2466  ImplicitDefReg = createResultReg(RC);
2467  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2468  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2469 
2470  }
2471 
2472  unsigned ResultReg = createResultReg(RC);
2473  MachineInstrBuilder MIB;
2474  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2475  ResultReg);
2476 
2477  if (Subtarget->hasAVX())
2478  MIB.addReg(ImplicitDefReg);
2479 
2480  MIB.addReg(OpReg);
2481  updateValueMap(I, ResultReg);
2482  return true;
2483 }
2484 
2485 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2486  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2487  I->getOperand(0)->getType()->isFloatTy()) {
2488  bool HasAVX512 = Subtarget->hasAVX512();
2489  // fpext from float to double.
2490  unsigned Opc =
2491  HasAVX512 ? X86::VCVTSS2SDZrr
2492  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2493  return X86SelectFPExtOrFPTrunc(
2494  I, Opc, HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass);
2495  }
2496 
2497  return false;
2498 }
2499 
2500 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2501  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2502  I->getOperand(0)->getType()->isDoubleTy()) {
2503  bool HasAVX512 = Subtarget->hasAVX512();
2504  // fptrunc from double to float.
2505  unsigned Opc =
2506  HasAVX512 ? X86::VCVTSD2SSZrr
2507  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2508  return X86SelectFPExtOrFPTrunc(
2509  I, Opc, HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass);
2510  }
2511 
2512  return false;
2513 }
2514 
2515 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2516  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2517  EVT DstVT = TLI.getValueType(DL, I->getType());
2518 
2519  // This code only handles truncation to byte.
2520  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2521  return false;
2522  if (!TLI.isTypeLegal(SrcVT))
2523  return false;
2524 
2525  unsigned InputReg = getRegForValue(I->getOperand(0));
2526  if (!InputReg)
2527  // Unhandled operand. Halt "fast" selection and bail.
2528  return false;
2529 
2530  if (SrcVT == MVT::i8) {
2531  // Truncate from i8 to i1; no code needed.
2532  updateValueMap(I, InputReg);
2533  return true;
2534  }
2535 
2536  // Issue an extract_subreg.
2537  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2538  InputReg, false,
2539  X86::sub_8bit);
2540  if (!ResultReg)
2541  return false;
2542 
2543  updateValueMap(I, ResultReg);
2544  return true;
2545 }
2546 
2547 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2548  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2549 }
2550 
2551 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2552  X86AddressMode SrcAM, uint64_t Len) {
2553 
2554  // Make sure we don't bloat code by inlining very large memcpy's.
2555  if (!IsMemcpySmall(Len))
2556  return false;
2557 
2558  bool i64Legal = Subtarget->is64Bit();
2559 
2560  // We don't care about alignment here since we just emit integer accesses.
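  // For example (sketch): a 13-byte copy on x86-64 is emitted as one i64, one
  // i32 and one i8 load/store pair (8 + 4 + 1 bytes).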
2561  while (Len) {
2562  MVT VT;
2563  if (Len >= 8 && i64Legal)
2564  VT = MVT::i64;
2565  else if (Len >= 4)
2566  VT = MVT::i32;
2567  else if (Len >= 2)
2568  VT = MVT::i16;
2569  else
2570  VT = MVT::i8;
2571 
2572  unsigned Reg;
2573  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2574  RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2575  assert(RV && "Failed to emit load or store??");
2576 
2577  unsigned Size = VT.getSizeInBits()/8;
2578  Len -= Size;
2579  DestAM.Disp += Size;
2580  SrcAM.Disp += Size;
2581  }
2582 
2583  return true;
2584 }
2585 
2586 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2587  // FIXME: Handle more intrinsics.
2588  switch (II->getIntrinsicID()) {
2589  default: return false;
2590  case Intrinsic::convert_from_fp16:
2591  case Intrinsic::convert_to_fp16: {
2592  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2593  return false;
2594 
2595  const Value *Op = II->getArgOperand(0);
2596  unsigned InputReg = getRegForValue(Op);
2597  if (InputReg == 0)
2598  return false;
2599 
2600  // F16C only allows converting from float to half and from half to float.
2601  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2602  if (IsFloatToHalf) {
2603  if (!Op->getType()->isFloatTy())
2604  return false;
2605  } else {
2606  if (!II->getType()->isFloatTy())
2607  return false;
2608  }
2609 
2610  unsigned ResultReg = 0;
2611  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2612  if (IsFloatToHalf) {
2613  // 'InputReg' is implicitly promoted from register class FR32 to
2614  // register class VR128 by method 'constrainOperandRegClass' which is
2615  // directly called by 'fastEmitInst_ri'.
2616  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2617  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2618  // It's consistent with the other FP instructions, which are usually
2619  // controlled by MXCSR.
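  // Roughly (placeholder registers): vcvtps2ph $4, %xmm0, %xmm0 followed by
  // vmovd %xmm0, %eax; the half value then sits in the low 16 bits (AX).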
2620  InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
2621 
2622  // Move the lower 32 bits of InputReg into a register of class GR32.
2623  ResultReg = createResultReg(&X86::GR32RegClass);
2624  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2625  TII.get(X86::VMOVPDI2DIrr), ResultReg)
2626  .addReg(InputReg, RegState::Kill);
2627 
2628  // The result value is in the lower 16-bits of ResultReg.
2629  unsigned RegIdx = X86::sub_16bit;
2630  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
2631  } else {
2632  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2633  // Explicitly sign-extend the input to 32-bit.
2634  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
2635  /*Kill=*/false);
2636 
2637  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2638  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2639  InputReg, /*Kill=*/true);
2640 
2641  InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
2642 
2643  // The result value is in the lower 32-bits of ResultReg.
2644  // Emit an explicit copy from register class VR128 to register class FR32.
2645  ResultReg = createResultReg(&X86::FR32RegClass);
2646  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2647  TII.get(TargetOpcode::COPY), ResultReg)
2648  .addReg(InputReg, RegState::Kill);
2649  }
2650 
2651  updateValueMap(II, ResultReg);
2652  return true;
2653  }
2654  case Intrinsic::frameaddress: {
2655  MachineFunction *MF = FuncInfo.MF;
2656  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2657  return false;
2658 
2659  Type *RetTy = II->getCalledFunction()->getReturnType();
2660 
2661  MVT VT;
2662  if (!isTypeLegal(RetTy, VT))
2663  return false;
2664 
2665  unsigned Opc;
2666  const TargetRegisterClass *RC = nullptr;
2667 
2668  switch (VT.SimpleTy) {
2669  default: llvm_unreachable("Invalid result type for frameaddress.");
2670  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2671  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2672  }
2673 
2674  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2675  // we get the wrong frame register.
2676  MachineFrameInfo &MFI = MF->getFrameInfo();
2677  MFI.setFrameAddressIsTaken(true);
2678 
2679  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2680  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2681  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2682  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2683  "Invalid Frame Register!");
2684 
2685  // Always make a copy of the frame register to a vreg first, so that we
2686  // never directly reference the frame register (the TwoAddressInstruction-
2687  // Pass doesn't like that).
2688  unsigned SrcReg = createResultReg(RC);
2689  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2690  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2691 
2692  // Now recursively load from the frame address.
2693  // movq (%rbp), %rax
2694  // movq (%rax), %rax
2695  // movq (%rax), %rax
2696  // ...
2697  unsigned DestReg;
2698  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2699  while (Depth--) {
2700  DestReg = createResultReg(RC);
2701  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2702  TII.get(Opc), DestReg), SrcReg);
2703  SrcReg = DestReg;
2704  }
2705 
2706  updateValueMap(II, SrcReg);
2707  return true;
2708  }
2709  case Intrinsic::memcpy: {
2710  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2711  // Don't handle volatile or variable length memcpys.
2712  if (MCI->isVolatile())
2713  return false;
2714 
2715  if (isa<ConstantInt>(MCI->getLength())) {
2716  // Small memcpy's are common enough that we want to do them
2717  // without a call if possible.
2718  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2719  if (IsMemcpySmall(Len)) {
2720  X86AddressMode DestAM, SrcAM;
2721  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2722  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2723  return false;
2724  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2725  return true;
2726  }
2727  }
2728 
2729  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2730  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2731  return false;
2732 
2733  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2734  return false;
2735 
2736  return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
2737  }
2738  case Intrinsic::memset: {
2739  const MemSetInst *MSI = cast<MemSetInst>(II);
2740 
2741  if (MSI->isVolatile())
2742  return false;
2743 
2744  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2745  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2746  return false;
2747 
2748  if (MSI->getDestAddressSpace() > 255)
2749  return false;
2750 
2751  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
2752  }
2753  case Intrinsic::stackprotector: {
2754  // Emit code to store the stack guard onto the stack.
2755  EVT PtrTy = TLI.getPointerTy(DL);
2756 
2757  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2758  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2759 
2760  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2761 
2762  // Grab the frame index.
2763  X86AddressMode AM;
2764  if (!X86SelectAddress(Slot, AM)) return false;
2765  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2766  return true;
2767  }
2768  case Intrinsic::dbg_declare: {
2769  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2770  X86AddressMode AM;
2771  assert(DI->getAddress() && "Null address should be checked earlier!");
2772  if (!X86SelectAddress(DI->getAddress(), AM))
2773  return false;
2774  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2775  // FIXME may need to add RegState::Debug to any registers produced,
2776  // although ESP/EBP should be the only ones at the moment.
2777  assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
2778  "Expected inlined-at fields to agree");
2779  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2780  .addImm(0)
2781  .addMetadata(DI->getVariable())
2782  .addMetadata(DI->getExpression());
2783  return true;
2784  }
2785  case Intrinsic::trap: {
2786  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2787  return true;
2788  }
2789  case Intrinsic::sqrt: {
2790  if (!Subtarget->hasSSE1())
2791  return false;
2792 
2793  Type *RetTy = II->getCalledFunction()->getReturnType();
2794 
2795  MVT VT;
2796  if (!isTypeLegal(RetTy, VT))
2797  return false;
2798 
2799  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2800  // is not generated by FastISel yet.
2801  // FIXME: Update this code once tablegen can handle it.
2802  static const uint16_t SqrtOpc[2][2] = {
2803  {X86::SQRTSSr, X86::VSQRTSSr},
2804  {X86::SQRTSDr, X86::VSQRTSDr}
2805  };
2806  bool HasAVX = Subtarget->hasAVX();
2807  unsigned Opc;
2808  const TargetRegisterClass *RC;
2809  switch (VT.SimpleTy) {
2810  default: return false;
2811  case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
2812  case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
2813  }
2814 
2815  const Value *SrcVal = II->getArgOperand(0);
2816  unsigned SrcReg = getRegForValue(SrcVal);
2817 
2818  if (SrcReg == 0)
2819  return false;
2820 
2821  unsigned ImplicitDefReg = 0;
2822  if (HasAVX) {
2823  ImplicitDefReg = createResultReg(RC);
2824  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2825  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2826  }
2827 
2828  unsigned ResultReg = createResultReg(RC);
2829  MachineInstrBuilder MIB;
2830  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2831  ResultReg);
2832 
2833  if (ImplicitDefReg)
2834  MIB.addReg(ImplicitDefReg);
2835 
2836  MIB.addReg(SrcReg);
2837 
2838  updateValueMap(II, ResultReg);
2839  return true;
2840  }
2841  case Intrinsic::sadd_with_overflow:
2842  case Intrinsic::uadd_with_overflow:
2843  case Intrinsic::ssub_with_overflow:
2844  case Intrinsic::usub_with_overflow:
2845  case Intrinsic::smul_with_overflow:
2846  case Intrinsic::umul_with_overflow: {
2847  // This implements the basic lowering of the xalu with overflow intrinsics
2848  // into add/sub/mul followed by either seto or setb.
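  // For example (sketch): 'uadd.with.overflow.i32 %a, %b' becomes roughly
  //   addl %b_reg, %a_reg
  //   setb %ovf8          ; carry flag = unsigned overflow
  // with the sum as result 0 and the SETcc byte as the i1 result 1; the
  // signed variants use SETO instead of SETB.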
2849  const Function *Callee = II->getCalledFunction();
2850  auto *Ty = cast<StructType>(Callee->getReturnType());
2851  Type *RetTy = Ty->getTypeAtIndex(0U);
2852  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2853  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2854  "Overflow value expected to be an i1");
2855 
2856  MVT VT;
2857  if (!isTypeLegal(RetTy, VT))
2858  return false;
2859 
2860  if (VT < MVT::i8 || VT > MVT::i64)
2861  return false;
2862 
2863  const Value *LHS = II->getArgOperand(0);
2864  const Value *RHS = II->getArgOperand(1);
2865 
2866  // Canonicalize immediate to the RHS.
2867  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2868  isCommutativeIntrinsic(II))
2869  std::swap(LHS, RHS);
2870 
2871  bool UseIncDec = false;
2872  if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
2873  UseIncDec = true;
2874 
2875  unsigned BaseOpc, CondOpc;
2876  switch (II->getIntrinsicID()) {
2877  default: llvm_unreachable("Unexpected intrinsic!");
2878  case Intrinsic::sadd_with_overflow:
2879  BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
2880  CondOpc = X86::SETOr;
2881  break;
2882  case Intrinsic::uadd_with_overflow:
2883  BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
2884  case Intrinsic::ssub_with_overflow:
2885  BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
2886  CondOpc = X86::SETOr;
2887  break;
2888  case Intrinsic::usub_with_overflow:
2889  BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
2890  case Intrinsic::smul_with_overflow:
2891  BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
2892  case Intrinsic::umul_with_overflow:
2893  BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
2894  }
2895 
2896  unsigned LHSReg = getRegForValue(LHS);
2897  if (LHSReg == 0)
2898  return false;
2899  bool LHSIsKill = hasTrivialKill(LHS);
2900 
2901  unsigned ResultReg = 0;
2902  // Check if we have an immediate version.
2903  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2904  static const uint16_t Opc[2][4] = {
2905  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2906  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2907  };
2908 
2909  if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
2910  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2911  bool IsDec = BaseOpc == X86ISD::DEC;
2912  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2913  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2914  .addReg(LHSReg, getKillRegState(LHSIsKill));
2915  } else
2916  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2917  CI->getZExtValue());
2918  }
2919 
2920  unsigned RHSReg;
2921  bool RHSIsKill;
2922  if (!ResultReg) {
2923  RHSReg = getRegForValue(RHS);
2924  if (RHSReg == 0)
2925  return false;
2926  RHSIsKill = hasTrivialKill(RHS);
2927  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2928  RHSIsKill);
2929  }
2930 
2931  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2932  // it manually.
2933  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2934  static const uint16_t MULOpc[] =
2935  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2936  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2937  // First copy the first operand into RAX, which is an implicit input to
2938  // the X86::MUL*r instruction.
2939  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2940  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2941  .addReg(LHSReg, getKillRegState(LHSIsKill));
2942  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2943  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2944  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2945  static const uint16_t MULOpc[] =
2946  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2947  if (VT == MVT::i8) {
2948  // Copy the first operand into AL, which is an implicit input to the
2949  // X86::IMUL8r instruction.
2950  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2951  TII.get(TargetOpcode::COPY), X86::AL)
2952  .addReg(LHSReg, getKillRegState(LHSIsKill));
2953  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2954  RHSIsKill);
2955  } else
2956  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2957  TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2958  RHSReg, RHSIsKill);
2959  }
2960 
2961  if (!ResultReg)
2962  return false;
2963 
2964  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2965  unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
2966  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2967  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
2968  ResultReg2);
2969 
2970  updateValueMap(II, ResultReg, 2);
2971  return true;
2972  }
2973  case Intrinsic::x86_sse_cvttss2si:
2974  case Intrinsic::x86_sse_cvttss2si64:
2975  case Intrinsic::x86_sse2_cvttsd2si:
2976  case Intrinsic::x86_sse2_cvttsd2si64: {
2977  bool IsInputDouble;
2978  switch (II->getIntrinsicID()) {
2979  default: llvm_unreachable("Unexpected intrinsic.");
2980  case Intrinsic::x86_sse_cvttss2si:
2981  case Intrinsic::x86_sse_cvttss2si64:
2982  if (!Subtarget->hasSSE1())
2983  return false;
2984  IsInputDouble = false;
2985  break;
2986  case Intrinsic::x86_sse2_cvttsd2si:
2987  case Intrinsic::x86_sse2_cvttsd2si64:
2988  if (!Subtarget->hasSSE2())
2989  return false;
2990  IsInputDouble = true;
2991  break;
2992  }
2993 
2994  Type *RetTy = II->getCalledFunction()->getReturnType();
2995  MVT VT;
2996  if (!isTypeLegal(RetTy, VT))
2997  return false;
2998 
2999  static const uint16_t CvtOpc[2][2][2] = {
3000  { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr },
3001  { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } },
3002  { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr },
3003  { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } }
3004  };
3005  bool HasAVX = Subtarget->hasAVX();
3006  unsigned Opc;
3007  switch (VT.SimpleTy) {
3008  default: llvm_unreachable("Unexpected result type.");
3009  case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
3010  case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
3011  }
3012 
3013  // Check if we can fold insertelement instructions into the convert.
3014  const Value *Op = II->getArgOperand(0);
3015  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3016  const Value *Index = IE->getOperand(2);
3017  if (!isa<ConstantInt>(Index))
3018  break;
3019  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3020 
3021  if (Idx == 0) {
3022  Op = IE->getOperand(1);
3023  break;
3024  }
3025  Op = IE->getOperand(0);
3026  }
3027 
3028  unsigned Reg = getRegForValue(Op);
3029  if (Reg == 0)
3030  return false;
3031 
3032  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3033  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3034  .addReg(Reg);
3035 
3036  updateValueMap(II, ResultReg);
3037  return true;
3038  }
3039  }
3040 }
3041 
3042 bool X86FastISel::fastLowerArguments() {
3043  if (!FuncInfo.CanLowerReturn)
3044  return false;
3045 
3046  const Function *F = FuncInfo.Fn;
3047  if (F->isVarArg())
3048  return false;
3049 
3050  CallingConv::ID CC = F->getCallingConv();
3051  if (CC != CallingConv::C)
3052  return false;
3053 
3054  if (Subtarget->isCallingConvWin64(CC))
3055  return false;
3056 
3057  if (!Subtarget->is64Bit())
3058  return false;
3059 
3060  if (Subtarget->useSoftFloat())
3061  return false;
3062 
3063  // Only handle simple cases: up to 6 i32/i64 GPR and 8 f32/f64 XMM arguments.
3064  unsigned GPRCnt = 0;
3065  unsigned FPRCnt = 0;
3066  for (auto const &Arg : F->args()) {
3067  if (Arg.hasAttribute(Attribute::ByVal) ||
3068  Arg.hasAttribute(Attribute::InReg) ||
3069  Arg.hasAttribute(Attribute::StructRet) ||
3070  Arg.hasAttribute(Attribute::SwiftSelf) ||
3071  Arg.hasAttribute(Attribute::SwiftError) ||
3072  Arg.hasAttribute(Attribute::Nest))
3073  return false;
3074 
3075  Type *ArgTy = Arg.getType();
3076  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3077  return false;
3078 
3079  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3080  if (!ArgVT.isSimple()) return false;
3081  switch (ArgVT.getSimpleVT().SimpleTy) {
3082  default: return false;
3083  case MVT::i32:
3084  case MVT::i64:
3085  ++GPRCnt;
3086  break;
3087  case MVT::f32:
3088  case MVT::f64:
3089  if (!Subtarget->hasSSE1())
3090  return false;
3091  ++FPRCnt;
3092  break;
3093  }
3094 
3095  if (GPRCnt > 6)
3096  return false;
3097 
3098  if (FPRCnt > 8)
3099  return false;
3100  }
3101 
3102  static const MCPhysReg GPR32ArgRegs[] = {
3103  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3104  };
3105  static const MCPhysReg GPR64ArgRegs[] = {
3106  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3107  };
3108  static const MCPhysReg XMMArgRegs[] = {
3109  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3110  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3111  };
3112 
3113  unsigned GPRIdx = 0;
3114  unsigned FPRIdx = 0;
3115  for (auto const &Arg : F->args()) {
3116  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3117  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3118  unsigned SrcReg;
3119  switch (VT.SimpleTy) {
3120  default: llvm_unreachable("Unexpected value type.");
3121  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3122  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3123  case MVT::f32: LLVM_FALLTHROUGH;
3124  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3125  }
3126  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3127  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3128  // Without this, EmitLiveInCopies may eliminate the livein if its only
3129  // use is a bitcast (which isn't turned into an instruction).
3130  unsigned ResultReg = createResultReg(RC);
3131  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3132  TII.get(TargetOpcode::COPY), ResultReg)
3133  .addReg(DstReg, getKillRegState(true));
3134  updateValueMap(&Arg, ResultReg);
3135  }
3136  return true;
3137 }
3138 
3139 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3140  CallingConv::ID CC,
3141  ImmutableCallSite *CS) {
3142  if (Subtarget->is64Bit())
3143  return 0;
3144  if (Subtarget->getTargetTriple().isOSMSVCRT())
3145  return 0;
3146  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3147  CC == CallingConv::HiPE)
3148  return 0;
3149 
3150  if (CS)
3151  if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
3152  CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3153  return 0;
3154 
3155  return 4;
3156 }
3157 
3158 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3159  auto &OutVals = CLI.OutVals;
3160  auto &OutFlags = CLI.OutFlags;
3161  auto &OutRegs = CLI.OutRegs;
3162  auto &Ins = CLI.Ins;
3163  auto &InRegs = CLI.InRegs;
3164  CallingConv::ID CC = CLI.CallConv;
3165  bool &IsTailCall = CLI.IsTailCall;
3166  bool IsVarArg = CLI.IsVarArg;
3167  const Value *Callee = CLI.Callee;
3168  MCSymbol *Symbol = CLI.Symbol;
3169 
3170  bool Is64Bit = Subtarget->is64Bit();
3171  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3172 
3173  const CallInst *CI =
3174  CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
3175  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
3176 
3177  // Call / invoke instructions with NoCfCheck attribute require special
3178  // handling.
3179  const auto *II =
3180  CLI.CS ? dyn_cast<InvokeInst>(CLI.CS->getInstruction()) : nullptr;
3181  if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()))
3182  return false;
3183 
3184  // Calls to functions with no_caller_saved_registers need special handling.
3185  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3186  (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
3187  return false;
3188 
3189  // Functions using retpoline should use SDISel for calls.
3190  if (Subtarget->useRetpoline())
3191  return false;
3192 
3193  // Handle only C, fastcc, webkit_js, Swift, and the common x86/Win64 calling
3193  // conventions for now.
3194  switch (CC) {
3195  default: return false;
3196  case CallingConv::C:
3197  case CallingConv::Fast:
3198  case CallingConv::WebKit_JS:
3199  case CallingConv::Swift:
3200  case CallingConv::X86_FastCall:
3201  case CallingConv::X86_StdCall:
3202  case CallingConv::X86_ThisCall:
3203  case CallingConv::Win64:
3204  case CallingConv::X86_64_SysV:
3205  break;
3206  }
3207 
3208  // Allow SelectionDAG isel to handle tail calls.
3209  if (IsTailCall)
3210  return false;
3211 
3212  // fastcc with -tailcallopt is intended to provide a guaranteed
3213  // tail call optimization. Fastisel doesn't know how to do that.
3214  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
3215  return false;
3216 
3217  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3218  // x86-32. Special handling for x86-64 is implemented.
3219  if (IsVarArg && IsWin64)
3220  return false;
3221 
3222  // Don't know about inalloca yet.
3223  if (CLI.CS && CLI.CS->hasInAllocaArgument())
3224  return false;
3225 
3226  for (auto Flag : CLI.OutFlags)
3227  if (Flag.isSwiftError())
3228  return false;
3229 
3230  SmallVector<MVT, 16> OutVTs;
3231  SmallVector<unsigned, 16> ArgRegs;
3232 
3233  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3234  // instruction. This is safe because it is common to all FastISel supported
3235  // calling conventions on x86.
3236  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3237  Value *&Val = OutVals[i];
3238  ISD::ArgFlagsTy Flags = OutFlags[i];
3239  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3240  if (CI->getBitWidth() < 32) {
3241  if (Flags.isSExt())
3242  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
3243  else
3244  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
3245  }
3246  }
3247 
3248  // Passing bools around ends up doing a trunc to i1 and passing it.
3249  // Codegen this as an argument + "and 1".
3250  MVT VT;
3251  auto *TI = dyn_cast<TruncInst>(Val);
3252  unsigned ResultReg;
3253  if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
3254  (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
3255  TI->hasOneUse()) {
3256  Value *PrevVal = TI->getOperand(0);
3257  ResultReg = getRegForValue(PrevVal);
3258 
3259  if (!ResultReg)
3260  return false;
3261 
3262  if (!isTypeLegal(PrevVal->getType(), VT))
3263  return false;
3264 
3265  ResultReg =
3266  fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
3267  } else {
3268  if (!isTypeLegal(Val->getType(), VT))
3269  return false;
3270  ResultReg = getRegForValue(Val);
3271  }
3272 
3273  if (!ResultReg)
3274  return false;
3275 
3276  ArgRegs.push_back(ResultReg);
3277  OutVTs.push_back(VT);
3278  }
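// Illustrative sketch (IR names are arbitrary): a pattern like
//   %b = trunc i32 %x to i1
//   call void @use(i1 zeroext %b)
// does not emit a separate truncate; when the trunc has a single use and
// lives in the call's block, the register holding %x is simply masked with
// "and $1" above and that masked register is what gets passed.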
3279 
3280  // Analyze operands of the call, assigning locations to each operand.
3281  SmallVector<CCValAssign, 16> ArgLocs;
3282  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3283 
3284  // Allocate shadow area for Win64
3285  if (IsWin64)
3286  CCInfo.AllocateStack(32, 8);
3287 
3288  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3289 
3290  // Get a count of how many bytes are to be pushed on the stack.
3291  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3292 
3293  // Issue CALLSEQ_START
3294  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3295  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3296  .addImm(NumBytes).addImm(0).addImm(0);
3297 
3298  // Walk the register/memloc assignments, inserting copies/loads.
3299  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3300  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3301  CCValAssign const &VA = ArgLocs[i];
3302  const Value *ArgVal = OutVals[VA.getValNo()];
3303  MVT ArgVT = OutVTs[VA.getValNo()];
3304 
3305  if (ArgVT == MVT::x86mmx)
3306  return false;
3307 
3308  unsigned ArgReg = ArgRegs[VA.getValNo()];
3309 
3310  // Promote the value if needed.
3311  switch (VA.getLocInfo()) {
3312  case CCValAssign::Full: break;
3313  case CCValAssign::SExt: {
3314  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3315  "Unexpected extend");
3316 
3317  if (ArgVT == MVT::i1)
3318  return false;
3319 
3320  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3321  ArgVT, ArgReg);
3322  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3323  ArgVT = VA.getLocVT();
3324  break;
3325  }
3326  case CCValAssign::ZExt: {
3327  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3328  "Unexpected extend");
3329 
3330  // Handle zero-extension from i1 to i8, which is common.
3331  if (ArgVT == MVT::i1) {
3332  // Set the high bits to zero.
3333  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
3334  ArgVT = MVT::i8;
3335 
3336  if (ArgReg == 0)
3337  return false;
3338  }
3339 
3340  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3341  ArgVT, ArgReg);
3342  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3343  ArgVT = VA.getLocVT();
3344  break;
3345  }
3346  case CCValAssign::AExt: {
3347  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3348  "Unexpected extend");
3349  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3350  ArgVT, ArgReg);
3351  if (!Emitted)
3352  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3353  ArgVT, ArgReg);
3354  if (!Emitted)
3355  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3356  ArgVT, ArgReg);
3357 
3358  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3359  ArgVT = VA.getLocVT();
3360  break;
3361  }
3362  case CCValAssign::BCvt: {
3363  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
3364  /*TODO: Kill=*/false);
3365  assert(ArgReg && "Failed to emit a bitcast!");
3366  ArgVT = VA.getLocVT();
3367  break;
3368  }
3369  case CCValAssign::VExt:
3370  // VExt has not been implemented, so this should be impossible to reach
3371  // for now. However, fall back to SelectionDAG isel once it is implemented.
3372  return false;
3373  case CCValAssign::AExtUpper:
3374  case CCValAssign::SExtUpper:
3375  case CCValAssign::ZExtUpper:
3376  case CCValAssign::FPExt:
3377  llvm_unreachable("Unexpected loc info!");
3378  case CCValAssign::Indirect:
3379  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3380  // support this.
3381  return false;
3382  }
3383 
3384  if (VA.isRegLoc()) {
3385  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3386  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3387  OutRegs.push_back(VA.getLocReg());
3388  } else {
3389  assert(VA.isMemLoc());
3390 
3391  // Don't emit stores for undef values.
3392  if (isa<UndefValue>(ArgVal))
3393  continue;
3394 
3395  unsigned LocMemOffset = VA.getLocMemOffset();
3396  X86AddressMode AM;
3397  AM.Base.Reg = RegInfo->getStackRegister();
3398  AM.Disp = LocMemOffset;
3399  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3400  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3401  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3402  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3403  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3404  if (Flags.isByVal()) {
3405  X86AddressMode SrcAM;
3406  SrcAM.Base.Reg = ArgReg;
3407  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3408  return false;
3409  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3410  // If this is a really simple value, emit this with the Value* version
3411  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3412  // as it can cause us to reevaluate the argument.
3413  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3414  return false;
3415  } else {
3416  bool ValIsKill = hasTrivialKill(ArgVal);
3417  if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
3418  return false;
3419  }
3420  }
3421  }
3422 
3423  // ELF / PIC requires the GOT pointer to be in the EBX register before
3424  // making function calls via the PLT.
3425  if (Subtarget->isPICStyleGOT()) {
3426  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3427  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3428  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3429  }
3430 
3431  if (Is64Bit && IsVarArg && !IsWin64) {
3432  // From AMD64 ABI document:
3433  // For calls that may call functions that use varargs or stdargs
3434  // (prototype-less calls or calls to functions containing ellipsis (...) in
3435  // the declaration) %al is used as hidden argument to specify the number
3436  // of SSE registers used. The contents of %al do not need to match exactly
3437  // the number of registers, but must be an upper bound on the number of SSE
3438  // registers used and is in the range 0 - 8 inclusive.
3439 
3440  // Count the number of XMM registers allocated.
3441  static const MCPhysReg XMMArgRegs[] = {
3442  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3443  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3444  };
3445  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3446  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3447  && "SSE registers cannot be used when SSE is disabled");
3448  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3449  X86::AL).addImm(NumXMMRegs);
3450  }
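// Illustrative sketch: for a variadic call such as
//   call i32 (i8*, ...) @printf(i8* %fmt, double %x)
// exactly one XMM register carries an argument, so "movb $1, %al" is emitted
// right before the call, as the AMD64 ABI requires for callees that may use
// va_arg.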
3451 
3452  // Materialize callee address in a register. FIXME: GV address can be
3453  // handled with a CALLpcrel32 instead.
3454  X86AddressMode CalleeAM;
3455  if (!X86SelectCallAddress(Callee, CalleeAM))
3456  return false;
3457 
3458  unsigned CalleeOp = 0;
3459  const GlobalValue *GV = nullptr;
3460  if (CalleeAM.GV != nullptr) {
3461  GV = CalleeAM.GV;
3462  } else if (CalleeAM.Base.Reg != 0) {
3463  CalleeOp = CalleeAM.Base.Reg;
3464  } else
3465  return false;
3466 
3467  // Issue the call.
3468  MachineInstrBuilder MIB;
3469  if (CalleeOp) {
3470  // Register-indirect call.
3471  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3472  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3473  .addReg(CalleeOp);
3474  } else {
3475  // Direct call.
3476  assert(GV && "Not a direct call");
3477  // See if we need any target-specific flags on the GV operand.
3478  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3479 
3480  // This will be a direct call, or an indirect call through memory for
3481  // NonLazyBind calls or dllimport calls.
3482  bool NeedLoad =
3483  OpFlags == X86II::MO_DLLIMPORT || OpFlags == X86II::MO_GOTPCREL;
3484  unsigned CallOpc = NeedLoad
3485  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3486  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3487 
3488  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3489  if (NeedLoad)
3490  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3491  if (Symbol)
3492  MIB.addSym(Symbol, OpFlags);
3493  else
3494  MIB.addGlobalAddress(GV, 0, OpFlags);
3495  if (NeedLoad)
3496  MIB.addReg(0);
3497  }
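// Illustrative sketch: an ordinary external callee becomes a direct
// "callq foo" (CALL64pcrel32 / CALLpcrel32), while a dllimport or
// nonlazybind callee takes the NeedLoad path above and is called through
// memory, e.g. "callq *foo@GOTPCREL(%rip)" on x86-64.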
3498 
3499  // Add a register mask operand representing the call-preserved registers.
3500  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3501  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3502 
3503  // Add an implicit use GOT pointer in EBX.
3504  if (Subtarget->isPICStyleGOT())
3505  MIB.addReg(X86::EBX, RegState::Implicit);
3506 
3507  if (Is64Bit && IsVarArg && !IsWin64)
3508  MIB.addReg(X86::AL, RegState::Implicit);
3509 
3510  // Add implicit physical register uses to the call.
3511  for (auto Reg : OutRegs)
3512  MIB.addReg(Reg, RegState::Implicit);
3513 
3514  // Issue CALLSEQ_END
3515  unsigned NumBytesForCalleeToPop =
3516  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3517  TM.Options.GuaranteedTailCallOpt)
3518  ? NumBytes // Callee pops everything.
3519  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
3520  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3521  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3522  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3523 
3524  // Now handle call return values.
3525  SmallVector<CCValAssign, 16> RVLocs;
3526  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3527  CLI.RetTy->getContext());
3528  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3529 
3530  // Copy all of the result registers out of their specified physreg.
3531  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3532  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3533  CCValAssign &VA = RVLocs[i];
3534  EVT CopyVT = VA.getValVT();
3535  unsigned CopyReg = ResultReg + i;
3536  unsigned SrcReg = VA.getLocReg();
3537 
3538  // If this is x86-64, and we disabled SSE, we can't return FP values
3539  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3540  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3541  report_fatal_error("SSE register return with SSE disabled");
3542  }
3543 
3544  // If we prefer to use the value in xmm registers, copy it out as f80 and
3545  // use a truncate to move it from fp stack reg to xmm reg.
3546  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3547  isScalarFPTypeInSSEReg(VA.getValVT())) {
3548  CopyVT = MVT::f80;
3549  CopyReg = createResultReg(&X86::RFP80RegClass);
3550  }
3551 
3552  // Copy out the result.
3553  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3554  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3555  InRegs.push_back(VA.getLocReg());
3556 
3557  // Round the f80 to the right size, which also moves it to the appropriate
3558  // xmm register. This is accomplished by storing the f80 value in memory
3559  // and then loading it back.
3560  if (CopyVT != VA.getValVT()) {
3561  EVT ResVT = VA.getValVT();
3562  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3563  unsigned MemSize = ResVT.getSizeInBits()/8;
3564  int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3565  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3566  TII.get(Opc)), FI)
3567  .addReg(CopyReg);
3568  Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
3569  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3570  TII.get(Opc), ResultReg + i), FI);
3571  }
3572  }
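// Illustrative note: when a callee returns a float/double on the x87 stack
// (FP0/FP1) but the value is wanted in an XMM register, the loop above copies
// it out as f80, stores it with a rounding x87 store (roughly "fstps"/"fstpl"
// to a fresh stack slot) and reloads it with "movss"/"movsd", which is what
// the ST_Fp80m*/MOVS*rm pair implements.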
3573 
3574  CLI.ResultReg = ResultReg;
3575  CLI.NumResultRegs = RVLocs.size();
3576  CLI.Call = MIB;
3577 
3578  return true;
3579 }
3580 
3581 bool
3582 X86FastISel::fastSelectInstruction(const Instruction *I) {
3583  switch (I->getOpcode()) {
3584  default: break;
3585  case Instruction::Load:
3586  return X86SelectLoad(I);
3587  case Instruction::Store:
3588  return X86SelectStore(I);
3589  case Instruction::Ret:
3590  return X86SelectRet(I);
3591  case Instruction::ICmp:
3592  case Instruction::FCmp:
3593  return X86SelectCmp(I);
3594  case Instruction::ZExt:
3595  return X86SelectZExt(I);
3596  case Instruction::SExt:
3597  return X86SelectSExt(I);
3598  case Instruction::Br:
3599  return X86SelectBranch(I);
3600  case Instruction::LShr:
3601  case Instruction::AShr:
3602  case Instruction::Shl:
3603  return X86SelectShift(I);
3604  case Instruction::SDiv:
3605  case Instruction::UDiv:
3606  case Instruction::SRem:
3607  case Instruction::URem:
3608  return X86SelectDivRem(I);
3609  case Instruction::Select:
3610  return X86SelectSelect(I);
3611  case Instruction::Trunc:
3612  return X86SelectTrunc(I);
3613  case Instruction::FPExt:
3614  return X86SelectFPExt(I);
3615  case Instruction::FPTrunc:
3616  return X86SelectFPTrunc(I);
3617  case Instruction::SIToFP:
3618  return X86SelectSIToFP(I);
3619  case Instruction::IntToPtr: // Deliberate fall-through.
3620  case Instruction::PtrToInt: {
3621  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3622  EVT DstVT = TLI.getValueType(DL, I->getType());
3623  if (DstVT.bitsGT(SrcVT))
3624  return X86SelectZExt(I);
3625  if (DstVT.bitsLT(SrcVT))
3626  return X86SelectTrunc(I);
3627  unsigned Reg = getRegForValue(I->getOperand(0));
3628  if (Reg == 0) return false;
3629  updateValueMap(I, Reg);
3630  return true;
3631  }
3632  case Instruction::BitCast: {
3633  // Select SSE2/AVX/AVX-512 bitcasts between 128/256/512-bit vector types.
3634  if (!Subtarget->hasSSE2())
3635  return false;
3636 
3637  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3638  EVT DstVT = TLI.getValueType(DL, I->getType());
3639 
3640  if (!SrcVT.isSimple() || !DstVT.isSimple())
3641  return false;
3642 
3643  MVT SVT = SrcVT.getSimpleVT();
3644  MVT DVT = DstVT.getSimpleVT();
3645 
3646  if (!SVT.is128BitVector() &&
3647  !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
3648  !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
3649  (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
3650  DVT.getScalarSizeInBits() >= 32))))
3651  return false;
3652 
3653  unsigned Reg = getRegForValue(I->getOperand(0));
3654  if (Reg == 0)
3655  return false;
3656 
3657  // No instruction is needed for conversion. Reuse the register used by
3658  // the first operand.
3659  updateValueMap(I, Reg);
3660  return true;
3661  }
3662  }
3663 
3664  return false;
3665 }
3666 
3667 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3668  if (VT > MVT::i64)
3669  return 0;
3670 
3671  uint64_t Imm = CI->getZExtValue();
3672  if (Imm == 0) {
3673  unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3674  switch (VT.SimpleTy) {
3675  default: llvm_unreachable("Unexpected value type");
3676  case MVT::i1:
3677  case MVT::i8:
3678  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
3679  X86::sub_8bit);
3680  case MVT::i16:
3681  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
3682  X86::sub_16bit);
3683  case MVT::i32:
3684  return SrcReg;
3685  case MVT::i64: {
3686  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3687  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3688  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3689  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3690  return ResultReg;
3691  }
3692  }
3693  }
3694 
3695  unsigned Opc = 0;
3696  switch (VT.SimpleTy) {
3697  default: llvm_unreachable("Unexpected value type");
3698  case MVT::i1:
3699  // TODO: Support this properly.
3700  if (Subtarget->hasAVX512())
3701  return 0;
3702  VT = MVT::i8;
3703  LLVM_FALLTHROUGH;
3704  case MVT::i8: Opc = X86::MOV8ri; break;
3705  case MVT::i16: Opc = X86::MOV16ri; break;
3706  case MVT::i32: Opc = X86::MOV32ri; break;
3707  case MVT::i64: {
3708  if (isUInt<32>(Imm))
3709  Opc = X86::MOV32ri;
3710  else if (isInt<32>(Imm))
3711  Opc = X86::MOV64ri32;
3712  else
3713  Opc = X86::MOV64ri;
3714  break;
3715  }
3716  }
3717  if (VT == MVT::i64 && Opc == X86::MOV32ri) {
3718  unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
3719  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3720  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3721  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3722  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3723  return ResultReg;
3724  }
3725  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3726 }
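// Illustrative sketch: materializing 0 goes through MOV32r0 (later printed as
// an "xorl %reg, %reg" idiom) plus a subregister extract or SUBREG_TO_REG for
// other widths, while an i64 constant such as 42 that fits in 32 bits is
// built as "movl $42, %reg" (MOV32ri) followed by SUBREG_TO_REG, avoiding the
// longer "movabsq" (MOV64ri) encoding.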
3727 
3728 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3729  if (CFP->isNullValue())
3730  return fastMaterializeFloatZero(CFP);
3731 
3732  // Can't handle alternate code models yet.
3733  CodeModel::Model CM = TM.getCodeModel();
3734  if (CM != CodeModel::Small && CM != CodeModel::Large)
3735  return 0;
3736 
3737  // Get opcode and regclass of the output for the given load instruction.
3738  unsigned Opc = 0;
3739  const TargetRegisterClass *RC = nullptr;
3740  switch (VT.SimpleTy) {
3741  default: return 0;
3742  case MVT::f32:
3743  if (X86ScalarSSEf32) {
3744  Opc = Subtarget->hasAVX512()
3745  ? X86::VMOVSSZrm
3746  : Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
3747  RC = Subtarget->hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3748  } else {
3749  Opc = X86::LD_Fp32m;
3750  RC = &X86::RFP32RegClass;
3751  }
3752  break;
3753  case MVT::f64:
3754  if (X86ScalarSSEf64) {
3755  Opc = Subtarget->hasAVX512()
3756  ? X86::VMOVSDZrm
3757  : Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
3758  RC = Subtarget->hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3759  } else {
3760  Opc = X86::LD_Fp64m;
3761  RC = &X86::RFP64RegClass;
3762  }
3763  break;
3764  case MVT::f80:
3765  // No f80 support yet.
3766  return 0;
3767  }
3768 
3769  // MachineConstantPool wants an explicit alignment.
3770  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
3771  if (Align == 0) {
3772  // Alignment of vector types. FIXME!
3773  Align = DL.getTypeAllocSize(CFP->getType());
3774  }
3775 
3776  // x86-32 PIC requires a PIC base register for constant pools.
3777  unsigned PICBase = 0;
3778  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3779  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3780  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3781  else if (OpFlag == X86II::MO_GOTOFF)
3782  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3783  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3784  PICBase = X86::RIP;
3785 
3786  // Create the load from the constant pool.
3787  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
3788  unsigned ResultReg = createResultReg(RC);
3789 
3790  if (CM == CodeModel::Large) {
3791  unsigned AddrReg = createResultReg(&X86::GR64RegClass);
3792  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3793  AddrReg)
3794  .addConstantPoolIndex(CPI, 0, OpFlag);
3795  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3796  TII.get(Opc), ResultReg);
3797  addDirectMem(MIB, AddrReg);
3798  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3799  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3800  MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
3801  MIB->addMemOperand(*FuncInfo.MF, MMO);
3802  return ResultReg;
3803  }
3804 
3805  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3806  TII.get(Opc), ResultReg),
3807  CPI, PICBase, OpFlag);
3808  return ResultReg;
3809 }
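// Illustrative sketch: a constant like "double 2.5" gets a constant-pool
// entry and is loaded with something like "movsd .LCPI0_0(%rip), %xmm0" under
// the x86-64 small code model, via the PIC base register on 32-bit PIC, or
// through a separate "movabsq" of the pool address under the large code model
// (the label name here is just an example).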
3810 
3811 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3812  // Can't handle alternate code models yet.
3813  if (TM.getCodeModel() != CodeModel::Small)
3814  return 0;
3815 
3816  // Materialize addresses with LEA/MOV instructions.
3817  X86AddressMode AM;
3818  if (X86SelectAddress(GV, AM)) {
3819  // If the expression is just a basereg, then we're done, otherwise we need
3820  // to emit an LEA.
3821  if (AM.BaseType == X86AddressMode::RegBase &&
3822  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3823  return AM.Base.Reg;
3824 
3825  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3826  if (TM.getRelocationModel() == Reloc::Static &&
3827  TLI.getPointerTy(DL) == MVT::i64) {
3828  // The displacement could be more than 32 bits away, so we need to use
3829  // an instruction with a 64-bit immediate.
3830  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3831  ResultReg)
3832  .addGlobalAddress(GV);
3833  } else {
3834  unsigned Opc =
3835  TLI.getPointerTy(DL) == MVT::i32
3836  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3837  : X86::LEA64r;
3838  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3839  TII.get(Opc), ResultReg), AM);
3840  }
3841  return ResultReg;
3842  }
3843  return 0;
3844 }
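// Illustrative sketch: the address of a global @g is normally formed with
// "leaq g(%rip), %reg" (LEA64r) or the 32-bit LEA variants; only under a
// static relocation model with 64-bit pointers, where the displacement may
// not fit in 32 bits, is "movabsq $g, %reg" (MOV64ri) used instead.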
3845 
3846 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3847  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3848 
3849  // Only handle simple types.
3850  if (!CEVT.isSimple())
3851  return 0;
3852  MVT VT = CEVT.getSimpleVT();
3853 
3854  if (const auto *CI = dyn_cast<ConstantInt>(C))
3855  return X86MaterializeInt(CI, VT);
3856  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
3857  return X86MaterializeFP(CFP, VT);
3858  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
3859  return X86MaterializeGV(GV, VT);
3860 
3861  return 0;
3862 }
3863 
3864 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3865  // Fail on dynamic allocas. At this point, getRegForValue has already
3866  // checked its CSE maps, so if we're here trying to handle a dynamic
3867  // alloca, we're not going to succeed. X86SelectAddress has a
3868  // check for dynamic allocas, because it's called directly from
3869  // various places, but targetMaterializeAlloca also needs a check
3870  // in order to avoid recursion between getRegForValue,
3871  // X86SelectAddress, and targetMaterializeAlloca.
3872  if (!FuncInfo.StaticAllocaMap.count(C))
3873  return 0;
3874  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3875 
3876  X86AddressMode AM;
3877  if (!X86SelectAddress(C, AM))
3878  return 0;
3879  unsigned Opc =
3880  TLI.getPointerTy(DL) == MVT::i32
3881  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3882  : X86::LEA64r;
3883  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3884  unsigned ResultReg = createResultReg(RC);
3885  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3886  TII.get(Opc), ResultReg), AM);
3887  return ResultReg;
3888 }
3889 
3890 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3891  MVT VT;
3892  if (!isTypeLegal(CF->getType(), VT))
3893  return 0;
3894 
3895  // Get opcode and regclass for the given zero.
3896  bool HasAVX512 = Subtarget->hasAVX512();
3897  unsigned Opc = 0;
3898  const TargetRegisterClass *RC = nullptr;
3899  switch (VT.SimpleTy) {
3900  default: return 0;
3901  case MVT::f32:
3902  if (X86ScalarSSEf32) {
3903  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
3904  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
3905  } else {
3906  Opc = X86::LD_Fp032;
3907  RC = &X86::RFP32RegClass;
3908  }
3909  break;
3910  case MVT::f64:
3911  if (X86ScalarSSEf64) {
3912  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
3913  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
3914  } else {
3915  Opc = X86::LD_Fp064;
3916  RC = &X86::RFP64RegClass;
3917  }
3918  break;
3919  case MVT::f80:
3920  // No f80 support yet.
3921  return 0;
3922  }
3923 
3924  unsigned ResultReg = createResultReg(RC);
3925  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3926  return ResultReg;
3927 }
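// Illustrative note: a +0.0 constant becomes FsFLD0SS / FsFLD0SD (or the
// AVX-512 variants), pseudos that are later expanded to a register-clearing
// idiom such as "xorps %xmm0, %xmm0", rather than loading zero from the
// constant pool.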
3928 
3929 
3930 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3931  const LoadInst *LI) {
3932  const Value *Ptr = LI->getPointerOperand();
3933  X86AddressMode AM;
3934  if (!X86SelectAddress(Ptr, AM))
3935  return false;
3936 
3937  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3938 
3939  unsigned Size = DL.getTypeAllocSize(LI->getType());
3940  unsigned Alignment = LI->getAlignment();
3941 
3942  if (Alignment == 0) // Ensure that codegen never sees alignment 0
3943  Alignment = DL.getABITypeAlignment(LI->getType());
3944 
3945  SmallVector<MachineOperand, 8> AddrOps;
3946  AM.getFullAddress(AddrOps);
3947 
3948  MachineInstr *Result = XII.foldMemoryOperandImpl(
3949  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
3950  /*AllowCommute=*/true);
3951  if (!Result)
3952  return false;
3953 
3954  // The index register could be in the wrong register class. Unfortunately,
3955  // foldMemoryOperandImpl could have commuted the instruction, so it's not enough
3956  // to just look at OpNo + the offset to the index reg. We actually need to
3957  // scan the instruction to find the index reg and check whether it's in the
3958  // correct register class.
3959  unsigned OperandNo = 0;
3960  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3961  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3962  MachineOperand &MO = *I;
3963  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3964  continue;
3965  // Found the index reg, now try to rewrite it.
3966  unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
3967  MO.getReg(), OperandNo);
3968  if (IndexReg == MO.getReg())
3969  continue;
3970  MO.setReg(IndexReg);
3971  }
3972 
3973  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3974  MI->eraseFromParent();
3975  return true;
3976 }
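// Illustrative sketch (IR names are arbitrary): for
//   %v = load i32, i32* %p
//   %s = add i32 %x, %v
// the separate "movl (%reg), ..." produced for the load can be folded into
// its only user, yielding a single memory-operand instruction such as
// "addl (%reg), %other"; the loop above then makes sure any index register
// picked up during folding is in an acceptable register class.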
3977 
3978 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3979  const TargetRegisterClass *RC,
3980  unsigned Op0, bool Op0IsKill,
3981  unsigned Op1, bool Op1IsKill,
3982  unsigned Op2, bool Op2IsKill,
3983  unsigned Op3, bool Op3IsKill) {
3984  const MCInstrDesc &II = TII.get(MachineInstOpcode);
3985 
3986  unsigned ResultReg = createResultReg(RC);
3987  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3988  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3989  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3990  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3991 
3992  if (II.getNumDefs() >= 1)
3993  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
3994  .addReg(Op0, getKillRegState(Op0IsKill))
3995  .addReg(Op1, getKillRegState(Op1IsKill))
3996  .addReg(Op2, getKillRegState(Op2IsKill))
3997  .addReg(Op3, getKillRegState(Op3IsKill));
3998  else {
3999  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
4000  .addReg(Op0, getKillRegState(Op0IsKill))
4001  .addReg(Op1, getKillRegState(Op1IsKill))
4002  .addReg(Op2, getKillRegState(Op2IsKill))
4003  .addReg(Op3, getKillRegState(Op3IsKill));
4004  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4005  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
4006  }
4007  return ResultReg;
4008 }
4009 
4010 
4011 namespace llvm {
4012  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
4013  const TargetLibraryInfo *libInfo) {
4014  return new X86FastISel(funcInfo, libInfo);
4015  }
4016 }