X86FastISel.cpp
1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the X86-specific support for the FastISel class. Much
11 // of the target-specific code is generated by tablegen in the file
12 // X86GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "X86.h"
17 #include "X86CallingConv.h"
18 #include "X86InstrBuilder.h"
19 #include "X86InstrInfo.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86RegisterInfo.h"
22 #include "X86Subtarget.h"
23 #include "X86TargetMachine.h"
25 #include "llvm/CodeGen/FastISel.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/CallingConv.h"
32 #include "llvm/IR/DebugInfo.h"
33 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalAlias.h"
36 #include "llvm/IR/GlobalVariable.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/IntrinsicInst.h"
39 #include "llvm/IR/Operator.h"
40 #include "llvm/MC/MCAsmInfo.h"
41 #include "llvm/MC/MCSymbol.h"
44 using namespace llvm;
45 
46 namespace {
47 
48 class X86FastISel final : public FastISel {
49  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
50  /// make the right decision when generating code for different targets.
51  const X86Subtarget *Subtarget;
52 
53  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
54  /// floating point ops.
55  /// When SSE is available, use it for f32 operations.
56  /// When SSE2 is available, use it for f64 operations.
57  bool X86ScalarSSEf64;
58  bool X86ScalarSSEf32;
59 
60 public:
61  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
62  const TargetLibraryInfo *libInfo)
63  : FastISel(funcInfo, libInfo) {
64  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
65  X86ScalarSSEf64 = Subtarget->hasSSE2();
66  X86ScalarSSEf32 = Subtarget->hasSSE1();
67  }
68 
69  bool fastSelectInstruction(const Instruction *I) override;
70 
71  /// \brief The specified machine instr operand is a vreg, and that
72  /// vreg is being provided by the specified load instruction. If possible,
73  /// try to fold the load as an operand to the instruction, returning true if
74  /// possible.
75  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
76  const LoadInst *LI) override;
77 
78  bool fastLowerArguments() override;
79  bool fastLowerCall(CallLoweringInfo &CLI) override;
80  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
81 
82 #include "X86GenFastISel.inc"
83 
84 private:
85  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
86  const DebugLoc &DL);
87 
88  bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
89  unsigned &ResultReg, unsigned Alignment = 1);
90 
91  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
92  MachineMemOperand *MMO = nullptr, bool Aligned = false);
93  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
94  X86AddressMode &AM,
95  MachineMemOperand *MMO = nullptr, bool Aligned = false);
96 
97  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
98  unsigned &ResultReg);
99 
100  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
101  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
102 
103  bool X86SelectLoad(const Instruction *I);
104 
105  bool X86SelectStore(const Instruction *I);
106 
107  bool X86SelectRet(const Instruction *I);
108 
109  bool X86SelectCmp(const Instruction *I);
110 
111  bool X86SelectZExt(const Instruction *I);
112 
113  bool X86SelectSExt(const Instruction *I);
114 
115  bool X86SelectBranch(const Instruction *I);
116 
117  bool X86SelectShift(const Instruction *I);
118 
119  bool X86SelectDivRem(const Instruction *I);
120 
121  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
122 
123  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
124 
125  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
126 
127  bool X86SelectSelect(const Instruction *I);
128 
129  bool X86SelectTrunc(const Instruction *I);
130 
131  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
132  const TargetRegisterClass *RC);
133 
134  bool X86SelectFPExt(const Instruction *I);
135  bool X86SelectFPTrunc(const Instruction *I);
136  bool X86SelectSIToFP(const Instruction *I);
137 
138  const X86InstrInfo *getInstrInfo() const {
139  return Subtarget->getInstrInfo();
140  }
141  const X86TargetMachine *getTargetMachine() const {
142  return static_cast<const X86TargetMachine *>(&TM);
143  }
144 
145  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
146 
147  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
148  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
149  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
150  unsigned fastMaterializeConstant(const Constant *C) override;
151 
152  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
153 
154  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
155 
156  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
157  /// computed in an SSE register, not on the X87 floating point stack.
158  bool isScalarFPTypeInSSEReg(EVT VT) const {
159  return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
160  (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
161  }
162 
163  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
164 
165  bool IsMemcpySmall(uint64_t Len);
166 
167  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
168  X86AddressMode SrcAM, uint64_t Len);
169 
170  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
171  const Value *Cond);
172 
173  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
174  X86AddressMode &AM);
175 
176  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
177  const TargetRegisterClass *RC, unsigned Op0,
178  bool Op0IsKill, unsigned Op1, bool Op1IsKill,
179  unsigned Op2, bool Op2IsKill, unsigned Op3,
180  bool Op3IsKill);
181 };
182 
183 } // end anonymous namespace.
184 
185 static std::pair<unsigned, bool>
186 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
187  unsigned CC;
188  bool NeedSwap = false;
189 
190  // SSE Condition code mapping:
191  // 0 - EQ
192  // 1 - LT
193  // 2 - LE
194  // 3 - UNORD
195  // 4 - NEQ
196  // 5 - NLT
197  // 6 - NLE
198  // 7 - ORD
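 // Predicates 8 (EQ_UQ) and 12 (NEQ_OQ) used below only exist in the extended
 // immediate range of the AVX (VEX/EVEX) compare instructions; the SSE forms
 // encode predicates 0-7 only.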
199  switch (Predicate) {
200  default: llvm_unreachable("Unexpected predicate");
201  case CmpInst::FCMP_OEQ: CC = 0; break;
202  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
203  case CmpInst::FCMP_OLT: CC = 1; break;
204  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
205  case CmpInst::FCMP_OLE: CC = 2; break;
206  case CmpInst::FCMP_UNO: CC = 3; break;
207  case CmpInst::FCMP_UNE: CC = 4; break;
208  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
209  case CmpInst::FCMP_UGE: CC = 5; break;
210  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
211  case CmpInst::FCMP_UGT: CC = 6; break;
212  case CmpInst::FCMP_ORD: CC = 7; break;
213  case CmpInst::FCMP_UEQ: CC = 8; break;
214  case CmpInst::FCMP_ONE: CC = 12; break;
215  }
216 
217  return std::make_pair(CC, NeedSwap);
218 }
219 
220 /// \brief Adds a complex addressing mode to the given machine instr builder.
221 /// Note that this will constrain the index register. If it's not possible to
222 /// constrain the given index register, then a new one will be created. The
223 /// IndexReg field of the addressing mode will be updated to match in this case.
224 const MachineInstrBuilder &
225 X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
226  X86AddressMode &AM) {
227  // First constrain the index register. It needs to be a GR64_NOSP.
228  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
229  MIB->getNumOperands() +
230  X86::AddrIndexReg);
231  return ::addFullAddress(MIB, AM);
232 }
233 
234 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
235 /// into the user. The condition code will only be updated on success.
236 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
237  const Value *Cond) {
238  if (!isa<ExtractValueInst>(Cond))
239  return false;
240 
241  const auto *EV = cast<ExtractValueInst>(Cond);
242  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
243  return false;
244 
245  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
246  MVT RetVT;
247  const Function *Callee = II->getCalledFunction();
248  Type *RetTy =
249  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
250  if (!isTypeLegal(RetTy, RetVT))
251  return false;
252 
253  if (RetVT != MVT::i32 && RetVT != MVT::i64)
254  return false;
255 
256  X86::CondCode TmpCC;
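 // Map the intrinsic to the EFLAGS condition it sets: signed ops and
 // multiplies report overflow in OF (COND_O), while unsigned add/sub report
 // the carry/borrow in CF (COND_B).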
257  switch (II->getIntrinsicID()) {
258  default: return false;
259  case Intrinsic::sadd_with_overflow:
260  case Intrinsic::ssub_with_overflow:
261  case Intrinsic::smul_with_overflow:
262  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
263  case Intrinsic::uadd_with_overflow:
264  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
265  }
266 
267  // Check if both instructions are in the same basic block.
268  if (II->getParent() != I->getParent())
269  return false;
270 
271  // Make sure nothing is in the way
272  BasicBlock::const_iterator Start(I);
273  BasicBlock::const_iterator End(II);
274  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
275  // We only expect extractvalue instructions between the intrinsic and the
276  // instruction to be selected.
277  if (!isa<ExtractValueInst>(Itr))
278  return false;
279 
280  // Check that the extractvalue operand comes from the intrinsic.
281  const auto *EVI = cast<ExtractValueInst>(Itr);
282  if (EVI->getAggregateOperand() != II)
283  return false;
284  }
285 
286  CC = TmpCC;
287  return true;
288 }
289 
290 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
291  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
292  if (evt == MVT::Other || !evt.isSimple())
293  // Unhandled type. Halt "fast" selection and bail.
294  return false;
295 
296  VT = evt.getSimpleVT();
297  // For now, require SSE/SSE2 for performing floating-point operations,
298  // since x87 requires additional work.
299  if (VT == MVT::f64 && !X86ScalarSSEf64)
300  return false;
301  if (VT == MVT::f32 && !X86ScalarSSEf32)
302  return false;
303  // Similarly, no f80 support yet.
304  if (VT == MVT::f80)
305  return false;
306  // We only handle legal types. For example, on x86-32 the instruction
307  // selector contains all of the 64-bit instructions from x86-64,
308  // under the assumption that i64 won't be used if the target doesn't
309  // support it.
310  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
311 }
312 
313 #include "X86GenCallingConv.inc"
314 
315 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
316 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
317 /// Return true and the result register by reference if it is possible.
318 bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
319  MachineMemOperand *MMO, unsigned &ResultReg,
320  unsigned Alignment) {
321  bool HasSSE41 = Subtarget->hasSSE41();
322  bool HasAVX = Subtarget->hasAVX();
323  bool HasAVX2 = Subtarget->hasAVX2();
324  bool HasAVX512 = Subtarget->hasAVX512();
325  bool HasVLX = Subtarget->hasVLX();
326  bool IsNonTemporal = MMO && MMO->isNonTemporal();
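 // Non-temporal vector loads use (V)MOVNTDQA, which requires SSE4.1 (and
 // AVX2 for the 256-bit form); the checks below fall back to regular loads
 // or bail out when it is unavailable.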
327 
328  // Get opcode and regclass of the output for the given load instruction.
329  unsigned Opc = 0;
330  const TargetRegisterClass *RC = nullptr;
331  switch (VT.getSimpleVT().SimpleTy) {
332  default: return false;
333  case MVT::i1:
334  case MVT::i8:
335  Opc = X86::MOV8rm;
336  RC = &X86::GR8RegClass;
337  break;
338  case MVT::i16:
339  Opc = X86::MOV16rm;
340  RC = &X86::GR16RegClass;
341  break;
342  case MVT::i32:
343  Opc = X86::MOV32rm;
344  RC = &X86::GR32RegClass;
345  break;
346  case MVT::i64:
347  // Must be in x86-64 mode.
348  Opc = X86::MOV64rm;
349  RC = &X86::GR64RegClass;
350  break;
351  case MVT::f32:
352  if (X86ScalarSSEf32) {
353  Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
354  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
355  } else {
356  Opc = X86::LD_Fp32m;
357  RC = &X86::RFP32RegClass;
358  }
359  break;
360  case MVT::f64:
361  if (X86ScalarSSEf64) {
362  Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
363  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
364  } else {
365  Opc = X86::LD_Fp64m;
366  RC = &X86::RFP64RegClass;
367  }
368  break;
369  case MVT::f80:
370  // No f80 support yet.
371  return false;
372  case MVT::v4f32:
373  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
374  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
375  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
376  else if (Alignment >= 16)
377  Opc = HasVLX ? X86::VMOVAPSZ128rm :
378  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
379  else
380  Opc = HasVLX ? X86::VMOVUPSZ128rm :
381  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
382  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
383  break;
384  case MVT::v2f64:
385  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
386  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
387  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
388  else if (Alignment >= 16)
389  Opc = HasVLX ? X86::VMOVAPDZ128rm :
390  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
391  else
392  Opc = HasVLX ? X86::VMOVUPDZ128rm :
393  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
394  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
395  break;
396  case MVT::v4i32:
397  case MVT::v2i64:
398  case MVT::v8i16:
399  case MVT::v16i8:
400  if (IsNonTemporal && Alignment >= 16)
401  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
402  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
403  else if (Alignment >= 16)
404  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
405  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
406  else
407  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
408  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
409  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
410  break;
411  case MVT::v8f32:
412  assert(HasAVX);
413  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
414  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
415  else if (IsNonTemporal && Alignment >= 16)
416  return false; // Force split for X86::VMOVNTDQArm
417  else if (Alignment >= 32)
418  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
419  else
420  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
421  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
422  break;
423  case MVT::v4f64:
424  assert(HasAVX);
425  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
426  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
427  else if (IsNonTemporal && Alignment >= 16)
428  return false; // Force split for X86::VMOVNTDQArm
429  else if (Alignment >= 32)
430  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
431  else
432  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
433  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
434  break;
435  case MVT::v8i32:
436  case MVT::v4i64:
437  case MVT::v16i16:
438  case MVT::v32i8:
439  assert(HasAVX);
440  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
441  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
442  else if (IsNonTemporal && Alignment >= 16)
443  return false; // Force split for X86::VMOVNTDQArm
444  else if (Alignment >= 32)
445  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
446  else
447  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
448  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
449  break;
450  case MVT::v16f32:
451  assert(HasAVX512);
452  if (IsNonTemporal && Alignment >= 64)
453  Opc = X86::VMOVNTDQAZrm;
454  else
455  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
456  RC = &X86::VR512RegClass;
457  break;
458  case MVT::v8f64:
459  assert(HasAVX512);
460  if (IsNonTemporal && Alignment >= 64)
461  Opc = X86::VMOVNTDQAZrm;
462  else
463  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
464  RC = &X86::VR512RegClass;
465  break;
466  case MVT::v8i64:
467  case MVT::v16i32:
468  case MVT::v32i16:
469  case MVT::v64i8:
470  assert(HasAVX512);
471  // Note: There are a lot more choices based on type with AVX-512, but
472  // there's really no advantage when the load isn't masked.
473  if (IsNonTemporal && Alignment >= 64)
474  Opc = X86::VMOVNTDQAZrm;
475  else
476  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
477  RC = &X86::VR512RegClass;
478  break;
479  }
480 
481  ResultReg = createResultReg(RC);
482  MachineInstrBuilder MIB =
483  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
484  addFullAddress(MIB, AM);
485  if (MMO)
486  MIB->addMemOperand(*FuncInfo.MF, MMO);
487  return true;
488 }
489 
490 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
491 /// type VT. The address is either pre-computed, consisting of a base ptr, Ptr,
492 /// and a displacement offset, or a GlobalAddress,
493 /// i.e. V. Return true if it is possible.
494 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
495  X86AddressMode &AM,
496  MachineMemOperand *MMO, bool Aligned) {
497  bool HasSSE1 = Subtarget->hasSSE1();
498  bool HasSSE2 = Subtarget->hasSSE2();
499  bool HasSSE4A = Subtarget->hasSSE4A();
500  bool HasAVX = Subtarget->hasAVX();
501  bool HasAVX512 = Subtarget->hasAVX512();
502  bool HasVLX = Subtarget->hasVLX();
503  bool IsNonTemporal = MMO && MMO->isNonTemporal();
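 // Non-temporal integer stores use MOVNTI (SSE2); non-temporal scalar FP
 // stores use MOVNTSS/MOVNTSD, which are SSE4a extensions.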
504 
505  // Get opcode and regclass of the output for the given store instruction.
506  unsigned Opc = 0;
507  switch (VT.getSimpleVT().SimpleTy) {
508  case MVT::f80: // No f80 support yet.
509  default: return false;
510  case MVT::i1: {
511  // Mask out all but lowest bit.
512  unsigned AndResult = createResultReg(&X86::GR8RegClass);
513  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
514  TII.get(X86::AND8ri), AndResult)
515  .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
516  ValReg = AndResult;
517  LLVM_FALLTHROUGH; // handle i1 as i8.
518  }
519  case MVT::i8: Opc = X86::MOV8mr; break;
520  case MVT::i16: Opc = X86::MOV16mr; break;
521  case MVT::i32:
522  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
523  break;
524  case MVT::i64:
525  // Must be in x86-64 mode.
526  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
527  break;
528  case MVT::f32:
529  if (X86ScalarSSEf32) {
530  if (IsNonTemporal && HasSSE4A)
531  Opc = X86::MOVNTSS;
532  else
533  Opc = HasAVX512 ? X86::VMOVSSZmr :
534  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
535  } else
536  Opc = X86::ST_Fp32m;
537  break;
538  case MVT::f64:
539  if (X86ScalarSSEf64) {
540  if (IsNonTemporal && HasSSE4A)
541  Opc = X86::MOVNTSD;
542  else
543  Opc = HasAVX512 ? X86::VMOVSDZmr :
544  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
545  } else
546  Opc = X86::ST_Fp64m;
547  break;
548  case MVT::x86mmx:
549  Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
550  break;
551  case MVT::v4f32:
552  if (Aligned) {
553  if (IsNonTemporal)
554  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
555  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
556  else
557  Opc = HasVLX ? X86::VMOVAPSZ128mr :
558  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
559  } else
560  Opc = HasVLX ? X86::VMOVUPSZ128mr :
561  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
562  break;
563  case MVT::v2f64:
564  if (Aligned) {
565  if (IsNonTemporal)
566  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
567  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
568  else
569  Opc = HasVLX ? X86::VMOVAPDZ128mr :
570  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
571  } else
572  Opc = HasVLX ? X86::VMOVUPDZ128mr :
573  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
574  break;
575  case MVT::v4i32:
576  case MVT::v2i64:
577  case MVT::v8i16:
578  case MVT::v16i8:
579  if (Aligned) {
580  if (IsNonTemporal)
581  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
582  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
583  else
584  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
585  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
586  } else
587  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
588  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
589  break;
590  case MVT::v8f32:
591  assert(HasAVX);
592  if (Aligned) {
593  if (IsNonTemporal)
594  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
595  else
596  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
597  } else
598  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
599  break;
600  case MVT::v4f64:
601  assert(HasAVX);
602  if (Aligned) {
603  if (IsNonTemporal)
604  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
605  else
606  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
607  } else
608  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
609  break;
610  case MVT::v8i32:
611  case MVT::v4i64:
612  case MVT::v16i16:
613  case MVT::v32i8:
614  assert(HasAVX);
615  if (Aligned) {
616  if (IsNonTemporal)
617  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
618  else
619  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
620  } else
621  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
622  break;
623  case MVT::v16f32:
624  assert(HasAVX512);
625  if (Aligned)
626  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
627  else
628  Opc = X86::VMOVUPSZmr;
629  break;
630  case MVT::v8f64:
631  assert(HasAVX512);
632  if (Aligned) {
633  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
634  } else
635  Opc = X86::VMOVUPDZmr;
636  break;
637  case MVT::v8i64:
638  case MVT::v16i32:
639  case MVT::v32i16:
640  case MVT::v64i8:
641  assert(HasAVX512);
642  // Note: There are a lot more choices based on type with AVX-512, but
643  // there's really no advantage when the store isn't masked.
644  if (Aligned)
645  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
646  else
647  Opc = X86::VMOVDQU64Zmr;
648  break;
649  }
650 
651  const MCInstrDesc &Desc = TII.get(Opc);
652  // Some of the instructions in the previous switch use FR128 instead
653  // of FR32 for ValReg. Make sure the register we feed the instruction
654  // matches its register class constraints.
655  // Note: It is fine to do a copy from FR32 to FR128; these are the same
656  // registers behind the scenes, which is why this did not trigger any
657  // bugs before.
658  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
659  MachineInstrBuilder MIB =
660  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
661  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
662  if (MMO)
663  MIB->addMemOperand(*FuncInfo.MF, MMO);
664 
665  return true;
666 }
667 
668 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
669  X86AddressMode &AM,
670  MachineMemOperand *MMO, bool Aligned) {
671  // Handle 'null' like i32/i64 0.
672  if (isa<ConstantPointerNull>(Val))
673  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
674 
675  // If this is a store of a simple constant, fold the constant into the store.
676  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
677  unsigned Opc = 0;
678  bool Signed = true;
679  switch (VT.getSimpleVT().SimpleTy) {
680  default: break;
681  case MVT::i1:
682  Signed = false;
683  LLVM_FALLTHROUGH; // Handle as i8.
684  case MVT::i8: Opc = X86::MOV8mi; break;
685  case MVT::i16: Opc = X86::MOV16mi; break;
686  case MVT::i32: Opc = X86::MOV32mi; break;
687  case MVT::i64:
688  // Must be a 32-bit sign extended value.
689  if (isInt<32>(CI->getSExtValue()))
690  Opc = X86::MOV64mi32;
691  break;
692  }
693 
694  if (Opc) {
695  MachineInstrBuilder MIB =
696  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
697  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
698  : CI->getZExtValue());
699  if (MMO)
700  MIB->addMemOperand(*FuncInfo.MF, MMO);
701  return true;
702  }
703  }
704 
705  unsigned ValReg = getRegForValue(Val);
706  if (ValReg == 0)
707  return false;
708 
709  bool ValKill = hasTrivialKill(Val);
710  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
711 }
712 
713 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
714 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
715 /// ISD::SIGN_EXTEND).
716 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
717  unsigned Src, EVT SrcVT,
718  unsigned &ResultReg) {
719  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
720  Src, /*TODO: Kill=*/false);
721  if (RR == 0)
722  return false;
723 
724  ResultReg = RR;
725  return true;
726 }
727 
728 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
729  // Handle constant address.
730  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
731  // Can't handle alternate code models yet.
732  if (TM.getCodeModel() != CodeModel::Small)
733  return false;
734 
735  // Can't handle TLS yet.
736  if (GV->isThreadLocal())
737  return false;
738 
739  // RIP-relative addresses can't have additional register operands, so if
740  // we've already folded stuff into the addressing mode, just force the
741  // global value into its own register, which we can use as the basereg.
742  if (!Subtarget->isPICStyleRIPRel() ||
743  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
744  // Okay, we've committed to selecting this global. Set up the address.
745  AM.GV = GV;
746 
747  // Allow the subtarget to classify the global.
748  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
749 
750  // If this reference is relative to the pic base, set it now.
751  if (isGlobalRelativeToPICBase(GVFlags)) {
752  // FIXME: How do we know Base.Reg is free??
753  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
754  }
755 
756  // Unless the ABI requires an extra load, return a direct reference to
757  // the global.
758  if (!isGlobalStubReference(GVFlags)) {
759  if (Subtarget->isPICStyleRIPRel()) {
760  // Use rip-relative addressing if we can. Above we verified that the
761  // base and index registers are unused.
762  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
763  AM.Base.Reg = X86::RIP;
764  }
765  AM.GVOpFlags = GVFlags;
766  return true;
767  }
768 
769  // Ok, we need to do a load from a stub. If we've already loaded from
770  // this stub, reuse the loaded pointer, otherwise emit the load now.
771  DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
772  unsigned LoadReg;
773  if (I != LocalValueMap.end() && I->second != 0) {
774  LoadReg = I->second;
775  } else {
776  // Issue load from stub.
777  unsigned Opc = 0;
778  const TargetRegisterClass *RC = nullptr;
779  X86AddressMode StubAM;
780  StubAM.Base.Reg = AM.Base.Reg;
781  StubAM.GV = GV;
782  StubAM.GVOpFlags = GVFlags;
783 
784  // Prepare for inserting code in the local-value area.
785  SavePoint SaveInsertPt = enterLocalValueArea();
786 
787  if (TLI.getPointerTy(DL) == MVT::i64) {
788  Opc = X86::MOV64rm;
789  RC = &X86::GR64RegClass;
790 
791  if (Subtarget->isPICStyleRIPRel())
792  StubAM.Base.Reg = X86::RIP;
793  } else {
794  Opc = X86::MOV32rm;
795  RC = &X86::GR32RegClass;
796  }
797 
798  LoadReg = createResultReg(RC);
799  MachineInstrBuilder LoadMI =
800  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
801  addFullAddress(LoadMI, StubAM);
802 
803  // Ok, back to normal mode.
804  leaveLocalValueArea(SaveInsertPt);
805 
806  // Prevent loading GV stub multiple times in same MBB.
807  LocalValueMap[V] = LoadReg;
808  }
809 
810  // Now construct the final address. Note that the Disp, Scale,
811  // and Index values may already be set here.
812  AM.Base.Reg = LoadReg;
813  AM.GV = nullptr;
814  return true;
815  }
816  }
817 
818  // If all else fails, try to materialize the value in a register.
819  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
820  if (AM.Base.Reg == 0) {
821  AM.Base.Reg = getRegForValue(V);
822  return AM.Base.Reg != 0;
823  }
824  if (AM.IndexReg == 0) {
825  assert(AM.Scale == 1 && "Scale with no index!");
826  AM.IndexReg = getRegForValue(V);
827  return AM.IndexReg != 0;
828  }
829  }
830 
831  return false;
832 }
833 
834 /// X86SelectAddress - Attempt to fill in an address from the given value.
835 ///
836 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
837  SmallVector<const Value *, 32> GEPs;
838 redo_gep:
839  const User *U = nullptr;
840  unsigned Opcode = Instruction::UserOp1;
841  if (const Instruction *I = dyn_cast<Instruction>(V)) {
842  // Don't walk into other basic blocks; it's possible we haven't
843  // visited them yet, so the instructions may not yet be assigned
844  // virtual registers.
845  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
846  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
847  Opcode = I->getOpcode();
848  U = I;
849  }
850  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
851  Opcode = C->getOpcode();
852  U = C;
853  }
854 
855  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
856  if (Ty->getAddressSpace() > 255)
857  // Fast instruction selection doesn't support the special
858  // address spaces.
859  return false;
860 
861  switch (Opcode) {
862  default: break;
863  case Instruction::BitCast:
864  // Look past bitcasts.
865  return X86SelectAddress(U->getOperand(0), AM);
866 
867  case Instruction::IntToPtr:
868  // Look past no-op inttoptrs.
869  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
870  TLI.getPointerTy(DL))
871  return X86SelectAddress(U->getOperand(0), AM);
872  break;
873 
874  case Instruction::PtrToInt:
875  // Look past no-op ptrtoints.
876  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
877  return X86SelectAddress(U->getOperand(0), AM);
878  break;
879 
880  case Instruction::Alloca: {
881  // Do static allocas.
882  const AllocaInst *A = cast<AllocaInst>(V);
883  DenseMap<const AllocaInst *, int>::iterator SI =
884  FuncInfo.StaticAllocaMap.find(A);
885  if (SI != FuncInfo.StaticAllocaMap.end()) {
886  AM.BaseType = X86AddressMode::FrameIndexBase;
887  AM.Base.FrameIndex = SI->second;
888  return true;
889  }
890  break;
891  }
892 
893  case Instruction::Add: {
894  // Adds of constants are common and easy enough.
895  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
896  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
897  // They have to fit in the 32-bit signed displacement field though.
898  if (isInt<32>(Disp)) {
899  AM.Disp = (uint32_t)Disp;
900  return X86SelectAddress(U->getOperand(0), AM);
901  }
902  }
903  break;
904  }
905 
906  case Instruction::GetElementPtr: {
907  X86AddressMode SavedAM = AM;
908 
909  // Pattern-match simple GEPs.
910  uint64_t Disp = (int32_t)AM.Disp;
911  unsigned IndexReg = AM.IndexReg;
912  unsigned Scale = AM.Scale;
913  gep_type_iterator GTI = gep_type_begin(U);
914  // Iterate through the indices, folding what we can. Constants can be
915  // folded, and one dynamic index can be handled, if the scale is supported.
916  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
917  i != e; ++i, ++GTI) {
918  const Value *Op = *i;
919  if (StructType *STy = GTI.getStructTypeOrNull()) {
920  const StructLayout *SL = DL.getStructLayout(STy);
921  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
922  continue;
923  }
924 
925  // An array/variable index is always of the form i*S where S is the
926  // constant scale size. See if we can push the scale into immediates.
927  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
928  for (;;) {
929  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
930  // Constant-offset addressing.
931  Disp += CI->getSExtValue() * S;
932  break;
933  }
934  if (canFoldAddIntoGEP(U, Op)) {
935  // A compatible add with a constant operand. Fold the constant.
936  ConstantInt *CI =
937  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
938  Disp += CI->getSExtValue() * S;
939  // Iterate on the other operand.
940  Op = cast<AddOperator>(Op)->getOperand(0);
941  continue;
942  }
943  if (IndexReg == 0 &&
944  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
945  (S == 1 || S == 2 || S == 4 || S == 8)) {
946  // Scaled-index addressing.
947  Scale = S;
948  IndexReg = getRegForGEPIndex(Op).first;
949  if (IndexReg == 0)
950  return false;
951  break;
952  }
953  // Unsupported.
954  goto unsupported_gep;
955  }
956  }
957 
958  // Check for displacement overflow.
959  if (!isInt<32>(Disp))
960  break;
961 
962  AM.IndexReg = IndexReg;
963  AM.Scale = Scale;
964  AM.Disp = (uint32_t)Disp;
965  GEPs.push_back(V);
966 
967  if (const GetElementPtrInst *GEP =
968  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
969  // Ok, the GEP indices were covered by constant-offset and scaled-index
970  // addressing. Update the address state and move on to examining the base.
971  V = GEP;
972  goto redo_gep;
973  } else if (X86SelectAddress(U->getOperand(0), AM)) {
974  return true;
975  }
976 
977  // If we couldn't merge the gep value into this addr mode, revert back to
978  // our address and just match the value instead of completely failing.
979  AM = SavedAM;
980 
981  for (const Value *I : reverse(GEPs))
982  if (handleConstantAddresses(I, AM))
983  return true;
984 
985  return false;
986  unsupported_gep:
987  // Ok, the GEP indices weren't all covered.
988  break;
989  }
990  }
991 
992  return handleConstantAddresses(V, AM);
993 }
994 
995 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
996 ///
997 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
998  const User *U = nullptr;
999  unsigned Opcode = Instruction::UserOp1;
1000  const Instruction *I = dyn_cast<Instruction>(V);
1001  // Record if the value is defined in the same basic block.
1002  //
1003  // This information is crucial to know whether or not folding an
1004  // operand is valid.
1005  // Indeed, FastISel generates or reuses a virtual register for all
1006  // operands of all instructions it selects. Obviously, the definition and
1007  // its uses must use the same virtual register otherwise the produced
1008  // code is incorrect.
1009  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
1010  // registers for values that are alive across basic blocks. This ensures
1011  // that the values are consistently set across basic blocks, even
1012  // if different instruction selection mechanisms are used (e.g., a mix of
1013  // SDISel and FastISel).
1014  // For values local to a basic block, the instruction selection process
1015  // generates these virtual registers with whatever method is appropriate
1016  // for its needs. In particular, FastISel and SDISel do not share the way
1017  // local virtual registers are set.
1018  // Therefore, it is impossible (or at least unsafe) to share values
1019  // between basic blocks unless they use the same instruction selection
1020  // method, which is not guaranteed for X86.
1021  // Moreover, things like hasOneUse could not be used accurately if we
1022  // allowed references to values across basic blocks when they are not
1023  // alive across basic blocks initially.
1024  bool InMBB = true;
1025  if (I) {
1026  Opcode = I->getOpcode();
1027  U = I;
1028  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1029  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1030  Opcode = C->getOpcode();
1031  U = C;
1032  }
1033 
1034  switch (Opcode) {
1035  default: break;
1036  case Instruction::BitCast:
1037  // Look past bitcasts if its operand is in the same BB.
1038  if (InMBB)
1039  return X86SelectCallAddress(U->getOperand(0), AM);
1040  break;
1041 
1042  case Instruction::IntToPtr:
1043  // Look past no-op inttoptrs if its operand is in the same BB.
1044  if (InMBB &&
1045  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1046  TLI.getPointerTy(DL))
1047  return X86SelectCallAddress(U->getOperand(0), AM);
1048  break;
1049 
1050  case Instruction::PtrToInt:
1051  // Look past no-op ptrtoints if its operand is in the same BB.
1052  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1053  return X86SelectCallAddress(U->getOperand(0), AM);
1054  break;
1055  }
1056 
1057  // Handle constant address.
1058  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1059  // Can't handle alternate code models yet.
1060  if (TM.getCodeModel() != CodeModel::Small)
1061  return false;
1062 
1063  // RIP-relative addresses can't have additional register operands.
1064  if (Subtarget->isPICStyleRIPRel() &&
1065  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1066  return false;
1067 
1068  // Can't handle TLS.
1069  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1070  if (GVar->isThreadLocal())
1071  return false;
1072 
1073  // Okay, we've committed to selecting this global. Set up the basic address.
1074  AM.GV = GV;
1075 
1076  // Return a direct reference to the global. Fastisel can handle calls to
1077  // functions that require loads, such as dllimport and nonlazybind
1078  // functions.
1079  if (Subtarget->isPICStyleRIPRel()) {
1080  // Use rip-relative addressing if we can. Above we verified that the
1081  // base and index registers are unused.
1082  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1083  AM.Base.Reg = X86::RIP;
1084  } else {
1085  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1086  }
1087 
1088  return true;
1089  }
1090 
1091  // If all else fails, try to materialize the value in a register.
1092  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1093  if (AM.Base.Reg == 0) {
1094  AM.Base.Reg = getRegForValue(V);
1095  return AM.Base.Reg != 0;
1096  }
1097  if (AM.IndexReg == 0) {
1098  assert(AM.Scale == 1 && "Scale with no index!");
1099  AM.IndexReg = getRegForValue(V);
1100  return AM.IndexReg != 0;
1101  }
1102  }
1103 
1104  return false;
1105 }
1106 
1107 
1108 /// X86SelectStore - Select and emit code to implement store instructions.
1109 bool X86FastISel::X86SelectStore(const Instruction *I) {
1110  // Atomic stores need special handling.
1111  const StoreInst *S = cast<StoreInst>(I);
1112 
1113  if (S->isAtomic())
1114  return false;
1115 
1116  const Value *PtrV = I->getOperand(1);
1117  if (TLI.supportSwiftError()) {
1118  // Swifterror values can come from either a function parameter with
1119  // swifterror attribute or an alloca with swifterror attribute.
1120  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1121  if (Arg->hasSwiftErrorAttr())
1122  return false;
1123  }
1124 
1125  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1126  if (Alloca->isSwiftError())
1127  return false;
1128  }
1129  }
1130 
1131  const Value *Val = S->getValueOperand();
1132  const Value *Ptr = S->getPointerOperand();
1133 
1134  MVT VT;
1135  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1136  return false;
1137 
1138  unsigned Alignment = S->getAlignment();
1139  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
1140  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1141  Alignment = ABIAlignment;
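 // Whether the store is sufficiently aligned decides between the aligned
 // (MOVAPS/MOVDQA-style) and unaligned (MOVUPS/MOVDQU-style) vector forms
 // in X86FastEmitStore.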
1142  bool Aligned = Alignment >= ABIAlignment;
1143 
1144  X86AddressMode AM;
1145  if (!X86SelectAddress(Ptr, AM))
1146  return false;
1147 
1148  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1149 }
1150 
1151 /// X86SelectRet - Select and emit code to implement ret instructions.
1152 bool X86FastISel::X86SelectRet(const Instruction *I) {
1153  const ReturnInst *Ret = cast<ReturnInst>(I);
1154  const Function &F = *I->getParent()->getParent();
1155  const X86MachineFunctionInfo *X86MFInfo =
1156  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1157 
1158  if (!FuncInfo.CanLowerReturn)
1159  return false;
1160 
1161  if (TLI.supportSwiftError() &&
1162  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1163  return false;
1164 
1165  if (TLI.supportSplitCSR(FuncInfo.MF))
1166  return false;
1167 
1168  CallingConv::ID CC = F.getCallingConv();
1169  if (CC != CallingConv::C &&
1170  CC != CallingConv::Fast &&
1171  CC != CallingConv::X86_FastCall &&
1172  CC != CallingConv::X86_StdCall &&
1173  CC != CallingConv::X86_ThisCall &&
1174  CC != CallingConv::X86_64_SysV &&
1175  CC != CallingConv::Win64)
1176  return false;
1177 
1178  // Don't handle popping bytes if they don't fit the ret's immediate.
1179  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1180  return false;
1181 
1182  // fastcc with -tailcallopt is intended to provide a guaranteed
1183  // tail call optimization. Fastisel doesn't know how to do that.
1184  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
1185  return false;
1186 
1187  // Let SDISel handle vararg functions.
1188  if (F.isVarArg())
1189  return false;
1190 
1191  // Build a list of return value registers.
1192  SmallVector<unsigned, 4> RetRegs;
1193 
1194  if (Ret->getNumOperands() > 0) {
1195  SmallVector<ISD::OutputArg, 4> Outs;
1196  GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1197 
1198  // Analyze operands of the call, assigning locations to each operand.
1199  SmallVector<CCValAssign, 16> ValLocs;
1200  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1201  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1202 
1203  const Value *RV = Ret->getOperand(0);
1204  unsigned Reg = getRegForValue(RV);
1205  if (Reg == 0)
1206  return false;
1207 
1208  // Only handle a single return value for now.
1209  if (ValLocs.size() != 1)
1210  return false;
1211 
1212  CCValAssign &VA = ValLocs[0];
1213 
1214  // Don't bother handling odd stuff for now.
1215  if (VA.getLocInfo() != CCValAssign::Full)
1216  return false;
1217  // Only handle register returns for now.
1218  if (!VA.isRegLoc())
1219  return false;
1220 
1221  // The calling-convention tables for x87 returns don't tell
1222  // the whole story.
1223  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1224  return false;
1225 
1226  unsigned SrcReg = Reg + VA.getValNo();
1227  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1228  EVT DstVT = VA.getValVT();
1229  // Special handling for extended integers.
1230  if (SrcVT != DstVT) {
1231  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1232  return false;
1233 
1234  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1235  return false;
1236 
1237  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1238 
1239  if (SrcVT == MVT::i1) {
1240  if (Outs[0].Flags.isSExt())
1241  return false;
1242  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
1243  SrcVT = MVT::i8;
1244  }
1245  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1246  ISD::SIGN_EXTEND;
1247  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
1248  SrcReg, /*TODO: Kill=*/false);
1249  }
1250 
1251  // Make the copy.
1252  unsigned DstReg = VA.getLocReg();
1253  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1254  // Avoid a cross-class copy. This is very unlikely.
1255  if (!SrcRC->contains(DstReg))
1256  return false;
1257  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1258  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1259 
1260  // Add register to return instruction.
1261  RetRegs.push_back(VA.getLocReg());
1262  }
1263 
1264  // Swift calling convention does not require we copy the sret argument
1265  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1266 
1267  // All x86 ABIs require that for returning structs by value we copy
1268  // the sret argument into %rax/%eax (depending on ABI) for the return.
1269  // We saved the argument into a virtual register in the entry block,
1270  // so now we copy the value out and into %rax/%eax.
1271  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
1272  unsigned Reg = X86MFInfo->getSRetReturnReg();
1273  assert(Reg &&
1274  "SRetReturnReg should have been set in LowerFormalArguments()!");
1275  unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
1276  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1277  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1278  RetRegs.push_back(RetReg);
1279  }
1280 
1281  // Now emit the RET.
1282  MachineInstrBuilder MIB;
1283  if (X86MFInfo->getBytesToPopOnReturn()) {
1284  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1285  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
1286  .addImm(X86MFInfo->getBytesToPopOnReturn());
1287  } else {
1288  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1289  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
1290  }
1291  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1292  MIB.addReg(RetRegs[i], RegState::Implicit);
1293  return true;
1294 }
1295 
1296 /// X86SelectLoad - Select and emit code to implement load instructions.
1297 ///
1298 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1299  const LoadInst *LI = cast<LoadInst>(I);
1300 
1301  // Atomic loads need special handling.
1302  if (LI->isAtomic())
1303  return false;
1304 
1305  const Value *SV = I->getOperand(0);
1306  if (TLI.supportSwiftError()) {
1307  // Swifterror values can come from either a function parameter with
1308  // swifterror attribute or an alloca with swifterror attribute.
1309  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1310  if (Arg->hasSwiftErrorAttr())
1311  return false;
1312  }
1313 
1314  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1315  if (Alloca->isSwiftError())
1316  return false;
1317  }
1318  }
1319 
1320  MVT VT;
1321  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1322  return false;
1323 
1324  const Value *Ptr = LI->getPointerOperand();
1325 
1326  X86AddressMode AM;
1327  if (!X86SelectAddress(Ptr, AM))
1328  return false;
1329 
1330  unsigned Alignment = LI->getAlignment();
1331  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
1332  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1333  Alignment = ABIAlignment;
1334 
1335  unsigned ResultReg = 0;
1336  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1337  Alignment))
1338  return false;
1339 
1340  updateValueMap(I, ResultReg);
1341  return true;
1342 }
1343 
1344 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1345  bool HasAVX512 = Subtarget->hasAVX512();
1346  bool HasAVX = Subtarget->hasAVX();
1347  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
1348  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
1349 
1350  switch (VT.getSimpleVT().SimpleTy) {
1351  default: return 0;
1352  case MVT::i8: return X86::CMP8rr;
1353  case MVT::i16: return X86::CMP16rr;
1354  case MVT::i32: return X86::CMP32rr;
1355  case MVT::i64: return X86::CMP64rr;
1356  case MVT::f32:
1357  return X86ScalarSSEf32
1358  ? (HasAVX512 ? X86::VUCOMISSZrr
1359  : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
1360  : 0;
1361  case MVT::f64:
1362  return X86ScalarSSEf64
1363  ? (HasAVX512 ? X86::VUCOMISDZrr
1364  : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
1365  : 0;
1366  }
1367 }
1368 
1369 /// If the RHS of the comparison is a constant that can be folded into the
1370 /// compare, return an opcode that does so (e.g. CMP32ri); otherwise return 0.
1371 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1372  int64_t Val = RHSC->getSExtValue();
1373  switch (VT.getSimpleVT().SimpleTy) {
1374  // Otherwise, we can't fold the immediate into this comparison.
1375  default:
1376  return 0;
1377  case MVT::i8:
1378  return X86::CMP8ri;
1379  case MVT::i16:
1380  if (isInt<8>(Val))
1381  return X86::CMP16ri8;
1382  return X86::CMP16ri;
1383  case MVT::i32:
1384  if (isInt<8>(Val))
1385  return X86::CMP32ri8;
1386  return X86::CMP32ri;
1387  case MVT::i64:
1388  if (isInt<8>(Val))
1389  return X86::CMP64ri8;
1390  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1391  // field.
1392  if (isInt<32>(Val))
1393  return X86::CMP64ri32;
1394  return 0;
1395  }
1396 }
1397 
1398 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1399  const DebugLoc &CurDbgLoc) {
1400  unsigned Op0Reg = getRegForValue(Op0);
1401  if (Op0Reg == 0) return false;
1402 
1403  // Handle 'null' like i32/i64 0.
1404  if (isa<ConstantPointerNull>(Op1))
1405  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1406 
1407  // We have two options: compare with register or immediate. If the RHS of
1408  // the compare is an immediate that we can fold into this compare, use
1409  // CMPri, otherwise use CMPrr.
1410  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1411  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1412  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1413  .addReg(Op0Reg)
1414  .addImm(Op1C->getSExtValue());
1415  return true;
1416  }
1417  }
1418 
1419  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1420  if (CompareOpc == 0) return false;
1421 
1422  unsigned Op1Reg = getRegForValue(Op1);
1423  if (Op1Reg == 0) return false;
1424  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1425  .addReg(Op0Reg)
1426  .addReg(Op1Reg);
1427 
1428  return true;
1429 }
1430 
1431 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1432  const CmpInst *CI = cast<CmpInst>(I);
1433 
1434  MVT VT;
1435  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1436  return false;
1437 
1438  // Try to optimize or fold the cmp.
1439  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1440  unsigned ResultReg = 0;
1441  switch (Predicate) {
1442  default: break;
1443  case CmpInst::FCMP_FALSE: {
1444  ResultReg = createResultReg(&X86::GR32RegClass);
1445  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1446  ResultReg);
1447  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
1448  X86::sub_8bit);
1449  if (!ResultReg)
1450  return false;
1451  break;
1452  }
1453  case CmpInst::FCMP_TRUE: {
1454  ResultReg = createResultReg(&X86::GR8RegClass);
1455  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1456  ResultReg).addImm(1);
1457  break;
1458  }
1459  }
1460 
1461  if (ResultReg) {
1462  updateValueMap(I, ResultReg);
1463  return true;
1464  }
1465 
1466  const Value *LHS = CI->getOperand(0);
1467  const Value *RHS = CI->getOperand(1);
1468 
1469  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1470  // We don't have to materialize a zero constant for this case and can just use
1471  // %x again on the RHS.
1472  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1473  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1474  if (RHSC && RHSC->isNullValue())
1475  RHS = LHS;
1476  }
1477 
1478  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1479  static const uint16_t SETFOpcTable[2][3] = {
1480  { X86::SETEr, X86::SETNPr, X86::AND8rr },
1481  { X86::SETNEr, X86::SETPr, X86::OR8rr }
1482  };
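 // ucomis* sets PF on an unordered result, so OEQ is ZF==1 && PF==0
 // (SETE AND SETNP) and UNE is ZF==0 || PF==1 (SETNE OR SETP).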
1483  const uint16_t *SETFOpc = nullptr;
1484  switch (Predicate) {
1485  default: break;
1486  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1487  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1488  }
1489 
1490  ResultReg = createResultReg(&X86::GR8RegClass);
1491  if (SETFOpc) {
1492  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1493  return false;
1494 
1495  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1496  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1497  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
1498  FlagReg1);
1499  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
1500  FlagReg2);
1501  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1502  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1503  updateValueMap(I, ResultReg);
1504  return true;
1505  }
1506 
1507  X86::CondCode CC;
1508  bool SwapArgs;
1509  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1510  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1511  unsigned Opc = X86::getSETFromCond(CC);
1512 
1513  if (SwapArgs)
1514  std::swap(LHS, RHS);
1515 
1516  // Emit a compare of LHS/RHS.
1517  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1518  return false;
1519 
1520  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
1521  updateValueMap(I, ResultReg);
1522  return true;
1523 }
1524 
1525 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1526  EVT DstVT = TLI.getValueType(DL, I->getType());
1527  if (!TLI.isTypeLegal(DstVT))
1528  return false;
1529 
1530  unsigned ResultReg = getRegForValue(I->getOperand(0));
1531  if (ResultReg == 0)
1532  return false;
1533 
1534  // Handle zero-extension from i1 to i8, which is common.
1535  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1536  if (SrcVT == MVT::i1) {
1537  // Set the high bits to zero.
1538  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1539  SrcVT = MVT::i8;
1540 
1541  if (ResultReg == 0)
1542  return false;
1543  }
1544 
1545  if (DstVT == MVT::i64) {
1546  // Handle extension to 64-bits via sub-register shenanigans.
1547  unsigned MovInst;
1548 
1549  switch (SrcVT.SimpleTy) {
1550  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1551  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1552  case MVT::i32: MovInst = X86::MOV32rr; break;
1553  default: llvm_unreachable("Unexpected zext to i64 source type");
1554  }
1555 
1556  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1557  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1558  .addReg(ResultReg);
1559 
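 // Writes to a 32-bit register implicitly zero the upper 32 bits, so the
 // 64-bit zero-extended value is simply the 32-bit result wrapped with
 // SUBREG_TO_REG.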
1560  ResultReg = createResultReg(&X86::GR64RegClass);
1561  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1562  ResultReg)
1563  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1564  } else if (DstVT == MVT::i16) {
1565  // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1566  // extend to 32-bits and then extract down to 16-bits.
1567  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1568  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1569  Result32).addReg(ResultReg);
1570 
1571  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1572  X86::sub_16bit);
1573  } else if (DstVT != MVT::i8) {
1574  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1575  ResultReg, /*Kill=*/true);
1576  if (ResultReg == 0)
1577  return false;
1578  }
1579 
1580  updateValueMap(I, ResultReg);
1581  return true;
1582 }
1583 
1584 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1585  EVT DstVT = TLI.getValueType(DL, I->getType());
1586  if (!TLI.isTypeLegal(DstVT))
1587  return false;
1588 
1589  unsigned ResultReg = getRegForValue(I->getOperand(0));
1590  if (ResultReg == 0)
1591  return false;
1592 
1593  // Handle sign-extension from i1 to i8.
1594  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1595  if (SrcVT == MVT::i1) {
1596  // Set the high bits to zero.
1597  unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
1598  /*TODO: Kill=*/false);
1599  if (ZExtReg == 0)
1600  return false;
1601 
1602  // Negate the result to make an 8-bit sign extended value.
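 // (The zero-extended i1 is 0 or 1, so negating it yields 0 or 0xFF, the
 // i8 sign-extension of the original bit.)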
1603  ResultReg = createResultReg(&X86::GR8RegClass);
1604  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1605  ResultReg).addReg(ZExtReg);
1606 
1607  SrcVT = MVT::i8;
1608  }
1609 
1610  if (DstVT == MVT::i16) {
1611  // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1612  // extend to 32-bits and then extract down to 16-bits.
1613  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1614  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1615  Result32).addReg(ResultReg);
1616 
1617  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1618  X86::sub_16bit);
1619  } else if (DstVT != MVT::i8) {
1620  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1621  ResultReg, /*Kill=*/true);
1622  if (ResultReg == 0)
1623  return false;
1624  }
1625 
1626  updateValueMap(I, ResultReg);
1627  return true;
1628 }
1629 
1630 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1631  // Unconditional branches are selected by tablegen-generated code.
1632  // Handle a conditional branch.
1633  const BranchInst *BI = cast<BranchInst>(I);
1634  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1635  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1636 
1637  // Fold the common case of a conditional branch with a comparison
1638  // in the same block (values defined in other blocks may not have
1639  // initialized registers).
1640  X86::CondCode CC;
1641  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1642  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1643  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1644 
1645  // Try to optimize or fold the cmp.
1646  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1647  switch (Predicate) {
1648  default: break;
1649  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1650  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1651  }
1652 
1653  const Value *CmpLHS = CI->getOperand(0);
1654  const Value *CmpRHS = CI->getOperand(1);
1655 
1656  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1657  // 0.0.
1658  // We don't have to materialize a zero constant for this case and can just
1659  // use %x again on the RHS.
1660  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1661  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1662  if (CmpRHSC && CmpRHSC->isNullValue())
1663  CmpRHS = CmpLHS;
1664  }
1665 
1666  // Try to take advantage of fallthrough opportunities.
1667  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1668  std::swap(TrueMBB, FalseMBB);
1669  Predicate = CmpInst::getInversePredicate(Predicate);
1670  }
1671 
1672  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1673  // code check. Instead two branch instructions are required to check all
1674  // the flags. First we change the predicate to a supported condition code,
 1675  // which will be the first branch. Later on we will emit the second
1676  // branch.
1677  bool NeedExtraBranch = false;
1678  switch (Predicate) {
1679  default: break;
1680  case CmpInst::FCMP_OEQ:
1681  std::swap(TrueMBB, FalseMBB);
 1682  LLVM_FALLTHROUGH;
 1683  case CmpInst::FCMP_UNE:
1684  NeedExtraBranch = true;
1685  Predicate = CmpInst::FCMP_ONE;
1686  break;
1687  }
1688 
1689  bool SwapArgs;
1690  unsigned BranchOpc;
1691  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1692  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1693 
1694  BranchOpc = X86::GetCondBranchFromCond(CC);
1695  if (SwapArgs)
1696  std::swap(CmpLHS, CmpRHS);
1697 
1698  // Emit a compare of the LHS and RHS, setting the flags.
1699  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1700  return false;
1701 
1702  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1703  .addMBB(TrueMBB);
1704 
1705  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1706  // to UNE above).
1707  if (NeedExtraBranch) {
1708  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
1709  .addMBB(TrueMBB);
1710  }
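 // As a rough example, 'br i1 (fcmp une float %a, %b), label %t, label %f'
 // becomes a UCOMISS of %a and %b followed by JNE_1 to %t and JP_1 to %t,
 // so the branch is taken when the operands are unequal or unordered;
 // finishCondBranch below takes care of the edge to %f.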
1711 
1712  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1713  return true;
1714  }
1715  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1716  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1717  // typically happen for _Bool and C++ bools.
1718  MVT SourceVT;
1719  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1720  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1721  unsigned TestOpc = 0;
1722  switch (SourceVT.SimpleTy) {
1723  default: break;
1724  case MVT::i8: TestOpc = X86::TEST8ri; break;
1725  case MVT::i16: TestOpc = X86::TEST16ri; break;
1726  case MVT::i32: TestOpc = X86::TEST32ri; break;
1727  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1728  }
1729  if (TestOpc) {
1730  unsigned OpReg = getRegForValue(TI->getOperand(0));
1731  if (OpReg == 0) return false;
1732 
1733  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1734  .addReg(OpReg).addImm(1);
1735 
1736  unsigned JmpOpc = X86::JNE_1;
1737  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1738  std::swap(TrueMBB, FalseMBB);
1739  JmpOpc = X86::JE_1;
1740  }
1741 
1742  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
1743  .addMBB(TrueMBB);
1744 
1745  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1746  return true;
1747  }
1748  }
1749  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1750  // Fake request the condition, otherwise the intrinsic might be completely
1751  // optimized away.
1752  unsigned TmpReg = getRegForValue(BI->getCondition());
1753  if (TmpReg == 0)
1754  return false;
1755 
1756  unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
1757 
1758  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1759  .addMBB(TrueMBB);
1760  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1761  return true;
1762  }
1763 
1764  // Otherwise do a clumsy setcc and re-test it.
1765  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1766  // in an explicit cast, so make sure to handle that correctly.
1767  unsigned OpReg = getRegForValue(BI->getCondition());
1768  if (OpReg == 0) return false;
1769 
1770  // In case OpReg is a K register, COPY to a GPR
1771  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1772  unsigned KOpReg = OpReg;
1773  OpReg = createResultReg(&X86::GR32RegClass);
1774  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1775  TII.get(TargetOpcode::COPY), OpReg)
1776  .addReg(KOpReg);
1777  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
1778  X86::sub_8bit);
1779  }
1780  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1781  .addReg(OpReg)
1782  .addImm(1);
1783  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
1784  .addMBB(TrueMBB);
1785  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1786  return true;
1787 }
1788 
1789 bool X86FastISel::X86SelectShift(const Instruction *I) {
1790  unsigned CReg = 0, OpReg = 0;
1791  const TargetRegisterClass *RC = nullptr;
1792  assert(!I->getType()->isIntegerTy(8) &&
1793  "i8 shifts should be handled by autogenerated table");
1794  if (I->getType()->isIntegerTy(16)) {
1795  CReg = X86::CX;
1796  RC = &X86::GR16RegClass;
1797  switch (I->getOpcode()) {
1798  default: llvm_unreachable("Unexpected shift opcode");
1799  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1800  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1801  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1802  }
1803  } else if (I->getType()->isIntegerTy(32)) {
1804  CReg = X86::ECX;
1805  RC = &X86::GR32RegClass;
1806  switch (I->getOpcode()) {
1807  default: llvm_unreachable("Unexpected shift opcode");
1808  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1809  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1810  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1811  }
1812  } else if (I->getType()->isIntegerTy(64)) {
1813  CReg = X86::RCX;
1814  RC = &X86::GR64RegClass;
1815  switch (I->getOpcode()) {
1816  default: llvm_unreachable("Unexpected shift opcode");
1817  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1818  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1819  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1820  }
1821  } else {
1822  return false;
1823  }
1824 
1825  MVT VT;
1826  if (!isTypeLegal(I->getType(), VT))
1827  return false;
1828 
1829  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1830  if (Op0Reg == 0) return false;
1831 
1832  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1833  if (Op1Reg == 0) return false;
1834  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1835  CReg).addReg(Op1Reg);
1836 
1837  // The shift instruction uses X86::CL. If we defined a super-register
1838  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1839  assert(CReg != X86::CL && "CReg should be a super register of CL");
1840  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1841  TII.get(TargetOpcode::KILL), X86::CL)
1842  .addReg(CReg, RegState::Kill);
1843 
1844  unsigned ResultReg = createResultReg(RC);
1845  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1846  .addReg(Op0Reg);
1847  updateValueMap(I, ResultReg);
1848  return true;
1849 }
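 // As a sketch, 'shl i32 %a, %b' selected by the code above copies %b into
 // ECX, emits a subreg KILL of CL, and then issues SHL32rCL with %a as the
 // source, leaving the shifted value in the result vreg.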
1850 
1851 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1852  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1853  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1854  const static bool S = true; // IsSigned
1855  const static bool U = false; // !IsSigned
1856  const static unsigned Copy = TargetOpcode::COPY;
1857  // For the X86 DIV/IDIV instruction, in most cases the dividend
1858  // (numerator) must be in a specific register pair highreg:lowreg,
1859  // producing the quotient in lowreg and the remainder in highreg.
1860  // For most data types, to set up the instruction, the dividend is
1861  // copied into lowreg, and lowreg is sign-extended or zero-extended
1862  // into highreg. The exception is i8, where the dividend is defined
1863  // as a single register rather than a register pair, and we
1864  // therefore directly sign-extend or zero-extend the dividend into
1865  // lowreg, instead of copying, and ignore the highreg.
1866  const static struct DivRemEntry {
1867  // The following portion depends only on the data type.
1868  const TargetRegisterClass *RC;
1869  unsigned LowInReg; // low part of the register pair
1870  unsigned HighInReg; // high part of the register pair
1871  // The following portion depends on both the data type and the operation.
1872  struct DivRemResult {
1873  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1874  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1875  // highreg, or copying a zero into highreg.
1876  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1877  // zero/sign-extending into lowreg for i8.
1878  unsigned DivRemResultReg; // Register containing the desired result.
1879  bool IsOpSigned; // Whether to use signed or unsigned form.
1880  } ResultTable[NumOps];
1881  } OpTable[NumTypes] = {
1882  { &X86::GR8RegClass, X86::AX, 0, {
1883  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1884  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1885  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1886  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1887  }
1888  }, // i8
1889  { &X86::GR16RegClass, X86::AX, X86::DX, {
1890  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1891  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1892  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1893  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1894  }
1895  }, // i16
1896  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1897  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1898  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1899  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1900  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1901  }
1902  }, // i32
1903  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1904  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1905  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1906  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1907  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1908  }
1909  }, // i64
1910  };
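 // Reading the table, a 32-bit 'sdiv' is selected (roughly) as: copy the
 // dividend into EAX, CDQ to sign-extend it into EDX, IDIV32r with the
 // divisor register, and finally a copy of EAX (the quotient) into the
 // result vreg; 'srem' differs only in copying EDX (the remainder) instead.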
1911 
1912  MVT VT;
1913  if (!isTypeLegal(I->getType(), VT))
1914  return false;
1915 
1916  unsigned TypeIndex, OpIndex;
1917  switch (VT.SimpleTy) {
1918  default: return false;
1919  case MVT::i8: TypeIndex = 0; break;
1920  case MVT::i16: TypeIndex = 1; break;
1921  case MVT::i32: TypeIndex = 2; break;
1922  case MVT::i64: TypeIndex = 3;
1923  if (!Subtarget->is64Bit())
1924  return false;
1925  break;
1926  }
1927 
1928  switch (I->getOpcode()) {
1929  default: llvm_unreachable("Unexpected div/rem opcode");
1930  case Instruction::SDiv: OpIndex = 0; break;
1931  case Instruction::SRem: OpIndex = 1; break;
1932  case Instruction::UDiv: OpIndex = 2; break;
1933  case Instruction::URem: OpIndex = 3; break;
1934  }
1935 
1936  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1937  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1938  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1939  if (Op0Reg == 0)
1940  return false;
1941  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1942  if (Op1Reg == 0)
1943  return false;
1944 
1945  // Move op0 into low-order input register.
1946  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1947  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1948  // Zero-extend or sign-extend into high-order input register.
1949  if (OpEntry.OpSignExtend) {
1950  if (OpEntry.IsOpSigned)
1951  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1952  TII.get(OpEntry.OpSignExtend));
1953  else {
1954  unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1955  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1956  TII.get(X86::MOV32r0), Zero32);
1957 
1958  // Copy the zero into the appropriate sub/super/identical physical
1959  // register. Unfortunately the operations needed are not uniform enough
1960  // to fit neatly into the table above.
1961  if (VT == MVT::i16) {
1962  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1963  TII.get(Copy), TypeEntry.HighInReg)
1964  .addReg(Zero32, 0, X86::sub_16bit);
1965  } else if (VT == MVT::i32) {
1966  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1967  TII.get(Copy), TypeEntry.HighInReg)
1968  .addReg(Zero32);
1969  } else if (VT == MVT::i64) {
1970  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1971  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1972  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1973  }
1974  }
1975  }
1976  // Generate the DIV/IDIV instruction.
1977  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1978  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1979  // For i8 remainder, we can't reference ah directly, as we'll end
1980  // up with bogus copies like %r9b = COPY %ah. Reference ax
1981  // instead to prevent ah references in a rex instruction.
1982  //
1983  // The current assumption of the fast register allocator is that isel
1984  // won't generate explicit references to the GR8_NOREX registers. If
1985  // the allocator and/or the backend get enhanced to be more robust in
1986  // that regard, this can be, and should be, removed.
1987  unsigned ResultReg = 0;
1988  if ((I->getOpcode() == Instruction::SRem ||
1989  I->getOpcode() == Instruction::URem) &&
1990  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
1991  unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
1992  unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
1993  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1994  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
1995 
1996  // Shift AX right by 8 bits instead of using AH.
1997  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
1998  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
1999 
2000  // Now reference the 8-bit subreg of the result.
2001  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2002  /*Kill=*/true, X86::sub_8bit);
2003  }
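 // In other words, an i8 'srem' on x86-64 is read back (roughly) as: copy
 // AX into a GR16 vreg, SHR16ri by 8 to move AH's bits into the low byte,
 // then extract the sub_8bit subregister, avoiding any direct AH reference.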
2004  // Copy the result out of the physreg if we haven't already.
2005  if (!ResultReg) {
2006  ResultReg = createResultReg(TypeEntry.RC);
2007  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
2008  .addReg(OpEntry.DivRemResultReg);
2009  }
2010  updateValueMap(I, ResultReg);
2011 
2012  return true;
2013 }
2014 
 2015 /// \brief Emit a conditional move instruction (if they are supported) to lower
2016 /// the select.
2017 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2018  // Check if the subtarget supports these instructions.
2019  if (!Subtarget->hasCMov())
2020  return false;
2021 
2022  // FIXME: Add support for i8.
2023  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2024  return false;
2025 
2026  const Value *Cond = I->getOperand(0);
2027  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2028  bool NeedTest = true;
 2029  X86::CondCode CC = X86::COND_NE;
 2030 
2031  // Optimize conditions coming from a compare if both instructions are in the
2032  // same basic block (values defined in other basic blocks may not have
2033  // initialized registers).
2034  const auto *CI = dyn_cast<CmpInst>(Cond);
2035  if (CI && (CI->getParent() == I->getParent())) {
2036  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2037 
2038  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2039  static const uint16_t SETFOpcTable[2][3] = {
2040  { X86::SETNPr, X86::SETEr , X86::TEST8rr },
2041  { X86::SETPr, X86::SETNEr, X86::OR8rr }
2042  };
2043  const uint16_t *SETFOpc = nullptr;
2044  switch (Predicate) {
2045  default: break;
2046  case CmpInst::FCMP_OEQ:
2047  SETFOpc = &SETFOpcTable[0][0];
2048  Predicate = CmpInst::ICMP_NE;
2049  break;
2050  case CmpInst::FCMP_UNE:
2051  SETFOpc = &SETFOpcTable[1][0];
2052  Predicate = CmpInst::ICMP_NE;
2053  break;
2054  }
2055 
2056  bool NeedSwap;
2057  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2058  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2059 
2060  const Value *CmpLHS = CI->getOperand(0);
2061  const Value *CmpRHS = CI->getOperand(1);
2062  if (NeedSwap)
2063  std::swap(CmpLHS, CmpRHS);
2064 
2065  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2066  // Emit a compare of the LHS and RHS, setting the flags.
2067  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2068  return false;
2069 
2070  if (SETFOpc) {
2071  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
2072  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
2073  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
2074  FlagReg1);
2075  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
2076  FlagReg2);
2077  auto const &II = TII.get(SETFOpc[2]);
2078  if (II.getNumDefs()) {
2079  unsigned TmpReg = createResultReg(&X86::GR8RegClass);
2080  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2081  .addReg(FlagReg2).addReg(FlagReg1);
2082  } else {
2083  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2084  .addReg(FlagReg2).addReg(FlagReg1);
2085  }
2086  }
2087  NeedTest = false;
2088  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2089  // Fake request the condition, otherwise the intrinsic might be completely
2090  // optimized away.
2091  unsigned TmpReg = getRegForValue(Cond);
2092  if (TmpReg == 0)
2093  return false;
2094 
2095  NeedTest = false;
2096  }
2097 
2098  if (NeedTest) {
 2099  // Selects operate on i1, but CondReg is 8 bits wide and may contain
 2100  // garbage. Only the least significant bit is guaranteed to be accurate,
 2101  // so reading more than the lsb may show non-zero values even when the
 2102  // lsb is zero. Therefore, truncate the condition to i1 for the select
 2103  // by performing a TEST against 1.
2104  unsigned CondReg = getRegForValue(Cond);
2105  if (CondReg == 0)
2106  return false;
2107  bool CondIsKill = hasTrivialKill(Cond);
2108 
2109  // In case OpReg is a K register, COPY to a GPR
2110  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2111  unsigned KCondReg = CondReg;
2112  CondReg = createResultReg(&X86::GR32RegClass);
2113  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2114  TII.get(TargetOpcode::COPY), CondReg)
2115  .addReg(KCondReg, getKillRegState(CondIsKill));
2116  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2117  X86::sub_8bit);
2118  }
2119  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2120  .addReg(CondReg, getKillRegState(CondIsKill))
2121  .addImm(1);
2122  }
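 // For instance, when the condition is a plain i1 value this path emits
 // 'TEST8ri %cond, 1' and leaves CC as COND_NE, so the CMOV issued below
 // picks the true operand exactly when the low bit of the condition is set
 // (a sketch; the compare-fed path above derives CC from the predicate).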
2123 
2124  const Value *LHS = I->getOperand(1);
2125  const Value *RHS = I->getOperand(2);
2126 
2127  unsigned RHSReg = getRegForValue(RHS);
2128  bool RHSIsKill = hasTrivialKill(RHS);
2129 
2130  unsigned LHSReg = getRegForValue(LHS);
2131  bool LHSIsKill = hasTrivialKill(LHS);
2132 
2133  if (!LHSReg || !RHSReg)
2134  return false;
2135 
2136  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2137  unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
2138  unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
2139  LHSReg, LHSIsKill);
2140  updateValueMap(I, ResultReg);
2141  return true;
2142 }
2143 
2144 /// \brief Emit SSE or AVX instructions to lower the select.
2145 ///
2146 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2147 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2148 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
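/// Without AVX, for example, 'select (fcmp olt float %a, %b), float %x,
/// float %y' is lowered (roughly) to a CMPSSrr producing an all-ones or
/// all-zeros mask, ANDPSrr of the mask with %x, ANDNPSrr of the mask with
/// %y, and an ORPSrr combining the two.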
2149 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2150  // Optimize conditions coming from a compare if both instructions are in the
2151  // same basic block (values defined in other basic blocks may not have
2152  // initialized registers).
2153  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2154  if (!CI || (CI->getParent() != I->getParent()))
2155  return false;
2156 
2157  if (I->getType() != CI->getOperand(0)->getType() ||
2158  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2159  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2160  return false;
2161 
2162  const Value *CmpLHS = CI->getOperand(0);
2163  const Value *CmpRHS = CI->getOperand(1);
2164  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2165 
2166  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2167  // We don't have to materialize a zero constant for this case and can just use
2168  // %x again on the RHS.
2169  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2170  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2171  if (CmpRHSC && CmpRHSC->isNullValue())
2172  CmpRHS = CmpLHS;
2173  }
2174 
2175  unsigned CC;
2176  bool NeedSwap;
2177  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2178  if (CC > 7 && !Subtarget->hasAVX())
2179  return false;
2180 
2181  if (NeedSwap)
2182  std::swap(CmpLHS, CmpRHS);
2183 
2184  // Choose the SSE instruction sequence based on data type (float or double).
2185  static const uint16_t OpcTable[2][4] = {
2186  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2187  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2188  };
2189 
2190  const uint16_t *Opc = nullptr;
2191  switch (RetVT.SimpleTy) {
2192  default: return false;
2193  case MVT::f32: Opc = &OpcTable[0][0]; break;
2194  case MVT::f64: Opc = &OpcTable[1][0]; break;
2195  }
2196 
2197  const Value *LHS = I->getOperand(1);
2198  const Value *RHS = I->getOperand(2);
2199 
2200  unsigned LHSReg = getRegForValue(LHS);
2201  bool LHSIsKill = hasTrivialKill(LHS);
2202 
2203  unsigned RHSReg = getRegForValue(RHS);
2204  bool RHSIsKill = hasTrivialKill(RHS);
2205 
2206  unsigned CmpLHSReg = getRegForValue(CmpLHS);
2207  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
2208 
2209  unsigned CmpRHSReg = getRegForValue(CmpRHS);
2210  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
2211 
 2212  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2213  return false;
2214 
2215  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2216  unsigned ResultReg;
2217 
2218  if (Subtarget->hasAVX512()) {
2219  // If we have AVX512 we can use a mask compare and masked movss/sd.
2220  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2221  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2222 
2223  unsigned CmpOpcode =
2224  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2225  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
2226  CmpRHSReg, CmpRHSIsKill, CC);
2227 
2228  // Need an IMPLICIT_DEF for the input that is used to generate the upper
 2229  // bits of the result register since it's not based on any of the inputs.
2230  unsigned ImplicitDefReg = createResultReg(VR128X);
2231  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2232  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2233 
 2234  // Place RHSReg in the passthru of the masked movss/sd operation and put
2235  // LHS in the input. The mask input comes from the compare.
2236  unsigned MovOpcode =
2237  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2238  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
2239  CmpReg, true, ImplicitDefReg, true,
2240  LHSReg, LHSIsKill);
2241 
2242  ResultReg = createResultReg(RC);
2243  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2244  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2245 
2246  } else if (Subtarget->hasAVX()) {
2247  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2248 
2249  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2250  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2251  // uses XMM0 as the selection register. That may need just as many
2252  // instructions as the AND/ANDN/OR sequence due to register moves, so
2253  // don't bother.
2254  unsigned CmpOpcode =
2255  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2256  unsigned BlendOpcode =
2257  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2258 
2259  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
2260  CmpRHSReg, CmpRHSIsKill, CC);
2261  unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
2262  LHSReg, LHSIsKill, CmpReg, true);
2263  ResultReg = createResultReg(RC);
2264  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2265  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2266  } else {
2267  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2268  unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
2269  CmpRHSReg, CmpRHSIsKill, CC);
2270  unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
2271  LHSReg, LHSIsKill);
2272  unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
2273  RHSReg, RHSIsKill);
2274  unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
2275  AndReg, /*IsKill=*/true);
2276  ResultReg = createResultReg(RC);
2277  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2278  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2279  }
2280  updateValueMap(I, ResultReg);
2281  return true;
2282 }
2283 
2284 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2285  // These are pseudo CMOV instructions and will be later expanded into control-
2286  // flow.
2287  unsigned Opc;
2288  switch (RetVT.SimpleTy) {
2289  default: return false;
2290  case MVT::i8: Opc = X86::CMOV_GR8; break;
2291  case MVT::i16: Opc = X86::CMOV_GR16; break;
2292  case MVT::i32: Opc = X86::CMOV_GR32; break;
2293  case MVT::f32: Opc = X86::CMOV_FR32; break;
2294  case MVT::f64: Opc = X86::CMOV_FR64; break;
2295  }
2296 
2297  const Value *Cond = I->getOperand(0);
 2298  X86::CondCode CC = X86::COND_NE;
 2299 
2300  // Optimize conditions coming from a compare if both instructions are in the
2301  // same basic block (values defined in other basic blocks may not have
2302  // initialized registers).
2303  const auto *CI = dyn_cast<CmpInst>(Cond);
2304  if (CI && (CI->getParent() == I->getParent())) {
2305  bool NeedSwap;
2306  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2307  if (CC > X86::LAST_VALID_COND)
2308  return false;
2309 
2310  const Value *CmpLHS = CI->getOperand(0);
2311  const Value *CmpRHS = CI->getOperand(1);
2312 
2313  if (NeedSwap)
2314  std::swap(CmpLHS, CmpRHS);
2315 
2316  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2317  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2318  return false;
2319  } else {
2320  unsigned CondReg = getRegForValue(Cond);
2321  if (CondReg == 0)
2322  return false;
2323  bool CondIsKill = hasTrivialKill(Cond);
2324 
2325  // In case OpReg is a K register, COPY to a GPR
2326  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2327  unsigned KCondReg = CondReg;
2328  CondReg = createResultReg(&X86::GR32RegClass);
2329  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2330  TII.get(TargetOpcode::COPY), CondReg)
2331  .addReg(KCondReg, getKillRegState(CondIsKill));
2332  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2333  X86::sub_8bit);
2334  }
2335  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2336  .addReg(CondReg, getKillRegState(CondIsKill))
2337  .addImm(1);
2338  }
2339 
2340  const Value *LHS = I->getOperand(1);
2341  const Value *RHS = I->getOperand(2);
2342 
2343  unsigned LHSReg = getRegForValue(LHS);
2344  bool LHSIsKill = hasTrivialKill(LHS);
2345 
2346  unsigned RHSReg = getRegForValue(RHS);
2347  bool RHSIsKill = hasTrivialKill(RHS);
2348 
2349  if (!LHSReg || !RHSReg)
2350  return false;
2351 
2352  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2353 
2354  unsigned ResultReg =
2355  fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
2356  updateValueMap(I, ResultReg);
2357  return true;
2358 }
2359 
2360 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2361  MVT RetVT;
2362  if (!isTypeLegal(I->getType(), RetVT))
2363  return false;
2364 
2365  // Check if we can fold the select.
2366  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2367  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2368  const Value *Opnd = nullptr;
2369  switch (Predicate) {
2370  default: break;
2371  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2372  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2373  }
2374  // No need for a select anymore - this is an unconditional move.
2375  if (Opnd) {
2376  unsigned OpReg = getRegForValue(Opnd);
2377  if (OpReg == 0)
2378  return false;
2379  bool OpIsKill = hasTrivialKill(Opnd);
2380  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2381  unsigned ResultReg = createResultReg(RC);
2382  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2383  TII.get(TargetOpcode::COPY), ResultReg)
2384  .addReg(OpReg, getKillRegState(OpIsKill));
2385  updateValueMap(I, ResultReg);
2386  return true;
2387  }
2388  }
2389 
2390  // First try to use real conditional move instructions.
2391  if (X86FastEmitCMoveSelect(RetVT, I))
2392  return true;
2393 
2394  // Try to use a sequence of SSE instructions to simulate a conditional move.
2395  if (X86FastEmitSSESelect(RetVT, I))
2396  return true;
2397 
2398  // Fall-back to pseudo conditional move instructions, which will be later
2399  // converted to control-flow.
2400  if (X86FastEmitPseudoSelect(RetVT, I))
2401  return true;
2402 
2403  return false;
2404 }
2405 
2406 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2407  // The target-independent selection algorithm in FastISel already knows how
2408  // to select a SINT_TO_FP if the target is SSE but not AVX.
2409  // Early exit if the subtarget doesn't have AVX.
2410  if (!Subtarget->hasAVX())
2411  return false;
2412 
2413  Type *InTy = I->getOperand(0)->getType();
2414  if (!InTy->isIntegerTy(32) && !InTy->isIntegerTy(64))
2415  return false;
2416 
2417  // Select integer to float/double conversion.
2418  unsigned OpReg = getRegForValue(I->getOperand(0));
2419  if (OpReg == 0)
2420  return false;
2421 
2422  const TargetRegisterClass *RC = nullptr;
2423  unsigned Opcode;
2424 
2425  if (I->getType()->isDoubleTy()) {
2426  // sitofp int -> double
2427  Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI642SDrr : X86::VCVTSI2SDrr;
2428  RC = &X86::FR64RegClass;
2429  } else if (I->getType()->isFloatTy()) {
2430  // sitofp int -> float
2431  Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI642SSrr : X86::VCVTSI2SSrr;
2432  RC = &X86::FR32RegClass;
2433  } else
2434  return false;
2435 
2436  unsigned ImplicitDefReg = createResultReg(RC);
2437  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2438  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2439  unsigned ResultReg =
2440  fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
2441  updateValueMap(I, ResultReg);
2442  return true;
2443 }
2444 
2445 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2446 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2447  unsigned TargetOpc,
2448  const TargetRegisterClass *RC) {
2449  assert((I->getOpcode() == Instruction::FPExt ||
2450  I->getOpcode() == Instruction::FPTrunc) &&
2451  "Instruction must be an FPExt or FPTrunc!");
2452 
2453  unsigned OpReg = getRegForValue(I->getOperand(0));
2454  if (OpReg == 0)
2455  return false;
2456 
2457  unsigned ImplicitDefReg;
2458  if (Subtarget->hasAVX()) {
2459  ImplicitDefReg = createResultReg(RC);
2460  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2461  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2462 
2463  }
2464 
2465  unsigned ResultReg = createResultReg(RC);
2466  MachineInstrBuilder MIB;
2467  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2468  ResultReg);
2469 
2470  if (Subtarget->hasAVX())
2471  MIB.addReg(ImplicitDefReg);
2472 
2473  MIB.addReg(OpReg);
2474  updateValueMap(I, ResultReg);
2475  return true;
2476 }
2477 
2478 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2479  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2480  I->getOperand(0)->getType()->isFloatTy()) {
2481  bool HasAVX512 = Subtarget->hasAVX512();
2482  // fpext from float to double.
2483  unsigned Opc =
2484  HasAVX512 ? X86::VCVTSS2SDZrr
2485  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2486  return X86SelectFPExtOrFPTrunc(
2487  I, Opc, HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass);
2488  }
2489 
2490  return false;
2491 }
2492 
2493 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2494  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2495  I->getOperand(0)->getType()->isDoubleTy()) {
2496  bool HasAVX512 = Subtarget->hasAVX512();
2497  // fptrunc from double to float.
2498  unsigned Opc =
2499  HasAVX512 ? X86::VCVTSD2SSZrr
2500  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2501  return X86SelectFPExtOrFPTrunc(
2502  I, Opc, HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass);
2503  }
2504 
2505  return false;
2506 }
2507 
2508 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2509  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2510  EVT DstVT = TLI.getValueType(DL, I->getType());
2511 
2512  // This code only handles truncation to byte.
2513  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2514  return false;
2515  if (!TLI.isTypeLegal(SrcVT))
2516  return false;
2517 
2518  unsigned InputReg = getRegForValue(I->getOperand(0));
2519  if (!InputReg)
2520  // Unhandled operand. Halt "fast" selection and bail.
2521  return false;
2522 
2523  if (SrcVT == MVT::i8) {
2524  // Truncate from i8 to i1; no code needed.
2525  updateValueMap(I, InputReg);
2526  return true;
2527  }
2528 
2529  // Issue an extract_subreg.
2530  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2531  InputReg, false,
2532  X86::sub_8bit);
2533  if (!ResultReg)
2534  return false;
2535 
2536  updateValueMap(I, ResultReg);
2537  return true;
2538 }
2539 
2540 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2541  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2542 }
2543 
2544 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2545  X86AddressMode SrcAM, uint64_t Len) {
2546 
2547  // Make sure we don't bloat code by inlining very large memcpy's.
2548  if (!IsMemcpySmall(Len))
2549  return false;
2550 
2551  bool i64Legal = Subtarget->is64Bit();
2552 
2553  // We don't care about alignment here since we just emit integer accesses.
2554  while (Len) {
2555  MVT VT;
2556  if (Len >= 8 && i64Legal)
2557  VT = MVT::i64;
2558  else if (Len >= 4)
2559  VT = MVT::i32;
2560  else if (Len >= 2)
2561  VT = MVT::i16;
2562  else
2563  VT = MVT::i8;
2564 
2565  unsigned Reg;
2566  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2567  RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2568  assert(RV && "Failed to emit load or store??");
2569 
2570  unsigned Size = VT.getSizeInBits()/8;
2571  Len -= Size;
2572  DestAM.Disp += Size;
2573  SrcAM.Disp += Size;
2574  }
2575 
2576  return true;
2577 }
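 // As a sketch, a 7-byte constant-length memcpy on x86-64 is expanded by
 // the loop above into one i32 load/store, one i16 load/store and one i8
 // load/store, each advancing the source and destination displacements.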
2578 
2579 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2580  // FIXME: Handle more intrinsics.
2581  switch (II->getIntrinsicID()) {
2582  default: return false;
2583  case Intrinsic::convert_from_fp16:
2584  case Intrinsic::convert_to_fp16: {
2585  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2586  return false;
2587 
2588  const Value *Op = II->getArgOperand(0);
2589  unsigned InputReg = getRegForValue(Op);
2590  if (InputReg == 0)
2591  return false;
2592 
2593  // F16C only allows converting from float to half and from half to float.
2594  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2595  if (IsFloatToHalf) {
2596  if (!Op->getType()->isFloatTy())
2597  return false;
2598  } else {
2599  if (!II->getType()->isFloatTy())
2600  return false;
2601  }
2602 
2603  unsigned ResultReg = 0;
2604  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2605  if (IsFloatToHalf) {
2606  // 'InputReg' is implicitly promoted from register class FR32 to
2607  // register class VR128 by method 'constrainOperandRegClass' which is
2608  // directly called by 'fastEmitInst_ri'.
2609  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2610  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2611  // It's consistent with the other FP instructions, which are usually
2612  // controlled by MXCSR.
2613  InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
2614 
2615  // Move the lower 32-bits of ResultReg to another register of class GR32.
2616  ResultReg = createResultReg(&X86::GR32RegClass);
2617  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2618  TII.get(X86::VMOVPDI2DIrr), ResultReg)
2619  .addReg(InputReg, RegState::Kill);
2620 
2621  // The result value is in the lower 16-bits of ResultReg.
2622  unsigned RegIdx = X86::sub_16bit;
2623  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
2624  } else {
2625  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2626  // Explicitly sign-extend the input to 32-bit.
2627  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
2628  /*Kill=*/false);
2629 
2630  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2631  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2632  InputReg, /*Kill=*/true);
2633 
2634  InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
2635 
2636  // The result value is in the lower 32-bits of ResultReg.
2637  // Emit an explicit copy from register class VR128 to register class FR32.
2638  ResultReg = createResultReg(&X86::FR32RegClass);
2639  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2640  TII.get(TargetOpcode::COPY), ResultReg)
2641  .addReg(InputReg, RegState::Kill);
2642  }
2643 
2644  updateValueMap(II, ResultReg);
2645  return true;
2646  }
2647  case Intrinsic::frameaddress: {
2648  MachineFunction *MF = FuncInfo.MF;
2649  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2650  return false;
2651 
2652  Type *RetTy = II->getCalledFunction()->getReturnType();
2653 
2654  MVT VT;
2655  if (!isTypeLegal(RetTy, VT))
2656  return false;
2657 
2658  unsigned Opc;
2659  const TargetRegisterClass *RC = nullptr;
2660 
2661  switch (VT.SimpleTy) {
2662  default: llvm_unreachable("Invalid result type for frameaddress.");
2663  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2664  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2665  }
2666 
2667  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2668  // we get the wrong frame register.
2669  MachineFrameInfo &MFI = MF->getFrameInfo();
2670  MFI.setFrameAddressIsTaken(true);
2671 
2672  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2673  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2674  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2675  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2676  "Invalid Frame Register!");
2677 
2678  // Always make a copy of the frame register to a vreg first, so that we
2679  // never directly reference the frame register (the TwoAddressInstruction-
2680  // Pass doesn't like that).
2681  unsigned SrcReg = createResultReg(RC);
2682  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2683  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2684 
2685  // Now recursively load from the frame address.
2686  // movq (%rbp), %rax
2687  // movq (%rax), %rax
2688  // movq (%rax), %rax
2689  // ...
2690  unsigned DestReg;
2691  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2692  while (Depth--) {
2693  DestReg = createResultReg(RC);
2694  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2695  TII.get(Opc), DestReg), SrcReg);
2696  SrcReg = DestReg;
2697  }
2698 
2699  updateValueMap(II, SrcReg);
2700  return true;
2701  }
2702  case Intrinsic::memcpy: {
2703  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2704  // Don't handle volatile or variable length memcpys.
2705  if (MCI->isVolatile())
2706  return false;
2707 
2708  if (isa<ConstantInt>(MCI->getLength())) {
2709  // Small memcpy's are common enough that we want to do them
2710  // without a call if possible.
2711  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2712  if (IsMemcpySmall(Len)) {
2713  X86AddressMode DestAM, SrcAM;
2714  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2715  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2716  return false;
2717  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2718  return true;
2719  }
2720  }
2721 
2722  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2723  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2724  return false;
2725 
2726  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2727  return false;
2728 
2729  return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
2730  }
2731  case Intrinsic::memset: {
2732  const MemSetInst *MSI = cast<MemSetInst>(II);
2733 
2734  if (MSI->isVolatile())
2735  return false;
2736 
2737  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2738  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2739  return false;
2740 
2741  if (MSI->getDestAddressSpace() > 255)
2742  return false;
2743 
2744  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
2745  }
2746  case Intrinsic::stackprotector: {
2747  // Emit code to store the stack guard onto the stack.
2748  EVT PtrTy = TLI.getPointerTy(DL);
2749 
2750  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2751  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2752 
2753  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2754 
2755  // Grab the frame index.
2756  X86AddressMode AM;
2757  if (!X86SelectAddress(Slot, AM)) return false;
2758  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2759  return true;
2760  }
2761  case Intrinsic::dbg_declare: {
2762  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2763  X86AddressMode AM;
2764  assert(DI->getAddress() && "Null address should be checked earlier!");
2765  if (!X86SelectAddress(DI->getAddress(), AM))
2766  return false;
2767  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2768  // FIXME may need to add RegState::Debug to any registers produced,
2769  // although ESP/EBP should be the only ones at the moment.
 2770  assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
 2771  "Expected inlined-at fields to agree");
2772  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2773  .addImm(0)
2774  .addMetadata(DI->getVariable())
2775  .addMetadata(DI->getExpression());
2776  return true;
2777  }
2778  case Intrinsic::trap: {
2779  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2780  return true;
2781  }
2782  case Intrinsic::sqrt: {
2783  if (!Subtarget->hasSSE1())
2784  return false;
2785 
2786  Type *RetTy = II->getCalledFunction()->getReturnType();
2787 
2788  MVT VT;
2789  if (!isTypeLegal(RetTy, VT))
2790  return false;
2791 
2792  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2793  // is not generated by FastISel yet.
2794  // FIXME: Update this code once tablegen can handle it.
2795  static const uint16_t SqrtOpc[2][2] = {
2796  {X86::SQRTSSr, X86::VSQRTSSr},
2797  {X86::SQRTSDr, X86::VSQRTSDr}
2798  };
2799  bool HasAVX = Subtarget->hasAVX();
2800  unsigned Opc;
2801  const TargetRegisterClass *RC;
2802  switch (VT.SimpleTy) {
2803  default: return false;
2804  case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
2805  case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
2806  }
2807 
2808  const Value *SrcVal = II->getArgOperand(0);
2809  unsigned SrcReg = getRegForValue(SrcVal);
2810 
2811  if (SrcReg == 0)
2812  return false;
2813 
2814  unsigned ImplicitDefReg = 0;
2815  if (HasAVX) {
2816  ImplicitDefReg = createResultReg(RC);
2817  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2818  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2819  }
2820 
2821  unsigned ResultReg = createResultReg(RC);
2822  MachineInstrBuilder MIB;
2823  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2824  ResultReg);
2825 
2826  if (ImplicitDefReg)
2827  MIB.addReg(ImplicitDefReg);
2828 
2829  MIB.addReg(SrcReg);
2830 
2831  updateValueMap(II, ResultReg);
2832  return true;
2833  }
2834  case Intrinsic::sadd_with_overflow:
2835  case Intrinsic::uadd_with_overflow:
2836  case Intrinsic::ssub_with_overflow:
2837  case Intrinsic::usub_with_overflow:
2838  case Intrinsic::smul_with_overflow:
2839  case Intrinsic::umul_with_overflow: {
2840  // This implements the basic lowering of the xalu with overflow intrinsics
2841  // into add/sub/mul followed by either seto or setb.
2842  const Function *Callee = II->getCalledFunction();
2843  auto *Ty = cast<StructType>(Callee->getReturnType());
2844  Type *RetTy = Ty->getTypeAtIndex(0U);
2845  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2846  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2847  "Overflow value expected to be an i1");
2848 
2849  MVT VT;
2850  if (!isTypeLegal(RetTy, VT))
2851  return false;
2852 
2853  if (VT < MVT::i8 || VT > MVT::i64)
2854  return false;
2855 
2856  const Value *LHS = II->getArgOperand(0);
2857  const Value *RHS = II->getArgOperand(1);
2858 
2859  // Canonicalize immediate to the RHS.
2860  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2861  isCommutativeIntrinsic(II))
2862  std::swap(LHS, RHS);
2863 
2864  bool UseIncDec = false;
2865  if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
2866  UseIncDec = true;
2867 
2868  unsigned BaseOpc, CondOpc;
2869  switch (II->getIntrinsicID()) {
2870  default: llvm_unreachable("Unexpected intrinsic!");
2871  case Intrinsic::sadd_with_overflow:
2872  BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
2873  CondOpc = X86::SETOr;
2874  break;
2875  case Intrinsic::uadd_with_overflow:
2876  BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
2877  case Intrinsic::ssub_with_overflow:
2878  BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
2879  CondOpc = X86::SETOr;
2880  break;
2881  case Intrinsic::usub_with_overflow:
2882  BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
2883  case Intrinsic::smul_with_overflow:
2884  BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
2885  case Intrinsic::umul_with_overflow:
2886  BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
2887  }
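 // E.g. 'llvm.uadd.with.overflow.i32(%a, %b)' becomes (roughly) an ADD32rr
 // of %a and %b for the value result plus a SETBr into the adjacent result
 // register for the carry/overflow bit.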
2888 
2889  unsigned LHSReg = getRegForValue(LHS);
2890  if (LHSReg == 0)
2891  return false;
2892  bool LHSIsKill = hasTrivialKill(LHS);
2893 
2894  unsigned ResultReg = 0;
2895  // Check if we have an immediate version.
2896  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2897  static const uint16_t Opc[2][4] = {
2898  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2899  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2900  };
2901 
2902  if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
2903  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2904  bool IsDec = BaseOpc == X86ISD::DEC;
2905  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2906  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2907  .addReg(LHSReg, getKillRegState(LHSIsKill));
2908  } else
2909  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2910  CI->getZExtValue());
2911  }
2912 
2913  unsigned RHSReg;
2914  bool RHSIsKill;
2915  if (!ResultReg) {
2916  RHSReg = getRegForValue(RHS);
2917  if (RHSReg == 0)
2918  return false;
2919  RHSIsKill = hasTrivialKill(RHS);
2920  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2921  RHSIsKill);
2922  }
2923 
2924  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2925  // it manually.
2926  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2927  static const uint16_t MULOpc[] =
2928  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2929  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2930  // First copy the first operand into RAX, which is an implicit input to
2931  // the X86::MUL*r instruction.
2932  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2933  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2934  .addReg(LHSReg, getKillRegState(LHSIsKill));
2935  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2936  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2937  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2938  static const uint16_t MULOpc[] =
2939  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2940  if (VT == MVT::i8) {
2941  // Copy the first operand into AL, which is an implicit input to the
2942  // X86::IMUL8r instruction.
2943  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2944  TII.get(TargetOpcode::COPY), X86::AL)
2945  .addReg(LHSReg, getKillRegState(LHSIsKill));
2946  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2947  RHSIsKill);
2948  } else
2949  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2950  TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2951  RHSReg, RHSIsKill);
2952  }
2953 
2954  if (!ResultReg)
2955  return false;
2956 
2957  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2958  unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
2959  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2960  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
2961  ResultReg2);
2962 
2963  updateValueMap(II, ResultReg, 2);
2964  return true;
2965  }
2966  case Intrinsic::x86_sse_cvttss2si:
2967  case Intrinsic::x86_sse_cvttss2si64:
2968  case Intrinsic::x86_sse2_cvttsd2si:
2969  case Intrinsic::x86_sse2_cvttsd2si64: {
2970  bool IsInputDouble;
2971  switch (II->getIntrinsicID()) {
2972  default: llvm_unreachable("Unexpected intrinsic.");
2973  case Intrinsic::x86_sse_cvttss2si:
2974  case Intrinsic::x86_sse_cvttss2si64:
2975  if (!Subtarget->hasSSE1())
2976  return false;
2977  IsInputDouble = false;
2978  break;
2979  case Intrinsic::x86_sse2_cvttsd2si:
2980  case Intrinsic::x86_sse2_cvttsd2si64:
2981  if (!Subtarget->hasSSE2())
2982  return false;
2983  IsInputDouble = true;
2984  break;
2985  }
2986 
2987  Type *RetTy = II->getCalledFunction()->getReturnType();
2988  MVT VT;
2989  if (!isTypeLegal(RetTy, VT))
2990  return false;
2991 
2992  static const uint16_t CvtOpc[2][2][2] = {
2993  { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr },
2994  { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } },
2995  { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr },
2996  { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } }
2997  };
2998  bool HasAVX = Subtarget->hasAVX();
2999  unsigned Opc;
3000  switch (VT.SimpleTy) {
3001  default: llvm_unreachable("Unexpected result type.");
3002  case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
3003  case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
3004  }
3005 
3006  // Check if we can fold insertelement instructions into the convert.
3007  const Value *Op = II->getArgOperand(0);
3008  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3009  const Value *Index = IE->getOperand(2);
3010  if (!isa<ConstantInt>(Index))
3011  break;
3012  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3013 
3014  if (Idx == 0) {
3015  Op = IE->getOperand(1);
3016  break;
3017  }
3018  Op = IE->getOperand(0);
3019  }
3020 
3021  unsigned Reg = getRegForValue(Op);
3022  if (Reg == 0)
3023  return false;
3024 
3025  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3026  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3027  .addReg(Reg);
3028 
3029  updateValueMap(II, ResultReg);
3030  return true;
3031  }
3032  }
3033 }
3034 
3035 bool X86FastISel::fastLowerArguments() {
3036  if (!FuncInfo.CanLowerReturn)
3037  return false;
3038 
3039  const Function *F = FuncInfo.Fn;
3040  if (F->isVarArg())
3041  return false;
3042 
3043  CallingConv::ID CC = F->getCallingConv();
3044  if (CC != CallingConv::C)
3045  return false;
3046 
3047  if (Subtarget->isCallingConvWin64(CC))
3048  return false;
3049 
3050  if (!Subtarget->is64Bit())
3051  return false;
3052 
3053  if (Subtarget->useSoftFloat())
3054  return false;
3055 
 3056  // Only handle simple cases, i.e. up to 6 i32/i64 GPR arguments and up to 8 f32/f64 XMM arguments.
3057  unsigned GPRCnt = 0;
3058  unsigned FPRCnt = 0;
3059  for (auto const &Arg : F->args()) {
3060  if (Arg.hasAttribute(Attribute::ByVal) ||
3061  Arg.hasAttribute(Attribute::InReg) ||
3062  Arg.hasAttribute(Attribute::StructRet) ||
3063  Arg.hasAttribute(Attribute::SwiftSelf) ||
3064  Arg.hasAttribute(Attribute::SwiftError) ||
3065  Arg.hasAttribute(Attribute::Nest))
3066  return false;
3067 
3068  Type *ArgTy = Arg.getType();
3069  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3070  return false;
3071 
3072  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3073  if (!ArgVT.isSimple()) return false;
3074  switch (ArgVT.getSimpleVT().SimpleTy) {
3075  default: return false;
3076  case MVT::i32:
3077  case MVT::i64:
3078  ++GPRCnt;
3079  break;
3080  case MVT::f32:
3081  case MVT::f64:
3082  if (!Subtarget->hasSSE1())
3083  return false;
3084  ++FPRCnt;
3085  break;
3086  }
3087 
3088  if (GPRCnt > 6)
3089  return false;
3090 
3091  if (FPRCnt > 8)
3092  return false;
3093  }
3094 
3095  static const MCPhysReg GPR32ArgRegs[] = {
3096  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3097  };
3098  static const MCPhysReg GPR64ArgRegs[] = {
3099  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3100  };
3101  static const MCPhysReg XMMArgRegs[] = {
3102  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3103  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3104  };
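 // For example, for 'define i64 @f(i64 %a, double %b)' under the 64-bit
 // SysV C calling convention handled here, %a arrives in RDI and %b in
 // XMM0; the loop below copies each such live-in into a fresh vreg.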
3105 
3106  unsigned GPRIdx = 0;
3107  unsigned FPRIdx = 0;
3108  for (auto const &Arg : F->args()) {
3109  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3110  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3111  unsigned SrcReg;
3112  switch (VT.SimpleTy) {
3113  default: llvm_unreachable("Unexpected value type.");
3114  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3115  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3116  case MVT::f32: LLVM_FALLTHROUGH;
3117  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3118  }
3119  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3120  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3121  // Without this, EmitLiveInCopies may eliminate the livein if its only
3122  // use is a bitcast (which isn't turned into an instruction).
3123  unsigned ResultReg = createResultReg(RC);
3124  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3125  TII.get(TargetOpcode::COPY), ResultReg)
3126  .addReg(DstReg, getKillRegState(true));
3127  updateValueMap(&Arg, ResultReg);
3128  }
3129  return true;
3130 }
3131 
3132 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3133  CallingConv::ID CC,
3134  ImmutableCallSite *CS) {
3135  if (Subtarget->is64Bit())
3136  return 0;
3137  if (Subtarget->getTargetTriple().isOSMSVCRT())
3138  return 0;
3139  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3140  CC == CallingConv::HiPE)
3141  return 0;
3142 
3143  if (CS)
3144  if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
3145  CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3146  return 0;
3147 
3148  return 4;
3149 }
3150 
3151 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3152  auto &OutVals = CLI.OutVals;
3153  auto &OutFlags = CLI.OutFlags;
3154  auto &OutRegs = CLI.OutRegs;
3155  auto &Ins = CLI.Ins;
3156  auto &InRegs = CLI.InRegs;
3157  CallingConv::ID CC = CLI.CallConv;
3158  bool &IsTailCall = CLI.IsTailCall;
3159  bool IsVarArg = CLI.IsVarArg;
3160  const Value *Callee = CLI.Callee;
3161  MCSymbol *Symbol = CLI.Symbol;
3162 
3163  bool Is64Bit = Subtarget->is64Bit();
3164  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3165 
3166  const CallInst *CI =
3167  CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
3168  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
3169 
3170  // Functions with no_caller_saved_registers need special handling.
3171  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3172  (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
3173  return false;
3174 
3175  // Functions using retpoline should use SDISel for calls.
3176  if (Subtarget->useRetpoline())
3177  return false;
3178 
3179  // Handle only C, fastcc, and webkit_js calling conventions for now.
3180  switch (CC) {
3181  default: return false;
3182  case CallingConv::C:
3183  case CallingConv::Fast:
3184  case CallingConv::WebKit_JS:
3185  case CallingConv::Swift:
3186  case CallingConv::X86_FastCall:
3187  case CallingConv::X86_StdCall:
3188  case CallingConv::X86_ThisCall:
3189  case CallingConv::Win64:
3190  case CallingConv::X86_64_SysV:
3191  break;
3192  }
3193 
3194  // Allow SelectionDAG isel to handle tail calls.
3195  if (IsTailCall)
3196  return false;
3197 
3198  // fastcc with -tailcallopt is intended to provide a guaranteed
3199  // tail call optimization. FastISel doesn't know how to do that.
3200  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
3201  return false;
3202 
3203  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3204  // x86-32. Special handling for x86-64 is implemented.
3205  if (IsVarArg && IsWin64)
3206  return false;
3207 
3208  // Don't know about inalloca yet.
3209  if (CLI.CS && CLI.CS->hasInAllocaArgument())
3210  return false;
3211 
3212  for (auto Flag : CLI.OutFlags)
3213  if (Flag.isSwiftError())
3214  return false;
3215 
3216  SmallVector<MVT, 16> OutVTs;
3217  SmallVector<unsigned, 16> ArgRegs;
3218 
3219  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3220  // instruction. This is safe because it is common to all FastISel supported
3221  // calling conventions on x86.
3222  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3223  Value *&Val = OutVals[i];
3224  ISD::ArgFlagsTy Flags = OutFlags[i];
3225  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3226  if (CI->getBitWidth() < 32) {
3227  if (Flags.isSExt())
3228  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
3229  else
3230  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
3231  }
3232  }
3233 
3234  // Passing bools around ends up doing a trunc to i1 and passing it.
3235  // Codegen this as an argument + "and 1".
3236  MVT VT;
3237  auto *TI = dyn_cast<TruncInst>(Val);
3238  unsigned ResultReg;
3239  if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
3240  (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
3241  TI->hasOneUse()) {
3242  Value *PrevVal = TI->getOperand(0);
3243  ResultReg = getRegForValue(PrevVal);
3244 
3245  if (!ResultReg)
3246  return false;
3247 
3248  if (!isTypeLegal(PrevVal->getType(), VT))
3249  return false;
3250 
3251  ResultReg =
3252  fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
3253  } else {
3254  if (!isTypeLegal(Val->getType(), VT))
3255  return false;
3256  ResultReg = getRegForValue(Val);
3257  }
3258 
3259  if (!ResultReg)
3260  return false;
3261 
3262  ArgRegs.push_back(ResultReg);
3263  OutVTs.push_back(VT);
3264  }
3265 
3266  // Analyze operands of the call, assigning locations to each operand.
3267  SmallVector<CCValAssign, 16> ArgLocs;
3268  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3269 
3270  // Allocate shadow area for Win64
3271  if (IsWin64)
3272  CCInfo.AllocateStack(32, 8);
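  // (The 32 bytes are the four 8-byte home slots the Win64 ABI reserves for
  // the RCX, RDX, R8, and R9 argument registers.)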
3273 
3274  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3275 
3276  // Get a count of how many bytes are to be pushed on the stack.
3277  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3278 
3279  // Issue CALLSEQ_START
3280  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3281  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3282  .addImm(NumBytes).addImm(0).addImm(0);
3283 
3284  // Walk the register/memloc assignments, inserting copies/loads.
3285  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3286  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3287  CCValAssign const &VA = ArgLocs[i];
3288  const Value *ArgVal = OutVals[VA.getValNo()];
3289  MVT ArgVT = OutVTs[VA.getValNo()];
3290 
3291  if (ArgVT == MVT::x86mmx)
3292  return false;
3293 
3294  unsigned ArgReg = ArgRegs[VA.getValNo()];
3295 
3296  // Promote the value if needed.
3297  switch (VA.getLocInfo()) {
3298  case CCValAssign::Full: break;
3299  case CCValAssign::SExt: {
3300  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3301  "Unexpected extend");
3302 
3303  if (ArgVT == MVT::i1)
3304  return false;
3305 
3306  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3307  ArgVT, ArgReg);
3308  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3309  ArgVT = VA.getLocVT();
3310  break;
3311  }
3312  case CCValAssign::ZExt: {
3313  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3314  "Unexpected extend");
3315 
3316  // Handle zero-extension from i1 to i8, which is common.
3317  if (ArgVT == MVT::i1) {
3318  // Set the high bits to zero.
3319  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
3320  ArgVT = MVT::i8;
3321 
3322  if (ArgReg == 0)
3323  return false;
3324  }
3325 
3326  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3327  ArgVT, ArgReg);
3328  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3329  ArgVT = VA.getLocVT();
3330  break;
3331  }
3332  case CCValAssign::AExt: {
3333  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3334  "Unexpected extend");
3335  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3336  ArgVT, ArgReg);
3337  if (!Emitted)
3338  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3339  ArgVT, ArgReg);
3340  if (!Emitted)
3341  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3342  ArgVT, ArgReg);
3343 
3344  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3345  ArgVT = VA.getLocVT();
3346  break;
3347  }
3348  case CCValAssign::BCvt: {
3349  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
3350  /*TODO: Kill=*/false);
3351  assert(ArgReg && "Failed to emit a bitcast!");
3352  ArgVT = VA.getLocVT();
3353  break;
3354  }
3355  case CCValAssign::VExt:
3356  // VExt has not been implemented, so this should be impossible to reach
3357  // for now. However, fall back to SelectionDAG isel once it is implemented.
3358  return false;
3359  case CCValAssign::AExtUpper:
3360  case CCValAssign::SExtUpper:
3361  case CCValAssign::ZExtUpper:
3362  case CCValAssign::FPExt:
3363  llvm_unreachable("Unexpected loc info!");
3364  case CCValAssign::Indirect:
3365  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3366  // support this.
3367  return false;
3368  }
3369 
3370  if (VA.isRegLoc()) {
3371  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3372  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3373  OutRegs.push_back(VA.getLocReg());
3374  } else {
3375  assert(VA.isMemLoc());
3376 
3377  // Don't emit stores for undef values.
3378  if (isa<UndefValue>(ArgVal))
3379  continue;
3380 
3381  unsigned LocMemOffset = VA.getLocMemOffset();
3382  X86AddressMode AM;
3383  AM.Base.Reg = RegInfo->getStackRegister();
3384  AM.Disp = LocMemOffset;
3385  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3386  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3387  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3388  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3389  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3390  if (Flags.isByVal()) {
3391  X86AddressMode SrcAM;
3392  SrcAM.Base.Reg = ArgReg;
3393  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3394  return false;
3395  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3396  // If this is a really simple value, emit this with the Value* version
3397  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3398  // as it can cause us to reevaluate the argument.
3399  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3400  return false;
3401  } else {
3402  bool ValIsKill = hasTrivialKill(ArgVal);
3403  if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
3404  return false;
3405  }
3406  }
3407  }
3408 
3409  // ELF / PIC requires the GOT pointer to be in the EBX register before
3410  // making a function call via the PLT.
3411  if (Subtarget->isPICStyleGOT()) {
3412  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3413  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3414  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3415  }
3416 
3417  if (Is64Bit && IsVarArg && !IsWin64) {
3418  // From AMD64 ABI document:
3419  // For calls that may call functions that use varargs or stdargs
3420  // (prototype-less calls or calls to functions containing ellipsis (...) in
3421  // the declaration) %al is used as hidden argument to specify the number
3422  // of SSE registers used. The contents of %al do not need to match exactly
3423  // the number of registers, but must be an upper bound on the number of SSE
3424  // registers used and is in the range 0 - 8 inclusive.
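  // For instance, a varargs call that passes a single double in XMM0
  // (e.g. printf("%f", x)) gets AL set to 1 below.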
3425 
3426  // Count the number of XMM registers allocated.
3427  static const MCPhysReg XMMArgRegs[] = {
3428  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3429  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3430  };
3431  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3432  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3433  && "SSE registers cannot be used when SSE is disabled");
3434  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3435  X86::AL).addImm(NumXMMRegs);
3436  }
3437 
3438  // Materialize callee address in a register. FIXME: GV address can be
3439  // handled with a CALLpcrel32 instead.
3440  X86AddressMode CalleeAM;
3441  if (!X86SelectCallAddress(Callee, CalleeAM))
3442  return false;
3443 
3444  unsigned CalleeOp = 0;
3445  const GlobalValue *GV = nullptr;
3446  if (CalleeAM.GV != nullptr) {
3447  GV = CalleeAM.GV;
3448  } else if (CalleeAM.Base.Reg != 0) {
3449  CalleeOp = CalleeAM.Base.Reg;
3450  } else
3451  return false;
3452 
3453  // Issue the call.
3454  MachineInstrBuilder MIB;
3455  if (CalleeOp) {
3456  // Register-indirect call.
3457  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3458  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3459  .addReg(CalleeOp);
3460  } else {
3461  // Direct call.
3462  assert(GV && "Not a direct call");
3463  // See if we need any target-specific flags on the GV operand.
3464  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3465 
3466  // This will be a direct call, or an indirect call through memory for
3467  // NonLazyBind calls or dllimport calls.
3468  bool NeedLoad =
3469  OpFlags == X86II::MO_DLLIMPORT || OpFlags == X86II::MO_GOTPCREL;
3470  unsigned CallOpc = NeedLoad
3471  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3472  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3473 
3474  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3475  if (NeedLoad)
3476  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3477  if (Symbol)
3478  MIB.addSym(Symbol, OpFlags);
3479  else
3480  MIB.addGlobalAddress(GV, 0, OpFlags);
3481  if (NeedLoad)
3482  MIB.addReg(0);
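  // The operands added above (when NeedLoad is true) form the usual
  // five-part x86 memory reference: base (RIP on x86-64, none on x86-32),
  // scale 1, no index, the callee symbol as displacement, and no segment.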
3483  }
3484 
3485  // Add a register mask operand representing the call-preserved registers.
3486  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3487  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3488 
3489  // Add an implicit use GOT pointer in EBX.
3490  if (Subtarget->isPICStyleGOT())
3491  MIB.addReg(X86::EBX, RegState::Implicit);
3492 
3493  if (Is64Bit && IsVarArg && !IsWin64)
3494  MIB.addReg(X86::AL, RegState::Implicit);
3495 
3496  // Add implicit physical register uses to the call.
3497  for (auto Reg : OutRegs)
3498  MIB.addReg(Reg, RegState::Implicit);
3499 
3500  // Issue CALLSEQ_END
3501  unsigned NumBytesForCalleeToPop =
3502  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3503  TM.Options.GuaranteedTailCallOpt)
3504  ? NumBytes // Callee pops everything.
3505  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
3506  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3507  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3508  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3509 
3510  // Now handle call return values.
3511  SmallVector<CCValAssign, 16> RVLocs;
3512  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3513  CLI.RetTy->getContext());
3514  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3515 
3516  // Copy all of the result registers out of their specified physreg.
3517  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3518  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3519  CCValAssign &VA = RVLocs[i];
3520  EVT CopyVT = VA.getValVT();
3521  unsigned CopyReg = ResultReg + i;
3522  unsigned SrcReg = VA.getLocReg();
3523 
3524  // If this is x86-64, and we disabled SSE, we can't return FP values
3525  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3526  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3527  report_fatal_error("SSE register return with SSE disabled");
3528  }
3529 
3530  // If we prefer to use the value in xmm registers, copy it out as f80 and
3531  // use a truncate to move it from fp stack reg to xmm reg.
3532  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3533  isScalarFPTypeInSSEReg(VA.getValVT())) {
3534  CopyVT = MVT::f80;
3535  CopyReg = createResultReg(&X86::RFP80RegClass);
3536  }
3537 
3538  // Copy out the result.
3539  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3540  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3541  InRegs.push_back(VA.getLocReg());
3542 
3543  // Round the f80 to the right size, which also moves it to the appropriate
3544  // xmm register. This is accomplished by storing the f80 value in memory
3545  // and then loading it back.
3546  if (CopyVT != VA.getValVT()) {
3547  EVT ResVT = VA.getValVT();
3548  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3549  unsigned MemSize = ResVT.getSizeInBits()/8;
3550  int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3551  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3552  TII.get(Opc)), FI)
3553  .addReg(CopyReg);
3554  Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
3555  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3556  TII.get(Opc), ResultReg + i), FI);
3557  }
3558  }
3559 
3560  CLI.ResultReg = ResultReg;
3561  CLI.NumResultRegs = RVLocs.size();
3562  CLI.Call = MIB;
3563 
3564  return true;
3565 }
3566 
3567 bool
3568 X86FastISel::fastSelectInstruction(const Instruction *I) {
3569  switch (I->getOpcode()) {
3570  default: break;
3571  case Instruction::Load:
3572  return X86SelectLoad(I);
3573  case Instruction::Store:
3574  return X86SelectStore(I);
3575  case Instruction::Ret:
3576  return X86SelectRet(I);
3577  case Instruction::ICmp:
3578  case Instruction::FCmp:
3579  return X86SelectCmp(I);
3580  case Instruction::ZExt:
3581  return X86SelectZExt(I);
3582  case Instruction::SExt:
3583  return X86SelectSExt(I);
3584  case Instruction::Br:
3585  return X86SelectBranch(I);
3586  case Instruction::LShr:
3587  case Instruction::AShr:
3588  case Instruction::Shl:
3589  return X86SelectShift(I);
3590  case Instruction::SDiv:
3591  case Instruction::UDiv:
3592  case Instruction::SRem:
3593  case Instruction::URem:
3594  return X86SelectDivRem(I);
3595  case Instruction::Select:
3596  return X86SelectSelect(I);
3597  case Instruction::Trunc:
3598  return X86SelectTrunc(I);
3599  case Instruction::FPExt:
3600  return X86SelectFPExt(I);
3601  case Instruction::FPTrunc:
3602  return X86SelectFPTrunc(I);
3603  case Instruction::SIToFP:
3604  return X86SelectSIToFP(I);
3605  case Instruction::IntToPtr: // Deliberate fall-through.
3606  case Instruction::PtrToInt: {
3607  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3608  EVT DstVT = TLI.getValueType(DL, I->getType());
3609  if (DstVT.bitsGT(SrcVT))
3610  return X86SelectZExt(I);
3611  if (DstVT.bitsLT(SrcVT))
3612  return X86SelectTrunc(I);
3613  unsigned Reg = getRegForValue(I->getOperand(0));
3614  if (Reg == 0) return false;
3615  updateValueMap(I, Reg);
3616  return true;
3617  }
3618  case Instruction::BitCast: {
3619  // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
3620  if (!Subtarget->hasSSE2())
3621  return false;
3622 
3623  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3624  EVT DstVT = TLI.getValueType(DL, I->getType());
3625 
3626  if (!SrcVT.isSimple() || !DstVT.isSimple())
3627  return false;
3628 
3629  MVT SVT = SrcVT.getSimpleVT();
3630  MVT DVT = DstVT.getSimpleVT();
3631 
3632  if (!SVT.is128BitVector() &&
3633  !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
3634  !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
3635  (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
3636  DVT.getScalarSizeInBits() >= 32))))
3637  return false;
3638 
3639  unsigned Reg = getRegForValue(I->getOperand(0));
3640  if (Reg == 0)
3641  return false;
3642 
3643  // No instruction is needed for the conversion. Reuse the register used by
3644  // the first operand.
3645  updateValueMap(I, Reg);
3646  return true;
3647  }
3648  }
3649 
3650  return false;
3651 }
3652 
3653 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3654  if (VT > MVT::i64)
3655  return 0;
3656 
3657  uint64_t Imm = CI->getZExtValue();
3658  if (Imm == 0) {
3659  unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3660  switch (VT.SimpleTy) {
3661  default: llvm_unreachable("Unexpected value type");
3662  case MVT::i1:
3663  case MVT::i8:
3664  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
3665  X86::sub_8bit);
3666  case MVT::i16:
3667  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
3668  X86::sub_16bit);
3669  case MVT::i32:
3670  return SrcReg;
3671  case MVT::i64: {
3672  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3673  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3674  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3675  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3676  return ResultReg;
3677  }
3678  }
3679  }
3680 
3681  unsigned Opc = 0;
3682  switch (VT.SimpleTy) {
3683  default: llvm_unreachable("Unexpected value type");
3684  case MVT::i1:
3685  // TODO: Support this properly.
3686  if (Subtarget->hasAVX512())
3687  return 0;
3688  VT = MVT::i8;
3689  LLVM_FALLTHROUGH;
3690  case MVT::i8: Opc = X86::MOV8ri; break;
3691  case MVT::i16: Opc = X86::MOV16ri; break;
3692  case MVT::i32: Opc = X86::MOV32ri; break;
3693  case MVT::i64: {
3694  if (isUInt<32>(Imm))
3695  Opc = X86::MOV32ri;
3696  else if (isInt<32>(Imm))
3697  Opc = X86::MOV64ri32;
3698  else
3699  Opc = X86::MOV64ri;
3700  break;
3701  }
3702  }
3703  if (VT == MVT::i64 && Opc == X86::MOV32ri) {
3704  unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
3705  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3706  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3707  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3708  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3709  return ResultReg;
3710  }
3711  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3712 }
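
A standalone sketch of the 64-bit immediate classification used above, for illustration only (plain <cstdint> stands in for llvm/Support/MathExtras.h; MovKind and classifyImm64 are invented names, not part of the file):

    #include <cstdint>

    enum class MovKind { Mov32ri, Mov64ri32, Mov64ri };

    // Mirrors the isUInt<32>/isInt<32> checks: prefer a zero-extending 32-bit
    // move, then a sign-extended 32-bit immediate, else the full 64-bit form.
    static MovKind classifyImm64(uint64_t Imm) {
      if (Imm <= UINT32_MAX)
        return MovKind::Mov32ri;      // MOV32ri + SUBREG_TO_REG
      if (static_cast<int64_t>(Imm) >= INT32_MIN &&
          static_cast<int64_t>(Imm) <= INT32_MAX)
        return MovKind::Mov64ri32;    // sign-extended 32-bit immediate
      return MovKind::Mov64ri;        // full 64-bit immediate
    }

For example, 0x00000000FFFFFFFF picks MOV32ri, 0xFFFFFFFFFFFFFFFF (-1) picks MOV64ri32, and 0x123456789A picks MOV64ri, matching the switch in X86MaterializeInt.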
3713 
3714 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3715  if (CFP->isNullValue())
3716  return fastMaterializeFloatZero(CFP);
3717 
3718  // Can't handle code models other than small and large yet.
3719  CodeModel::Model CM = TM.getCodeModel();
3720  if (CM != CodeModel::Small && CM != CodeModel::Large)
3721  return 0;
3722 
3723  // Get opcode and regclass of the output for the given load instruction.
3724  unsigned Opc = 0;
3725  const TargetRegisterClass *RC = nullptr;
3726  switch (VT.SimpleTy) {
3727  default: return 0;
3728  case MVT::f32:
3729  if (X86ScalarSSEf32) {
3730  Opc = Subtarget->hasAVX512()
3731  ? X86::VMOVSSZrm
3732  : Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
3733  RC = Subtarget->hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3734  } else {
3735  Opc = X86::LD_Fp32m;
3736  RC = &X86::RFP32RegClass;
3737  }
3738  break;
3739  case MVT::f64:
3740  if (X86ScalarSSEf64) {
3741  Opc = Subtarget->hasAVX512()
3742  ? X86::VMOVSDZrm
3743  : Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
3744  RC = Subtarget->hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3745  } else {
3746  Opc = X86::LD_Fp64m;
3747  RC = &X86::RFP64RegClass;
3748  }
3749  break;
3750  case MVT::f80:
3751  // No f80 support yet.
3752  return 0;
3753  }
3754 
3755  // MachineConstantPool wants an explicit alignment.
3756  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
3757  if (Align == 0) {
3758  // Alignment of vector types. FIXME!
3759  Align = DL.getTypeAllocSize(CFP->getType());
3760  }
3761 
3762  // x86-32 PIC requires a PIC base register for constant pools.
3763  unsigned PICBase = 0;
3764  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3765  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3766  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3767  else if (OpFlag == X86II::MO_GOTOFF)
3768  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3769  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3770  PICBase = X86::RIP;
3771 
3772  // Create the load from the constant pool.
3773  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
3774  unsigned ResultReg = createResultReg(RC);
3775 
3776  if (CM == CodeModel::Large) {
3777  unsigned AddrReg = createResultReg(&X86::GR64RegClass);
3778  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3779  AddrReg)
3780  .addConstantPoolIndex(CPI, 0, OpFlag);
3781  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3782  TII.get(Opc), ResultReg);
3783  addDirectMem(MIB, AddrReg);
3784  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3785  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3786  MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
3787  MIB->addMemOperand(*FuncInfo.MF, MMO);
3788  return ResultReg;
3789  }
3790 
3791  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3792  TII.get(Opc), ResultReg),
3793  CPI, PICBase, OpFlag);
3794  return ResultReg;
3795 }
3796 
3797 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3798  // Can't handle alternate code models yet.
3799  if (TM.getCodeModel() != CodeModel::Small)
3800  return 0;
3801 
3802  // Materialize addresses with LEA/MOV instructions.
3803  X86AddressMode AM;
3804  if (X86SelectAddress(GV, AM)) {
3805  // If the expression is just a basereg, then we're done, otherwise we need
3806  // to emit an LEA.
3807  if (AM.BaseType == X86AddressMode::RegBase &&
3808  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3809  return AM.Base.Reg;
3810 
3811  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3812  if (TM.getRelocationModel() == Reloc::Static &&
3813  TLI.getPointerTy(DL) == MVT::i64) {
3814  // The displacement could be more than 32 bits away, so we need to use
3815  // an instruction with a 64-bit immediate.
3816  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3817  ResultReg)
3818  .addGlobalAddress(GV);
3819  } else {
3820  unsigned Opc =
3821  TLI.getPointerTy(DL) == MVT::i32
3822  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3823  : X86::LEA64r;
3824  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3825  TII.get(Opc), ResultReg), AM);
3826  }
3827  return ResultReg;
3828  }
3829  return 0;
3830 }
3831 
3832 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3833  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3834 
3835  // Only handle simple types.
3836  if (!CEVT.isSimple())
3837  return 0;
3838  MVT VT = CEVT.getSimpleVT();
3839 
3840  if (const auto *CI = dyn_cast<ConstantInt>(C))
3841  return X86MaterializeInt(CI, VT);
3842  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
3843  return X86MaterializeFP(CFP, VT);
3844  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
3845  return X86MaterializeGV(GV, VT);
3846 
3847  return 0;
3848 }
3849 
3850 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3851  // Fail on dynamic allocas. At this point, getRegForValue has already
3852  // checked its CSE maps, so if we're here trying to handle a dynamic
3853  // alloca, we're not going to succeed. X86SelectAddress has a
3854  // check for dynamic allocas, because it's called directly from
3855  // various places, but targetMaterializeAlloca also needs a check
3856  // in order to avoid recursion between getRegForValue,
3857  // X86SelectAddress, and targetMaterializeAlloca.
3858  if (!FuncInfo.StaticAllocaMap.count(C))
3859  return 0;
3860  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3861 
3862  X86AddressMode AM;
3863  if (!X86SelectAddress(C, AM))
3864  return 0;
3865  unsigned Opc =
3866  TLI.getPointerTy(DL) == MVT::i32
3867  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3868  : X86::LEA64r;
3869  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3870  unsigned ResultReg = createResultReg(RC);
3871  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3872  TII.get(Opc), ResultReg), AM);
3873  return ResultReg;
3874 }
3875 
3876 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3877  MVT VT;
3878  if (!isTypeLegal(CF->getType(), VT))
3879  return 0;
3880 
3881  // Get opcode and regclass for the given zero.
3882  bool HasAVX512 = Subtarget->hasAVX512();
3883  unsigned Opc = 0;
3884  const TargetRegisterClass *RC = nullptr;
3885  switch (VT.SimpleTy) {
3886  default: return 0;
3887  case MVT::f32:
3888  if (X86ScalarSSEf32) {
3889  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
3890  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
3891  } else {
3892  Opc = X86::LD_Fp032;
3893  RC = &X86::RFP32RegClass;
3894  }
3895  break;
3896  case MVT::f64:
3897  if (X86ScalarSSEf64) {
3898  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
3899  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
3900  } else {
3901  Opc = X86::LD_Fp064;
3902  RC = &X86::RFP64RegClass;
3903  }
3904  break;
3905  case MVT::f80:
3906  // No f80 support yet.
3907  return 0;
3908  }
3909 
3910  unsigned ResultReg = createResultReg(RC);
3911  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3912  return ResultReg;
3913 }
3914 
3915 
3916 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3917  const LoadInst *LI) {
3918  const Value *Ptr = LI->getPointerOperand();
3919  X86AddressMode AM;
3920  if (!X86SelectAddress(Ptr, AM))
3921  return false;
3922 
3923  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3924 
3925  unsigned Size = DL.getTypeAllocSize(LI->getType());
3926  unsigned Alignment = LI->getAlignment();
3927 
3928  if (Alignment == 0) // Ensure that codegen never sees alignment 0
3929  Alignment = DL.getABITypeAlignment(LI->getType());
3930 
3931  SmallVector<MachineOperand, 8> AddrOps;
3932  AM.getFullAddress(AddrOps);
3933 
3934  MachineInstr *Result = XII.foldMemoryOperandImpl(
3935  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
3936  /*AllowCommute=*/true);
3937  if (!Result)
3938  return false;
3939 
3940  // The index register could be in the wrong register class. Unfortunately,
3941  // foldMemoryOperandImpl could have commuted the instruction, so it's not
3942  // enough to just look at OpNo + the offset to the index reg. We actually
3943  // need to scan the instruction to find the index reg and check whether it
3944  // is in the correct reg class.
3945  unsigned OperandNo = 0;
3946  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3947  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3948  MachineOperand &MO = *I;
3949  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3950  continue;
3951  // Found the index reg, now try to rewrite it.
3952  unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
3953  MO.getReg(), OperandNo);
3954  if (IndexReg == MO.getReg())
3955  continue;
3956  MO.setReg(IndexReg);
3957  }
3958 
3959  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3960  MI->eraseFromParent();
3961  return true;
3962 }
3963 
3964 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3965  const TargetRegisterClass *RC,
3966  unsigned Op0, bool Op0IsKill,
3967  unsigned Op1, bool Op1IsKill,
3968  unsigned Op2, bool Op2IsKill,
3969  unsigned Op3, bool Op3IsKill) {
3970  const MCInstrDesc &II = TII.get(MachineInstOpcode);
3971 
3972  unsigned ResultReg = createResultReg(RC);
3973  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3974  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3975  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3976  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3977 
3978  if (II.getNumDefs() >= 1)
3979  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
3980  .addReg(Op0, getKillRegState(Op0IsKill))
3981  .addReg(Op1, getKillRegState(Op1IsKill))
3982  .addReg(Op2, getKillRegState(Op2IsKill))
3983  .addReg(Op3, getKillRegState(Op3IsKill));
3984  else {
3985  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
3986  .addReg(Op0, getKillRegState(Op0IsKill))
3987  .addReg(Op1, getKillRegState(Op1IsKill))
3988  .addReg(Op2, getKillRegState(Op2IsKill))
3989  .addReg(Op3, getKillRegState(Op3IsKill));
3990  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3991  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
3992  }
3993  return ResultReg;
3994 }
3995 
3996 
3997 namespace llvm {
3998  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
3999  const TargetLibraryInfo *libInfo) {
4000  return new X86FastISel(funcInfo, libInfo);
4001  }
4002 }
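
For context, this factory is reached through the X86TargetLowering::createFastISel hook. A minimal sketch of that hookup, for illustration (the authoritative definition lives in X86ISelLowering.cpp, not in this file):

    FastISel *X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                                const TargetLibraryInfo *libInfo) const {
      return X86::createFastISel(funcInfo, libInfo);
    }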