1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the X86-specific support for the FastISel class. Much
11 // of the target-specific code is generated by tablegen in the file
12 // X86GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "X86.h"
17 #include "X86CallingConv.h"
18 #include "X86InstrBuilder.h"
19 #include "X86InstrInfo.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86RegisterInfo.h"
22 #include "X86Subtarget.h"
23 #include "X86TargetMachine.h"
25 #include "llvm/CodeGen/FastISel.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/CallingConv.h"
32 #include "llvm/IR/DebugInfo.h"
33 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalAlias.h"
36 #include "llvm/IR/GlobalVariable.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/IntrinsicInst.h"
39 #include "llvm/IR/Operator.h"
40 #include "llvm/MC/MCAsmInfo.h"
41 #include "llvm/MC/MCSymbol.h"
44 using namespace llvm;
45 
46 namespace {
47 
48 class X86FastISel final : public FastISel {
49  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
50  /// make the right decision when generating code for different targets.
51  const X86Subtarget *Subtarget;
52 
53  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
54  /// floating point ops.
55  /// When SSE is available, use it for f32 operations.
56  /// When SSE2 is available, use it for f64 operations.
57  bool X86ScalarSSEf64;
58  bool X86ScalarSSEf32;
59 
60 public:
61  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
62  const TargetLibraryInfo *libInfo)
63  : FastISel(funcInfo, libInfo) {
64  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
65  X86ScalarSSEf64 = Subtarget->hasSSE2();
66  X86ScalarSSEf32 = Subtarget->hasSSE1();
67  }
68 
69  bool fastSelectInstruction(const Instruction *I) override;
70 
71  /// \brief The specified machine instr operand is a vreg, and that
72  /// vreg is being provided by the specified load instruction. If possible,
73  /// try to fold the load as an operand to the instruction, returning true
74  /// on success.
75  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
76  const LoadInst *LI) override;
77 
78  bool fastLowerArguments() override;
79  bool fastLowerCall(CallLoweringInfo &CLI) override;
80  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
81 
82 #include "X86GenFastISel.inc"
83 
84 private:
85  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
86  const DebugLoc &DL);
87 
88  bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
89  unsigned &ResultReg, unsigned Alignment = 1);
90 
91  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
92  MachineMemOperand *MMO = nullptr, bool Aligned = false);
93  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
94  X86AddressMode &AM,
95  MachineMemOperand *MMO = nullptr, bool Aligned = false);
96 
97  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
98  unsigned &ResultReg);
99 
100  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
101  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
102 
103  bool X86SelectLoad(const Instruction *I);
104 
105  bool X86SelectStore(const Instruction *I);
106 
107  bool X86SelectRet(const Instruction *I);
108 
109  bool X86SelectCmp(const Instruction *I);
110 
111  bool X86SelectZExt(const Instruction *I);
112 
113  bool X86SelectSExt(const Instruction *I);
114 
115  bool X86SelectBranch(const Instruction *I);
116 
117  bool X86SelectShift(const Instruction *I);
118 
119  bool X86SelectDivRem(const Instruction *I);
120 
121  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
122 
123  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
124 
125  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
126 
127  bool X86SelectSelect(const Instruction *I);
128 
129  bool X86SelectTrunc(const Instruction *I);
130 
131  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
132  const TargetRegisterClass *RC);
133 
134  bool X86SelectFPExt(const Instruction *I);
135  bool X86SelectFPTrunc(const Instruction *I);
136  bool X86SelectSIToFP(const Instruction *I);
137 
138  const X86InstrInfo *getInstrInfo() const {
139  return Subtarget->getInstrInfo();
140  }
141  const X86TargetMachine *getTargetMachine() const {
142  return static_cast<const X86TargetMachine *>(&TM);
143  }
144 
145  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
146 
147  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
148  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
149  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
150  unsigned fastMaterializeConstant(const Constant *C) override;
151 
152  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
153 
154  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
155 
156  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
157  /// computed in an SSE register, not on the X87 floating point stack.
158  bool isScalarFPTypeInSSEReg(EVT VT) const {
159  return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
160  (VT == MVT::f32 && X86ScalarSSEf32); // f32 when SSE1 is available
161  }
162 
163  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
164 
165  bool IsMemcpySmall(uint64_t Len);
166 
167  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
168  X86AddressMode SrcAM, uint64_t Len);
169 
170  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
171  const Value *Cond);
172 
173  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
174  X86AddressMode &AM);
175 
176  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
177  const TargetRegisterClass *RC, unsigned Op0,
178  bool Op0IsKill, unsigned Op1, bool Op1IsKill,
179  unsigned Op2, bool Op2IsKill, unsigned Op3,
180  bool Op3IsKill);
181 };
182 
183 } // end anonymous namespace.
184 
185 static std::pair<unsigned, bool>
186 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
187  unsigned CC;
188  bool NeedSwap = false;
189 
190  // SSE Condition code mapping:
191  // 0 - EQ
192  // 1 - LT
193  // 2 - LE
194  // 3 - UNORD
195  // 4 - NEQ
196  // 5 - NLT
197  // 6 - NLE
198  // 7 - ORD
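 // 8 - EQ_UQ and 12 - NEQ_OQ (used below) are extended predicates that are
 // only encodable with the AVX VCMP forms, so callers check for AVX before
 // using a CC greater than 7.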
199  switch (Predicate) {
200  default: llvm_unreachable("Unexpected predicate");
201  case CmpInst::FCMP_OEQ: CC = 0; break;
202  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
203  case CmpInst::FCMP_OLT: CC = 1; break;
204  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
205  case CmpInst::FCMP_OLE: CC = 2; break;
206  case CmpInst::FCMP_UNO: CC = 3; break;
207  case CmpInst::FCMP_UNE: CC = 4; break;
208  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
209  case CmpInst::FCMP_UGE: CC = 5; break;
210  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
211  case CmpInst::FCMP_UGT: CC = 6; break;
212  case CmpInst::FCMP_ORD: CC = 7; break;
213  case CmpInst::FCMP_UEQ: CC = 8; break;
214  case CmpInst::FCMP_ONE: CC = 12; break;
215  }
216 
217  return std::make_pair(CC, NeedSwap);
218 }
219 
220 /// \brief Adds a complex addressing mode to the given machine instr builder.
221 /// Note, this will constrain the index register. If it's not possible to
222 /// constrain the given index register, then a new one will be created. The
223 /// IndexReg field of the addressing mode will be updated to match in this case.
224 const MachineInstrBuilder &
225 X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
226  X86AddressMode &AM) {
227  // First constrain the index register. It needs to be a GR64_NOSP.
228  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
229  MIB->getNumOperands() +
230  X86::AddrIndexReg);
231  return ::addFullAddress(MIB, AM);
232 }
233 
234 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
235 /// into the user. The condition code will only be updated on success.
236 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
237  const Value *Cond) {
238  if (!isa<ExtractValueInst>(Cond))
239  return false;
240 
241  const auto *EV = cast<ExtractValueInst>(Cond);
242  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
243  return false;
244 
245  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
246  MVT RetVT;
247  const Function *Callee = II->getCalledFunction();
248  Type *RetTy =
249  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
250  if (!isTypeLegal(RetTy, RetVT))
251  return false;
252 
253  if (RetVT != MVT::i32 && RetVT != MVT::i64)
254  return false;
255 
256  X86::CondCode TmpCC;
257  switch (II->getIntrinsicID()) {
258  default: return false;
259  case Intrinsic::sadd_with_overflow:
260  case Intrinsic::ssub_with_overflow:
261  case Intrinsic::smul_with_overflow:
262  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
263  case Intrinsic::uadd_with_overflow:
264  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
265  }
266 
267  // Check if both instructions are in the same basic block.
268  if (II->getParent() != I->getParent())
269  return false;
270 
271  // Make sure nothing is in the way
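 // (Anything other than an extractvalue of the intrinsic's result sitting
 // between the intrinsic and its user could clobber EFLAGS, which would make
 // folding the condition code into the user unsafe.)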
272  BasicBlock::const_iterator Start(I);
273  BasicBlock::const_iterator End(II);
274  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
275  // We only expect extractvalue instructions between the intrinsic and the
276  // instruction to be selected.
277  if (!isa<ExtractValueInst>(Itr))
278  return false;
279 
280  // Check that the extractvalue operand comes from the intrinsic.
281  const auto *EVI = cast<ExtractValueInst>(Itr);
282  if (EVI->getAggregateOperand() != II)
283  return false;
284  }
285 
286  CC = TmpCC;
287  return true;
288 }
289 
290 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
291  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
292  if (evt == MVT::Other || !evt.isSimple())
293  // Unhandled type. Halt "fast" selection and bail.
294  return false;
295 
296  VT = evt.getSimpleVT();
297  // For now, require SSE/SSE2 for performing floating-point operations,
298  // since x87 requires additional work.
299  if (VT == MVT::f64 && !X86ScalarSSEf64)
300  return false;
301  if (VT == MVT::f32 && !X86ScalarSSEf32)
302  return false;
303  // Similarly, no f80 support yet.
304  if (VT == MVT::f80)
305  return false;
306  // We only handle legal types. For example, on x86-32 the instruction
307  // selector contains all of the 64-bit instructions from x86-64,
308  // under the assumption that i64 won't be used if the target doesn't
309  // support it.
310  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
311 }
312 
313 #include "X86GenCallingConv.inc"
314 
315 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
316 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
317 /// Return true and the result register by reference if it is possible.
318 bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
319  MachineMemOperand *MMO, unsigned &ResultReg,
320  unsigned Alignment) {
321  bool HasSSE41 = Subtarget->hasSSE41();
322  bool HasAVX = Subtarget->hasAVX();
323  bool HasAVX2 = Subtarget->hasAVX2();
324  bool HasAVX512 = Subtarget->hasAVX512();
325  bool HasVLX = Subtarget->hasVLX();
326  bool IsNonTemporal = MMO && MMO->isNonTemporal();
327 
328  // Get opcode and regclass of the output for the given load instruction.
329  unsigned Opc = 0;
330  const TargetRegisterClass *RC = nullptr;
331  switch (VT.getSimpleVT().SimpleTy) {
332  default: return false;
333  case MVT::i1:
334  case MVT::i8:
335  Opc = X86::MOV8rm;
336  RC = &X86::GR8RegClass;
337  break;
338  case MVT::i16:
339  Opc = X86::MOV16rm;
340  RC = &X86::GR16RegClass;
341  break;
342  case MVT::i32:
343  Opc = X86::MOV32rm;
344  RC = &X86::GR32RegClass;
345  break;
346  case MVT::i64:
347  // Must be in x86-64 mode.
348  Opc = X86::MOV64rm;
349  RC = &X86::GR64RegClass;
350  break;
351  case MVT::f32:
352  if (X86ScalarSSEf32) {
353  Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
354  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
355  } else {
356  Opc = X86::LD_Fp32m;
357  RC = &X86::RFP32RegClass;
358  }
359  break;
360  case MVT::f64:
361  if (X86ScalarSSEf64) {
362  Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
363  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
364  } else {
365  Opc = X86::LD_Fp64m;
366  RC = &X86::RFP64RegClass;
367  }
368  break;
369  case MVT::f80:
370  // No f80 support yet.
371  return false;
372  case MVT::v4f32:
373  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
374  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
375  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
376  else if (Alignment >= 16)
377  Opc = HasVLX ? X86::VMOVAPSZ128rm :
378  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
379  else
380  Opc = HasVLX ? X86::VMOVUPSZ128rm :
381  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
382  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
383  break;
384  case MVT::v2f64:
385  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
386  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
387  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
388  else if (Alignment >= 16)
389  Opc = HasVLX ? X86::VMOVAPDZ128rm :
390  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
391  else
392  Opc = HasVLX ? X86::VMOVUPDZ128rm :
393  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
394  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
395  break;
396  case MVT::v4i32:
397  case MVT::v2i64:
398  case MVT::v8i16:
399  case MVT::v16i8:
400  if (IsNonTemporal && Alignment >= 16)
401  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
402  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
403  else if (Alignment >= 16)
404  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
405  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
406  else
407  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
408  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
409  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
410  break;
411  case MVT::v8f32:
412  assert(HasAVX);
413  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
414  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
415  else if (IsNonTemporal && Alignment >= 16)
416  return false; // Force split for X86::VMOVNTDQArm
417  else if (Alignment >= 32)
418  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
419  else
420  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
421  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
422  break;
423  case MVT::v4f64:
424  assert(HasAVX);
425  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
426  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
427  else if (IsNonTemporal && Alignment >= 16)
428  return false; // Force split for X86::VMOVNTDQArm
429  else if (Alignment >= 32)
430  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
431  else
432  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
433  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
434  break;
435  case MVT::v8i32:
436  case MVT::v4i64:
437  case MVT::v16i16:
438  case MVT::v32i8:
439  assert(HasAVX);
440  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
441  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
442  else if (IsNonTemporal && Alignment >= 16)
443  return false; // Force split for X86::VMOVNTDQArm
444  else if (Alignment >= 32)
445  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
446  else
447  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
448  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
449  break;
450  case MVT::v16f32:
451  assert(HasAVX512);
452  if (IsNonTemporal && Alignment >= 64)
453  Opc = X86::VMOVNTDQAZrm;
454  else
455  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
456  RC = &X86::VR512RegClass;
457  break;
458  case MVT::v8f64:
459  assert(HasAVX512);
460  if (IsNonTemporal && Alignment >= 64)
461  Opc = X86::VMOVNTDQAZrm;
462  else
463  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
464  RC = &X86::VR512RegClass;
465  break;
466  case MVT::v8i64:
467  case MVT::v16i32:
468  case MVT::v32i16:
469  case MVT::v64i8:
470  assert(HasAVX512);
471  // Note: There are a lot more choices based on type with AVX-512, but
472  // there's really no advantage when the load isn't masked.
473  if (IsNonTemporal && Alignment >= 64)
474  Opc = X86::VMOVNTDQAZrm;
475  else
476  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
477  RC = &X86::VR512RegClass;
478  break;
479  }
480 
481  ResultReg = createResultReg(RC);
482  MachineInstrBuilder MIB =
483  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
484  addFullAddress(MIB, AM);
485  if (MMO)
486  MIB->addMemOperand(*FuncInfo.MF, MMO);
487  return true;
488 }
489 
490 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
491 /// type VT. The address is either pre-computed, consisting of a base ptr, Ptr,
492 /// and a displacement offset, or a GlobalAddress,
493 /// i.e. V. Return true if it is possible.
494 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
495  X86AddressMode &AM,
496  MachineMemOperand *MMO, bool Aligned) {
497  bool HasSSE1 = Subtarget->hasSSE1();
498  bool HasSSE2 = Subtarget->hasSSE2();
499  bool HasSSE4A = Subtarget->hasSSE4A();
500  bool HasAVX = Subtarget->hasAVX();
501  bool HasAVX512 = Subtarget->hasAVX512();
502  bool HasVLX = Subtarget->hasVLX();
503  bool IsNonTemporal = MMO && MMO->isNonTemporal();
504 
505  // Get opcode and regclass of the output for the given store instruction.
506  unsigned Opc = 0;
507  switch (VT.getSimpleVT().SimpleTy) {
508  case MVT::f80: // No f80 support yet.
509  default: return false;
510  case MVT::i1: {
511  // Mask out all but lowest bit.
512  unsigned AndResult = createResultReg(&X86::GR8RegClass);
513  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
514  TII.get(X86::AND8ri), AndResult)
515  .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
516  ValReg = AndResult;
517  LLVM_FALLTHROUGH; // handle i1 as i8.
518  }
519  case MVT::i8: Opc = X86::MOV8mr; break;
520  case MVT::i16: Opc = X86::MOV16mr; break;
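 // MOVNTI is an SSE2 instruction, which is why the non-temporal integer
 // stores below still check HasSSE2.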
521  case MVT::i32:
522  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
523  break;
524  case MVT::i64:
525  // Must be in x86-64 mode.
526  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
527  break;
528  case MVT::f32:
529  if (X86ScalarSSEf32) {
530  if (IsNonTemporal && HasSSE4A)
531  Opc = X86::MOVNTSS;
532  else
533  Opc = HasAVX512 ? X86::VMOVSSZmr :
534  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
535  } else
536  Opc = X86::ST_Fp32m;
537  break;
538  case MVT::f64:
539  if (X86ScalarSSEf64) {
540  if (IsNonTemporal && HasSSE4A)
541  Opc = X86::MOVNTSD;
542  else
543  Opc = HasAVX512 ? X86::VMOVSDZmr :
544  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
545  } else
546  Opc = X86::ST_Fp64m;
547  break;
548  case MVT::x86mmx:
549  Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
550  break;
551  case MVT::v4f32:
552  if (Aligned) {
553  if (IsNonTemporal)
554  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
555  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
556  else
557  Opc = HasVLX ? X86::VMOVAPSZ128mr :
558  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
559  } else
560  Opc = HasVLX ? X86::VMOVUPSZ128mr :
561  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
562  break;
563  case MVT::v2f64:
564  if (Aligned) {
565  if (IsNonTemporal)
566  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
567  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
568  else
569  Opc = HasVLX ? X86::VMOVAPDZ128mr :
570  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
571  } else
572  Opc = HasVLX ? X86::VMOVUPDZ128mr :
573  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
574  break;
575  case MVT::v4i32:
576  case MVT::v2i64:
577  case MVT::v8i16:
578  case MVT::v16i8:
579  if (Aligned) {
580  if (IsNonTemporal)
581  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
582  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
583  else
584  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
585  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
586  } else
587  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
588  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
589  break;
590  case MVT::v8f32:
591  assert(HasAVX);
592  if (Aligned) {
593  if (IsNonTemporal)
594  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
595  else
596  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
597  } else
598  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
599  break;
600  case MVT::v4f64:
601  assert(HasAVX);
602  if (Aligned) {
603  if (IsNonTemporal)
604  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
605  else
606  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
607  } else
608  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
609  break;
610  case MVT::v8i32:
611  case MVT::v4i64:
612  case MVT::v16i16:
613  case MVT::v32i8:
614  assert(HasAVX);
615  if (Aligned) {
616  if (IsNonTemporal)
617  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
618  else
619  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
620  } else
621  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
622  break;
623  case MVT::v16f32:
624  assert(HasAVX512);
625  if (Aligned)
626  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
627  else
628  Opc = X86::VMOVUPSZmr;
629  break;
630  case MVT::v8f64:
631  assert(HasAVX512);
632  if (Aligned) {
633  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
634  } else
635  Opc = X86::VMOVUPDZmr;
636  break;
637  case MVT::v8i64:
638  case MVT::v16i32:
639  case MVT::v32i16:
640  case MVT::v64i8:
641  assert(HasAVX512);
642  // Note: There are a lot more choices based on type with AVX-512, but
643  // there's really no advantage when the store isn't masked.
644  if (Aligned)
645  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
646  else
647  Opc = X86::VMOVDQU64Zmr;
648  break;
649  }
650 
651  const MCInstrDesc &Desc = TII.get(Opc);
652  // Some of the instructions in the previous switch use FR128 instead
653  // of FR32 for ValReg. Make sure the register we feed the instruction
654  // matches its register class constraints.
655  // Note: It is fine to do a copy from FR32 to FR128; these are the same
656  // registers behind the scenes, which is why this did not trigger any
657  // bugs before.
658  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
659  MachineInstrBuilder MIB =
660  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
661  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
662  if (MMO)
663  MIB->addMemOperand(*FuncInfo.MF, MMO);
664 
665  return true;
666 }
667 
668 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
669  X86AddressMode &AM,
670  MachineMemOperand *MMO, bool Aligned) {
671  // Handle 'null' like i32/i64 0.
672  if (isa<ConstantPointerNull>(Val))
673  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
674 
675  // If this is a store of a simple constant, fold the constant into the store.
676  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
677  unsigned Opc = 0;
678  bool Signed = true;
679  switch (VT.getSimpleVT().SimpleTy) {
680  default: break;
681  case MVT::i1:
682  Signed = false;
683  LLVM_FALLTHROUGH; // Handle as i8.
684  case MVT::i8: Opc = X86::MOV8mi; break;
685  case MVT::i16: Opc = X86::MOV16mi; break;
686  case MVT::i32: Opc = X86::MOV32mi; break;
687  case MVT::i64:
688  // Must be a 32-bit sign extended value.
689  if (isInt<32>(CI->getSExtValue()))
690  Opc = X86::MOV64mi32;
691  break;
692  }
693 
694  if (Opc) {
695  MachineInstrBuilder MIB =
696  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
697  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
698  : CI->getZExtValue());
699  if (MMO)
700  MIB->addMemOperand(*FuncInfo.MF, MMO);
701  return true;
702  }
703  }
704 
705  unsigned ValReg = getRegForValue(Val);
706  if (ValReg == 0)
707  return false;
708 
709  bool ValKill = hasTrivialKill(Val);
710  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
711 }
712 
713 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
714 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
715 /// ISD::SIGN_EXTEND).
716 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
717  unsigned Src, EVT SrcVT,
718  unsigned &ResultReg) {
719  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
720  Src, /*TODO: Kill=*/false);
721  if (RR == 0)
722  return false;
723 
724  ResultReg = RR;
725  return true;
726 }
727 
728 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
729  // Handle constant address.
730  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
731  // Can't handle alternate code models yet.
732  if (TM.getCodeModel() != CodeModel::Small)
733  return false;
734 
735  // Can't handle TLS yet.
736  if (GV->isThreadLocal())
737  return false;
738 
739  // RIP-relative addresses can't have additional register operands, so if
740  // we've already folded stuff into the addressing mode, just force the
741  // global value into its own register, which we can use as the basereg.
742  if (!Subtarget->isPICStyleRIPRel() ||
743  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
744  // Okay, we've committed to selecting this global. Set up the address.
745  AM.GV = GV;
746 
747  // Allow the subtarget to classify the global.
748  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
749 
750  // If this reference is relative to the pic base, set it now.
751  if (isGlobalRelativeToPICBase(GVFlags)) {
752  // FIXME: How do we know Base.Reg is free??
753  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
754  }
755 
756  // Unless the ABI requires an extra load, return a direct reference to
757  // the global.
758  if (!isGlobalStubReference(GVFlags)) {
759  if (Subtarget->isPICStyleRIPRel()) {
760  // Use rip-relative addressing if we can. Above we verified that the
761  // base and index registers are unused.
762  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
763  AM.Base.Reg = X86::RIP;
764  }
765  AM.GVOpFlags = GVFlags;
766  return true;
767  }
768 
769  // Ok, we need to do a load from a stub. If we've already loaded from
770  // this stub, reuse the loaded pointer, otherwise emit the load now.
771  DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
772  unsigned LoadReg;
773  if (I != LocalValueMap.end() && I->second != 0) {
774  LoadReg = I->second;
775  } else {
776  // Issue load from stub.
777  unsigned Opc = 0;
778  const TargetRegisterClass *RC = nullptr;
779  X86AddressMode StubAM;
780  StubAM.Base.Reg = AM.Base.Reg;
781  StubAM.GV = GV;
782  StubAM.GVOpFlags = GVFlags;
783 
784  // Prepare for inserting code in the local-value area.
785  SavePoint SaveInsertPt = enterLocalValueArea();
786 
787  if (TLI.getPointerTy(DL) == MVT::i64) {
788  Opc = X86::MOV64rm;
789  RC = &X86::GR64RegClass;
790 
791  if (Subtarget->isPICStyleRIPRel())
792  StubAM.Base.Reg = X86::RIP;
793  } else {
794  Opc = X86::MOV32rm;
795  RC = &X86::GR32RegClass;
796  }
797 
798  LoadReg = createResultReg(RC);
799  MachineInstrBuilder LoadMI =
800  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
801  addFullAddress(LoadMI, StubAM);
802 
803  // Ok, back to normal mode.
804  leaveLocalValueArea(SaveInsertPt);
805 
806  // Prevent loading the GV stub multiple times in the same MBB.
807  LocalValueMap[V] = LoadReg;
808  }
809 
810  // Now construct the final address. Note that the Disp, Scale,
811  // and Index values may already be set here.
812  AM.Base.Reg = LoadReg;
813  AM.GV = nullptr;
814  return true;
815  }
816  }
817 
818  // If all else fails, try to materialize the value in a register.
819  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
820  if (AM.Base.Reg == 0) {
821  AM.Base.Reg = getRegForValue(V);
822  return AM.Base.Reg != 0;
823  }
824  if (AM.IndexReg == 0) {
825  assert(AM.Scale == 1 && "Scale with no index!");
826  AM.IndexReg = getRegForValue(V);
827  return AM.IndexReg != 0;
828  }
829  }
830 
831  return false;
832 }
833 
834 /// X86SelectAddress - Attempt to fill in an address from the given value.
835 ///
836 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
837  SmallVector<const Value *, 32> GEPs;
838 redo_gep:
839  const User *U = nullptr;
840  unsigned Opcode = Instruction::UserOp1;
841  if (const Instruction *I = dyn_cast<Instruction>(V)) {
842  // Don't walk into other basic blocks; it's possible we haven't
843  // visited them yet, so the instructions may not yet be assigned
844  // virtual registers.
845  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
846  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
847  Opcode = I->getOpcode();
848  U = I;
849  }
850  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
851  Opcode = C->getOpcode();
852  U = C;
853  }
854 
855  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
856  if (Ty->getAddressSpace() > 255)
857  // Fast instruction selection doesn't support the special
858  // address spaces.
859  return false;
860 
861  switch (Opcode) {
862  default: break;
863  case Instruction::BitCast:
864  // Look past bitcasts.
865  return X86SelectAddress(U->getOperand(0), AM);
866 
867  case Instruction::IntToPtr:
868  // Look past no-op inttoptrs.
869  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
870  TLI.getPointerTy(DL))
871  return X86SelectAddress(U->getOperand(0), AM);
872  break;
873 
874  case Instruction::PtrToInt:
875  // Look past no-op ptrtoints.
876  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
877  return X86SelectAddress(U->getOperand(0), AM);
878  break;
879 
880  case Instruction::Alloca: {
881  // Do static allocas.
882  const AllocaInst *A = cast<AllocaInst>(V);
883  DenseMap<const AllocaInst *, int>::iterator SI =
884  FuncInfo.StaticAllocaMap.find(A);
885  if (SI != FuncInfo.StaticAllocaMap.end()) {
886  AM.BaseType = X86AddressMode::FrameIndexBase;
887  AM.Base.FrameIndex = SI->second;
888  return true;
889  }
890  break;
891  }
892 
893  case Instruction::Add: {
894  // Adds of constants are common and easy enough.
895  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
896  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
897  // They have to fit in the 32-bit signed displacement field though.
898  if (isInt<32>(Disp)) {
899  AM.Disp = (uint32_t)Disp;
900  return X86SelectAddress(U->getOperand(0), AM);
901  }
902  }
903  break;
904  }
905 
906  case Instruction::GetElementPtr: {
907  X86AddressMode SavedAM = AM;
908 
909  // Pattern-match simple GEPs.
910  uint64_t Disp = (int32_t)AM.Disp;
911  unsigned IndexReg = AM.IndexReg;
912  unsigned Scale = AM.Scale;
913  gep_type_iterator GTI = gep_type_begin(U);
914  // Iterate through the indices, folding what we can. Constants can be
915  // folded, and one dynamic index can be handled, if the scale is supported.
916  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
917  i != e; ++i, ++GTI) {
918  const Value *Op = *i;
919  if (StructType *STy = GTI.getStructTypeOrNull()) {
920  const StructLayout *SL = DL.getStructLayout(STy);
921  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
922  continue;
923  }
924 
925  // An array/variable index is always of the form i*S where S is the
926  // constant scale size. See if we can push the scale into immediates.
927  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
928  for (;;) {
929  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
930  // Constant-offset addressing.
931  Disp += CI->getSExtValue() * S;
932  break;
933  }
934  if (canFoldAddIntoGEP(U, Op)) {
935  // A compatible add with a constant operand. Fold the constant.
936  ConstantInt *CI =
937  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
938  Disp += CI->getSExtValue() * S;
939  // Iterate on the other operand.
940  Op = cast<AddOperator>(Op)->getOperand(0);
941  continue;
942  }
943  if (IndexReg == 0 &&
944  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
945  (S == 1 || S == 2 || S == 4 || S == 8)) {
946  // Scaled-index addressing.
947  Scale = S;
948  IndexReg = getRegForGEPIndex(Op).first;
949  if (IndexReg == 0)
950  return false;
951  break;
952  }
953  // Unsupported.
954  goto unsupported_gep;
955  }
956  }
957 
958  // Check for displacement overflow.
959  if (!isInt<32>(Disp))
960  break;
961 
962  AM.IndexReg = IndexReg;
963  AM.Scale = Scale;
964  AM.Disp = (uint32_t)Disp;
965  GEPs.push_back(V);
966 
967  if (const GetElementPtrInst *GEP =
968  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
969  // Ok, the GEP indices were covered by constant-offset and scaled-index
970  // addressing. Update the address state and move on to examining the base.
971  V = GEP;
972  goto redo_gep;
973  } else if (X86SelectAddress(U->getOperand(0), AM)) {
974  return true;
975  }
976 
977  // If we couldn't merge the gep value into this addr mode, revert to our
978  // address and just match the value instead of completely failing.
979  AM = SavedAM;
980 
981  for (const Value *I : reverse(GEPs))
982  if (handleConstantAddresses(I, AM))
983  return true;
984 
985  return false;
986  unsupported_gep:
987  // Ok, the GEP indices weren't all covered.
988  break;
989  }
990  }
991 
992  return handleConstantAddresses(V, AM);
993 }
994 
995 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
996 ///
997 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
998  const User *U = nullptr;
999  unsigned Opcode = Instruction::UserOp1;
1000  const Instruction *I = dyn_cast<Instruction>(V);
1001  // Record if the value is defined in the same basic block.
1002  //
1003  // This information is crucial to know whether or not folding an
1004  // operand is valid.
1005  // Indeed, FastISel generates or reuses a virtual register for all
1006  // operands of all instructions it selects. Obviously, the definition and
1007  // its uses must use the same virtual register otherwise the produced
1008  // code is incorrect.
1009  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
1010  // registers for values that are alive across basic blocks. This ensures
1011  // that the values are set consistently across basic blocks, even
1012  // if different instruction selection mechanisms are used (e.g., a mix of
1013  // SDISel and FastISel).
1014  // For values local to a basic block, the instruction selection process
1015  // generates these virtual registers with whatever method is appropriate
1016  // for its needs. In particular, FastISel and SDISel do not share the way
1017  // local virtual registers are set.
1018  // Therefore, it is impossible (or at least unsafe) to share values
1019  // between basic blocks unless they use the same instruction selection
1020  // method, which is not guaranteed for X86.
1021  // Moreover, things like hasOneUse could not be used accurately if we
1022  // allowed references to values across basic blocks when they are not
1023  // alive across basic blocks to begin with.
1024  bool InMBB = true;
1025  if (I) {
1026  Opcode = I->getOpcode();
1027  U = I;
1028  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1029  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1030  Opcode = C->getOpcode();
1031  U = C;
1032  }
1033 
1034  switch (Opcode) {
1035  default: break;
1036  case Instruction::BitCast:
1037  // Look past bitcasts if its operand is in the same BB.
1038  if (InMBB)
1039  return X86SelectCallAddress(U->getOperand(0), AM);
1040  break;
1041 
1042  case Instruction::IntToPtr:
1043  // Look past no-op inttoptrs if its operand is in the same BB.
1044  if (InMBB &&
1045  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1046  TLI.getPointerTy(DL))
1047  return X86SelectCallAddress(U->getOperand(0), AM);
1048  break;
1049 
1050  case Instruction::PtrToInt:
1051  // Look past no-op ptrtoints if its operand is in the same BB.
1052  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1053  return X86SelectCallAddress(U->getOperand(0), AM);
1054  break;
1055  }
1056 
1057  // Handle constant address.
1058  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1059  // Can't handle alternate code models yet.
1060  if (TM.getCodeModel() != CodeModel::Small)
1061  return false;
1062 
1063  // RIP-relative addresses can't have additional register operands.
1064  if (Subtarget->isPICStyleRIPRel() &&
1065  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1066  return false;
1067 
1068  // Can't handle TLS.
1069  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1070  if (GVar->isThreadLocal())
1071  return false;
1072 
1073  // Okay, we've committed to selecting this global. Set up the basic address.
1074  AM.GV = GV;
1075 
1076  // Return a direct reference to the global. Fastisel can handle calls to
1077  // functions that require loads, such as dllimport and nonlazybind
1078  // functions.
1079  if (Subtarget->isPICStyleRIPRel()) {
1080  // Use rip-relative addressing if we can. Above we verified that the
1081  // base and index registers are unused.
1082  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1083  AM.Base.Reg = X86::RIP;
1084  } else {
1085  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1086  }
1087 
1088  return true;
1089  }
1090 
1091  // If all else fails, try to materialize the value in a register.
1092  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1093  if (AM.Base.Reg == 0) {
1094  AM.Base.Reg = getRegForValue(V);
1095  return AM.Base.Reg != 0;
1096  }
1097  if (AM.IndexReg == 0) {
1098  assert(AM.Scale == 1 && "Scale with no index!");
1099  AM.IndexReg = getRegForValue(V);
1100  return AM.IndexReg != 0;
1101  }
1102  }
1103 
1104  return false;
1105 }
1106 
1107 
1108 /// X86SelectStore - Select and emit code to implement store instructions.
1109 bool X86FastISel::X86SelectStore(const Instruction *I) {
1110  // Atomic stores need special handling.
1111  const StoreInst *S = cast<StoreInst>(I);
1112 
1113  if (S->isAtomic())
1114  return false;
1115 
1116  const Value *PtrV = I->getOperand(1);
1117  if (TLI.supportSwiftError()) {
1118  // Swifterror values can come from either a function parameter with
1119  // swifterror attribute or an alloca with swifterror attribute.
1120  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1121  if (Arg->hasSwiftErrorAttr())
1122  return false;
1123  }
1124 
1125  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1126  if (Alloca->isSwiftError())
1127  return false;
1128  }
1129  }
1130 
1131  const Value *Val = S->getValueOperand();
1132  const Value *Ptr = S->getPointerOperand();
1133 
1134  MVT VT;
1135  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1136  return false;
1137 
1138  unsigned Alignment = S->getAlignment();
1139  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
1140  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1141  Alignment = ABIAlignment;
1142  bool Aligned = Alignment >= ABIAlignment;
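 // The aligned and non-temporal vector store opcodes chosen by
 // X86FastEmitStore assume at least the type's ABI alignment, so record
 // whether this store qualifies.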
1143 
1144  X86AddressMode AM;
1145  if (!X86SelectAddress(Ptr, AM))
1146  return false;
1147 
1148  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1149 }
1150 
1151 /// X86SelectRet - Select and emit code to implement ret instructions.
1152 bool X86FastISel::X86SelectRet(const Instruction *I) {
1153  const ReturnInst *Ret = cast<ReturnInst>(I);
1154  const Function &F = *I->getParent()->getParent();
1155  const X86MachineFunctionInfo *X86MFInfo =
1156  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1157 
1158  if (!FuncInfo.CanLowerReturn)
1159  return false;
1160 
1161  if (TLI.supportSwiftError() &&
1162  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1163  return false;
1164 
1165  if (TLI.supportSplitCSR(FuncInfo.MF))
1166  return false;
1167 
1168  CallingConv::ID CC = F.getCallingConv();
1169  if (CC != CallingConv::C &&
1170  CC != CallingConv::Fast &&
1171  CC != CallingConv::X86_FastCall &&
1172  CC != CallingConv::X86_StdCall &&
1173  CC != CallingConv::X86_ThisCall &&
1174  CC != CallingConv::X86_64_SysV &&
1175  CC != CallingConv::Win64)
1176  return false;
1177 
1178  // Don't handle popping bytes if they don't fit the ret's immediate.
1179  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1180  return false;
1181 
1182  // fastcc with -tailcallopt is intended to provide a guaranteed
1183  // tail call optimization. Fastisel doesn't know how to do that.
1184  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
1185  return false;
1186 
1187  // Let SDISel handle vararg functions.
1188  if (F.isVarArg())
1189  return false;
1190 
1191  // Build a list of return value registers.
1192  SmallVector<unsigned, 4> RetRegs;
1193 
1194  if (Ret->getNumOperands() > 0) {
1195  SmallVector<ISD::OutputArg, 4> Outs;
1196  GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1197 
1198  // Analyze operands of the call, assigning locations to each operand.
1199  SmallVector<CCValAssign, 16> ValLocs;
1200  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1201  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1202 
1203  const Value *RV = Ret->getOperand(0);
1204  unsigned Reg = getRegForValue(RV);
1205  if (Reg == 0)
1206  return false;
1207 
1208  // Only handle a single return value for now.
1209  if (ValLocs.size() != 1)
1210  return false;
1211 
1212  CCValAssign &VA = ValLocs[0];
1213 
1214  // Don't bother handling odd stuff for now.
1215  if (VA.getLocInfo() != CCValAssign::Full)
1216  return false;
1217  // Only handle register returns for now.
1218  if (!VA.isRegLoc())
1219  return false;
1220 
1221  // The calling-convention tables for x87 returns don't tell
1222  // the whole story.
1223  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1224  return false;
1225 
1226  unsigned SrcReg = Reg + VA.getValNo();
1227  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1228  EVT DstVT = VA.getValVT();
1229  // Special handling for extended integers.
1230  if (SrcVT != DstVT) {
1231  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1232  return false;
1233 
1234  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1235  return false;
1236 
1237  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1238 
1239  if (SrcVT == MVT::i1) {
1240  if (Outs[0].Flags.isSExt())
1241  return false;
1242  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
1243  SrcVT = MVT::i8;
1244  }
1245  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1246  ISD::SIGN_EXTEND;
1247  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
1248  SrcReg, /*TODO: Kill=*/false);
1249  }
1250 
1251  // Make the copy.
1252  unsigned DstReg = VA.getLocReg();
1253  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1254  // Avoid a cross-class copy. This is very unlikely.
1255  if (!SrcRC->contains(DstReg))
1256  return false;
1257  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1258  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1259 
1260  // Add register to return instruction.
1261  RetRegs.push_back(VA.getLocReg());
1262  }
1263 
1264  // Swift calling convention does not require we copy the sret argument
1265  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1266 
1267  // All x86 ABIs require that for returning structs by value we copy
1268  // the sret argument into %rax/%eax (depending on ABI) for the return.
1269  // We saved the argument into a virtual register in the entry block,
1270  // so now we copy the value out and into %rax/%eax.
1271  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
1272  unsigned Reg = X86MFInfo->getSRetReturnReg();
1273  assert(Reg &&
1274  "SRetReturnReg should have been set in LowerFormalArguments()!");
1275  unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
1276  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1277  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1278  RetRegs.push_back(RetReg);
1279  }
1280 
1281  // Now emit the RET.
1282  MachineInstrBuilder MIB;
1283  if (X86MFInfo->getBytesToPopOnReturn()) {
1284  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1285  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
1286  .addImm(X86MFInfo->getBytesToPopOnReturn());
1287  } else {
1288  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1289  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
1290  }
1291  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1292  MIB.addReg(RetRegs[i], RegState::Implicit);
1293  return true;
1294 }
1295 
1296 /// X86SelectLoad - Select and emit code to implement load instructions.
1297 ///
1298 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1299  const LoadInst *LI = cast<LoadInst>(I);
1300 
1301  // Atomic loads need special handling.
1302  if (LI->isAtomic())
1303  return false;
1304 
1305  const Value *SV = I->getOperand(0);
1306  if (TLI.supportSwiftError()) {
1307  // Swifterror values can come from either a function parameter with
1308  // swifterror attribute or an alloca with swifterror attribute.
1309  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1310  if (Arg->hasSwiftErrorAttr())
1311  return false;
1312  }
1313 
1314  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1315  if (Alloca->isSwiftError())
1316  return false;
1317  }
1318  }
1319 
1320  MVT VT;
1321  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1322  return false;
1323 
1324  const Value *Ptr = LI->getPointerOperand();
1325 
1326  X86AddressMode AM;
1327  if (!X86SelectAddress(Ptr, AM))
1328  return false;
1329 
1330  unsigned Alignment = LI->getAlignment();
1331  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
1332  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1333  Alignment = ABIAlignment;
1334 
1335  unsigned ResultReg = 0;
1336  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1337  Alignment))
1338  return false;
1339 
1340  updateValueMap(I, ResultReg);
1341  return true;
1342 }
1343 
1344 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1345  bool HasAVX512 = Subtarget->hasAVX512();
1346  bool HasAVX = Subtarget->hasAVX();
1347  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
1348  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
1349 
1350  switch (VT.getSimpleVT().SimpleTy) {
1351  default: return 0;
1352  case MVT::i8: return X86::CMP8rr;
1353  case MVT::i16: return X86::CMP16rr;
1354  case MVT::i32: return X86::CMP32rr;
1355  case MVT::i64: return X86::CMP64rr;
1356  case MVT::f32:
1357  return X86ScalarSSEf32
1358  ? (HasAVX512 ? X86::VUCOMISSZrr
1359  : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
1360  : 0;
1361  case MVT::f64:
1362  return X86ScalarSSEf64
1363  ? (HasAVX512 ? X86::VUCOMISDZrr
1364  : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
1365  : 0;
1366  }
1367 }
1368 
1369 /// If the right-hand side of the comparison is the constant RHSC, return an
1370 /// opcode that can fold it as an immediate (e.g. CMP32ri); otherwise return 0.
1371 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1372  int64_t Val = RHSC->getSExtValue();
1373  switch (VT.getSimpleVT().SimpleTy) {
1374  // Otherwise, we can't fold the immediate into this comparison.
1375  default:
1376  return 0;
1377  case MVT::i8:
1378  return X86::CMP8ri;
1379  case MVT::i16:
1380  if (isInt<8>(Val))
1381  return X86::CMP16ri8;
1382  return X86::CMP16ri;
1383  case MVT::i32:
1384  if (isInt<8>(Val))
1385  return X86::CMP32ri8;
1386  return X86::CMP32ri;
1387  case MVT::i64:
1388  if (isInt<8>(Val))
1389  return X86::CMP64ri8;
1390  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1391  // field.
1392  if (isInt<32>(Val))
1393  return X86::CMP64ri32;
1394  return 0;
1395  }
1396 }
1397 
1398 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1399  const DebugLoc &CurDbgLoc) {
1400  unsigned Op0Reg = getRegForValue(Op0);
1401  if (Op0Reg == 0) return false;
1402 
1403  // Handle 'null' like i32/i64 0.
1404  if (isa<ConstantPointerNull>(Op1))
1405  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1406 
1407  // We have two options: compare with register or immediate. If the RHS of
1408  // the compare is an immediate that we can fold into this compare, use
1409  // CMPri, otherwise use CMPrr.
1410  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1411  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1412  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1413  .addReg(Op0Reg)
1414  .addImm(Op1C->getSExtValue());
1415  return true;
1416  }
1417  }
1418 
1419  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1420  if (CompareOpc == 0) return false;
1421 
1422  unsigned Op1Reg = getRegForValue(Op1);
1423  if (Op1Reg == 0) return false;
1424  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1425  .addReg(Op0Reg)
1426  .addReg(Op1Reg);
1427 
1428  return true;
1429 }
1430 
1431 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1432  const CmpInst *CI = cast<CmpInst>(I);
1433 
1434  MVT VT;
1435  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1436  return false;
1437 
1438  // Try to optimize or fold the cmp.
1439  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1440  unsigned ResultReg = 0;
1441  switch (Predicate) {
1442  default: break;
1443  case CmpInst::FCMP_FALSE: {
1444  ResultReg = createResultReg(&X86::GR32RegClass);
1445  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1446  ResultReg);
1447  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
1448  X86::sub_8bit);
1449  if (!ResultReg)
1450  return false;
1451  break;
1452  }
1453  case CmpInst::FCMP_TRUE: {
1454  ResultReg = createResultReg(&X86::GR8RegClass);
1455  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1456  ResultReg).addImm(1);
1457  break;
1458  }
1459  }
1460 
1461  if (ResultReg) {
1462  updateValueMap(I, ResultReg);
1463  return true;
1464  }
1465 
1466  const Value *LHS = CI->getOperand(0);
1467  const Value *RHS = CI->getOperand(1);
1468 
1469  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1470  // We don't have to materialize a zero constant for this case and can just use
1471  // %x again on the RHS.
1472  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1473  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1474  if (RHSC && RHSC->isNullValue())
1475  RHS = LHS;
1476  }
1477 
1478  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1479  static const uint16_t SETFOpcTable[2][3] = {
1480  { X86::SETEr, X86::SETNPr, X86::AND8rr },
1481  { X86::SETNEr, X86::SETPr, X86::OR8rr }
1482  };
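 // (UCOMISS/UCOMISD set ZF, PF and CF: "ordered and equal" requires ZF=1 and
 // PF=0, so FCMP_OEQ ANDs SETE with SETNP, while FCMP_UNE is its negation
 // and ORs SETNE with SETP.)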
1483  const uint16_t *SETFOpc = nullptr;
1484  switch (Predicate) {
1485  default: break;
1486  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1487  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1488  }
1489 
1490  ResultReg = createResultReg(&X86::GR8RegClass);
1491  if (SETFOpc) {
1492  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1493  return false;
1494 
1495  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1496  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1497  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
1498  FlagReg1);
1499  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
1500  FlagReg2);
1501  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1502  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1503  updateValueMap(I, ResultReg);
1504  return true;
1505  }
1506 
1507  X86::CondCode CC;
1508  bool SwapArgs;
1509  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1510  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1511  unsigned Opc = X86::getSETFromCond(CC);
1512 
1513  if (SwapArgs)
1514  std::swap(LHS, RHS);
1515 
1516  // Emit a compare of LHS/RHS.
1517  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1518  return false;
1519 
1520  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
1521  updateValueMap(I, ResultReg);
1522  return true;
1523 }
1524 
1525 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1526  EVT DstVT = TLI.getValueType(DL, I->getType());
1527  if (!TLI.isTypeLegal(DstVT))
1528  return false;
1529 
1530  unsigned ResultReg = getRegForValue(I->getOperand(0));
1531  if (ResultReg == 0)
1532  return false;
1533 
1534  // Handle zero-extension from i1 to i8, which is common.
1535  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1536  if (SrcVT == MVT::i1) {
1537  // Set the high bits to zero.
1538  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1539  SrcVT = MVT::i8;
1540 
1541  if (ResultReg == 0)
1542  return false;
1543  }
1544 
1545  if (DstVT == MVT::i64) {
1546  // Handle extension to 64-bits via sub-register shenanigans.
1547  unsigned MovInst;
1548 
1549  switch (SrcVT.SimpleTy) {
1550  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1551  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1552  case MVT::i32: MovInst = X86::MOV32rr; break;
1553  default: llvm_unreachable("Unexpected zext to i64 source type");
1554  }
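 // (A write to a 32-bit register implicitly zeroes the upper 32 bits on
 // x86-64, so the 32-bit move below combined with SUBREG_TO_REG yields a
 // correct 64-bit zero-extension.)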
1555 
1556  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1557  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1558  .addReg(ResultReg);
1559 
1560  ResultReg = createResultReg(&X86::GR64RegClass);
1561  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1562  ResultReg)
1563  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1564  } else if (DstVT == MVT::i16) {
1565  // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1566  // extend to 32-bits and then extract down to 16-bits.
1567  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1568  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1569  Result32).addReg(ResultReg);
1570 
1571  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1572  X86::sub_16bit);
1573  } else if (DstVT != MVT::i8) {
1574  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1575  ResultReg, /*Kill=*/true);
1576  if (ResultReg == 0)
1577  return false;
1578  }
1579 
1580  updateValueMap(I, ResultReg);
1581  return true;
1582 }
1583 
1584 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1585  EVT DstVT = TLI.getValueType(DL, I->getType());
1586  if (!TLI.isTypeLegal(DstVT))
1587  return false;
1588 
1589  unsigned ResultReg = getRegForValue(I->getOperand(0));
1590  if (ResultReg == 0)
1591  return false;
1592 
1593  // Handle sign-extension from i1 to i8.
1594  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1595  if (SrcVT == MVT::i1) {
1596  // Set the high bits to zero.
1597  unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
1598  /*TODO: Kill=*/false);
1599  if (ZExtReg == 0)
1600  return false;
1601 
1602  // Negate the result to make an 8-bit sign extended value.
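 // (fastEmitZExtFromI1 leaves 0 or 1 in the register; NEG maps 1 to 0xFF
 // (-1) and leaves 0 unchanged, which is exactly an i1 -> i8 sign extend.)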
1603  ResultReg = createResultReg(&X86::GR8RegClass);
1604  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1605  ResultReg).addReg(ZExtReg);
1606 
1607  SrcVT = MVT::i8;
1608  }
1609 
1610  if (DstVT == MVT::i16) {
1611  // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1612  // extend to 32-bits and then extract down to 16-bits.
1613  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1614  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1615  Result32).addReg(ResultReg);
1616 
1617  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1618  X86::sub_16bit);
1619  } else if (DstVT != MVT::i8) {
1620  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1621  ResultReg, /*Kill=*/true);
1622  if (ResultReg == 0)
1623  return false;
1624  }
1625 
1626  updateValueMap(I, ResultReg);
1627  return true;
1628 }
1629 
1630 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1631  // Unconditional branches are selected by tablegen-generated code.
1632  // Handle a conditional branch.
1633  const BranchInst *BI = cast<BranchInst>(I);
1634  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1635  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1636 
1637  // Fold the common case of a conditional branch with a comparison
1638  // in the same block (values defined on other blocks may not have
1639  // initialized registers).
1640  X86::CondCode CC;
1641  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1642  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1643  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1644 
1645  // Try to optimize or fold the cmp.
1646  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1647  switch (Predicate) {
1648  default: break;
1649  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1650  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1651  }
1652 
1653  const Value *CmpLHS = CI->getOperand(0);
1654  const Value *CmpRHS = CI->getOperand(1);
1655 
1656  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1657  // 0.0.
1658  // We don't have to materialize a zero constant for this case and can just
1659  // use %x again on the RHS.
1660  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1661  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1662  if (CmpRHSC && CmpRHSC->isNullValue())
1663  CmpRHS = CmpLHS;
1664  }
1665 
1666  // Try to take advantage of fallthrough opportunities.
1667  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1668  std::swap(TrueMBB, FalseMBB);
1669  Predicate = CmpInst::getInversePredicate(Predicate);
1670  }
1671 
1672  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1673  // code check. Instead two branch instructions are required to check all
1674  // the flags. First we change the predicate to a supported condition code,
1675  // which will be the first branch. Later on we will emit the second
1676  // branch.
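      // As an illustration only (register names and the exact compare are
      // assumptions, and the fall-through swap above is ignored), a UNE branch
      // on f32 operands comes out roughly as:
      //   ucomiss %rhs, %lhs
      //   jne  TrueMBB        ; FCMP_ONE's condition code, the first branch
      //   jp   TrueMBB        ; the extra branch, catching the unordered case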
1677  bool NeedExtraBranch = false;
1678  switch (Predicate) {
1679  default: break;
1680  case CmpInst::FCMP_OEQ:
1681  std::swap(TrueMBB, FalseMBB);
1682  LLVM_FALLTHROUGH;
1683  case CmpInst::FCMP_UNE:
1684  NeedExtraBranch = true;
1685  Predicate = CmpInst::FCMP_ONE;
1686  break;
1687  }
1688 
1689  bool SwapArgs;
1690  unsigned BranchOpc;
1691  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1692  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1693 
1694  BranchOpc = X86::GetCondBranchFromCond(CC);
1695  if (SwapArgs)
1696  std::swap(CmpLHS, CmpRHS);
1697 
1698  // Emit a compare of the LHS and RHS, setting the flags.
1699  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1700  return false;
1701 
1702  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1703  .addMBB(TrueMBB);
1704 
1705  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1706  // to UNE above).
1707  if (NeedExtraBranch) {
1708  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
1709  .addMBB(TrueMBB);
1710  }
1711 
1712  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1713  return true;
1714  }
1715  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1716  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1717  // typically happen for _Bool and C++ bools.
1718  MVT SourceVT;
1719  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1720  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1721  unsigned TestOpc = 0;
1722  switch (SourceVT.SimpleTy) {
1723  default: break;
1724  case MVT::i8: TestOpc = X86::TEST8ri; break;
1725  case MVT::i16: TestOpc = X86::TEST16ri; break;
1726  case MVT::i32: TestOpc = X86::TEST32ri; break;
1727  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1728  }
1729  if (TestOpc) {
1730  unsigned OpReg = getRegForValue(TI->getOperand(0));
1731  if (OpReg == 0) return false;
1732 
1733  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1734  .addReg(OpReg).addImm(1);
1735 
1736  unsigned JmpOpc = X86::JNE_1;
1737  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1738  std::swap(TrueMBB, FalseMBB);
1739  JmpOpc = X86::JE_1;
1740  }
1741 
1742  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
1743  .addMBB(TrueMBB);
1744 
1745  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1746  return true;
1747  }
1748  }
1749  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1750  // Fake-request the condition; otherwise the intrinsic might be completely
1751  // optimized away.
1752  unsigned TmpReg = getRegForValue(BI->getCondition());
1753  if (TmpReg == 0)
1754  return false;
1755 
1756  unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
1757 
1758  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1759  .addMBB(TrueMBB);
1760  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1761  return true;
1762  }
1763 
1764  // Otherwise do a clumsy setcc and re-test it.
1765  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1766  // in an explicit cast, so make sure to handle that correctly.
1767  unsigned OpReg = getRegForValue(BI->getCondition());
1768  if (OpReg == 0) return false;
1769 
1770  // In case OpReg is a K register, COPY to a GPR
1771  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1772  unsigned KOpReg = OpReg;
1773  OpReg = createResultReg(&X86::GR32RegClass);
1774  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1775  TII.get(TargetOpcode::COPY), OpReg)
1776  .addReg(KOpReg);
1777  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
1778  X86::sub_8bit);
1779  }
1780  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1781  .addReg(OpReg)
1782  .addImm(1);
1783  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
1784  .addMBB(TrueMBB);
1785  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1786  return true;
1787 }
1788 
1789 bool X86FastISel::X86SelectShift(const Instruction *I) {
1790  unsigned CReg = 0, OpReg = 0;
1791  const TargetRegisterClass *RC = nullptr;
1792  assert(!I->getType()->isIntegerTy(8) &&
1793  "i8 shifts should be handled by autogenerated table");
1794  if (I->getType()->isIntegerTy(16)) {
1795  CReg = X86::CX;
1796  RC = &X86::GR16RegClass;
1797  switch (I->getOpcode()) {
1798  default: llvm_unreachable("Unexpected shift opcode");
1799  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1800  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1801  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1802  }
1803  } else if (I->getType()->isIntegerTy(32)) {
1804  CReg = X86::ECX;
1805  RC = &X86::GR32RegClass;
1806  switch (I->getOpcode()) {
1807  default: llvm_unreachable("Unexpected shift opcode");
1808  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1809  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1810  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1811  }
1812  } else if (I->getType()->isIntegerTy(64)) {
1813  CReg = X86::RCX;
1814  RC = &X86::GR64RegClass;
1815  switch (I->getOpcode()) {
1816  default: llvm_unreachable("Unexpected shift opcode");
1817  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1818  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1819  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1820  }
1821  } else {
1822  return false;
1823  }
1824 
1825  MVT VT;
1826  if (!isTypeLegal(I->getType(), VT))
1827  return false;
1828 
1829  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1830  if (Op0Reg == 0) return false;
1831 
1832  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1833  if (Op1Reg == 0) return false;
1834  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1835  CReg).addReg(Op1Reg);
1836 
1837  // The shift instruction uses X86::CL. If we defined a super-register
1838  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
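      // Rough sketch for "shl i32 %x, %n": the COPY above moves %n into ECX,
      // the KILL below marks CL as the live piece, and SHL32rCL then reads the
      // shift amount implicitly from CL.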
1839  assert(CReg != X86::CL && "CReg should be a super register of CL");
1840  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1841  TII.get(TargetOpcode::KILL), X86::CL)
1842  .addReg(CReg, RegState::Kill);
1843 
1844  unsigned ResultReg = createResultReg(RC);
1845  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1846  .addReg(Op0Reg);
1847  updateValueMap(I, ResultReg);
1848  return true;
1849 }
1850 
1851 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1852  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1853  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1854  const static bool S = true; // IsSigned
1855  const static bool U = false; // !IsSigned
1856  const static unsigned Copy = TargetOpcode::COPY;
1857  // For the X86 DIV/IDIV instruction, in most cases the dividend
1858  // (numerator) must be in a specific register pair highreg:lowreg,
1859  // producing the quotient in lowreg and the remainder in highreg.
1860  // For most data types, to set up the instruction, the dividend is
1861  // copied into lowreg, and lowreg is sign-extended or zero-extended
1862  // into highreg. The exception is i8, where the dividend is defined
1863  // as a single register rather than a register pair, and we
1864  // therefore directly sign-extend or zero-extend the dividend into
1865  // lowreg, instead of copying, and ignore the highreg.
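      // Illustrative example (vreg names are placeholders): "sdiv i32 %a, %b"
      // follows the i32/SDiv row of the table below and is emitted roughly as:
      //   COPY %a -> EAX
      //   CDQ                  ; sign-extend EAX into EDX
      //   IDIV32r %b_reg       ; quotient in EAX, remainder in EDX
      //   COPY EAX -> %result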
1866  const static struct DivRemEntry {
1867  // The following portion depends only on the data type.
1868  const TargetRegisterClass *RC;
1869  unsigned LowInReg; // low part of the register pair
1870  unsigned HighInReg; // high part of the register pair
1871  // The following portion depends on both the data type and the operation.
1872  struct DivRemResult {
1873  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1874  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1875  // highreg, or copying a zero into highreg.
1876  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1877  // zero/sign-extending into lowreg for i8.
1878  unsigned DivRemResultReg; // Register containing the desired result.
1879  bool IsOpSigned; // Whether to use signed or unsigned form.
1880  } ResultTable[NumOps];
1881  } OpTable[NumTypes] = {
1882  { &X86::GR8RegClass, X86::AX, 0, {
1883  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1884  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1885  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1886  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1887  }
1888  }, // i8
1889  { &X86::GR16RegClass, X86::AX, X86::DX, {
1890  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1891  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1892  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1893  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1894  }
1895  }, // i16
1896  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1897  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1898  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1899  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1900  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1901  }
1902  }, // i32
1903  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1904  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1905  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1906  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1907  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1908  }
1909  }, // i64
1910  };
1911 
1912  MVT VT;
1913  if (!isTypeLegal(I->getType(), VT))
1914  return false;
1915 
1916  unsigned TypeIndex, OpIndex;
1917  switch (VT.SimpleTy) {
1918  default: return false;
1919  case MVT::i8: TypeIndex = 0; break;
1920  case MVT::i16: TypeIndex = 1; break;
1921  case MVT::i32: TypeIndex = 2; break;
1922  case MVT::i64: TypeIndex = 3;
1923  if (!Subtarget->is64Bit())
1924  return false;
1925  break;
1926  }
1927 
1928  switch (I->getOpcode()) {
1929  default: llvm_unreachable("Unexpected div/rem opcode");
1930  case Instruction::SDiv: OpIndex = 0; break;
1931  case Instruction::SRem: OpIndex = 1; break;
1932  case Instruction::UDiv: OpIndex = 2; break;
1933  case Instruction::URem: OpIndex = 3; break;
1934  }
1935 
1936  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1937  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1938  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1939  if (Op0Reg == 0)
1940  return false;
1941  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1942  if (Op1Reg == 0)
1943  return false;
1944 
1945  // Move op0 into low-order input register.
1946  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1947  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1948  // Zero-extend or sign-extend into high-order input register.
1949  if (OpEntry.OpSignExtend) {
1950  if (OpEntry.IsOpSigned)
1951  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1952  TII.get(OpEntry.OpSignExtend));
1953  else {
1954  unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1955  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1956  TII.get(X86::MOV32r0), Zero32);
1957 
1958  // Copy the zero into the appropriate sub/super/identical physical
1959  // register. Unfortunately the operations needed are not uniform enough
1960  // to fit neatly into the table above.
1961  if (VT == MVT::i16) {
1962  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1963  TII.get(Copy), TypeEntry.HighInReg)
1964  .addReg(Zero32, 0, X86::sub_16bit);
1965  } else if (VT == MVT::i32) {
1966  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1967  TII.get(Copy), TypeEntry.HighInReg)
1968  .addReg(Zero32);
1969  } else if (VT == MVT::i64) {
1970  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1971  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1972  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1973  }
1974  }
1975  }
1976  // Generate the DIV/IDIV instruction.
1977  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1978  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1979  // For i8 remainder, we can't reference AH directly, as we'll end
1980  // up with bogus copies like %R9B = COPY %AH. Reference AX
1981  // instead to prevent AH references in a REX instruction.
1982  //
1983  // The current assumption of the fast register allocator is that isel
1984  // won't generate explicit references to the GR8_NOREX registers. If
1985  // the allocator and/or the backend get enhanced to be more robust in
1986  // that regard, this can be, and should be, removed.
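      // The workaround below recovers the i8 remainder roughly as (names are
      // placeholders):
      //   COPY AX -> %tmp16
      //   SHR16ri %tmp16, 8    ; moves AH's contents into the low byte
      //   extract sub_8bit     ; the remainder as an i8 vreg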
1987  unsigned ResultReg = 0;
1988  if ((I->getOpcode() == Instruction::SRem ||
1989  I->getOpcode() == Instruction::URem) &&
1990  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
1991  unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
1992  unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
1993  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1994  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
1995 
1996  // Shift AX right by 8 bits instead of using AH.
1997  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
1998  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
1999 
2000  // Now reference the 8-bit subreg of the result.
2001  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2002  /*Kill=*/true, X86::sub_8bit);
2003  }
2004  // Copy the result out of the physreg if we haven't already.
2005  if (!ResultReg) {
2006  ResultReg = createResultReg(TypeEntry.RC);
2007  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
2008  .addReg(OpEntry.DivRemResultReg);
2009  }
2010  updateValueMap(I, ResultReg);
2011 
2012  return true;
2013 }
2014 
2015 /// \brief Emit a conditional move instruction (if they are supported) to lower
2016 /// the select.
2017 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2018  // Check if the subtarget supports these instructions.
2019  if (!Subtarget->hasCMov())
2020  return false;
2021 
2022  // FIXME: Add support for i8.
2023  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2024  return false;
2025 
2026  const Value *Cond = I->getOperand(0);
2027  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2028  bool NeedTest = true;
2029  X86::CondCode CC = X86::COND_NE;
2030 
2031  // Optimize conditions coming from a compare if both instructions are in the
2032  // same basic block (values defined in other basic blocks may not have
2033  // initialized registers).
2034  const auto *CI = dyn_cast<CmpInst>(Cond);
2035  if (CI && (CI->getParent() == I->getParent())) {
2036  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2037 
2038  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2039  static const uint16_t SETFOpcTable[2][3] = {
2040  { X86::SETNPr, X86::SETEr , X86::TEST8rr },
2041  { X86::SETPr, X86::SETNEr, X86::OR8rr }
2042  };
2043  const uint16_t *SETFOpc = nullptr;
2044  switch (Predicate) {
2045  default: break;
2046  case CmpInst::FCMP_OEQ:
2047  SETFOpc = &SETFOpcTable[0][0];
2048  Predicate = CmpInst::ICMP_NE;
2049  break;
2050  case CmpInst::FCMP_UNE:
2051  SETFOpc = &SETFOpcTable[1][0];
2052  Predicate = CmpInst::ICMP_NE;
2053  break;
2054  }
2055 
2056  bool NeedSwap;
2057  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2058  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2059 
2060  const Value *CmpLHS = CI->getOperand(0);
2061  const Value *CmpRHS = CI->getOperand(1);
2062  if (NeedSwap)
2063  std::swap(CmpLHS, CmpRHS);
2064 
2065  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2066  // Emit a compare of the LHS and RHS, setting the flags.
2067  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2068  return false;
2069 
2070  if (SETFOpc) {
2071  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
2072  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
2073  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
2074  FlagReg1);
2075  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
2076  FlagReg2);
2077  auto const &II = TII.get(SETFOpc[2]);
2078  if (II.getNumDefs()) {
2079  unsigned TmpReg = createResultReg(&X86::GR8RegClass);
2080  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2081  .addReg(FlagReg2).addReg(FlagReg1);
2082  } else {
2083  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2084  .addReg(FlagReg2).addReg(FlagReg1);
2085  }
2086  }
2087  NeedTest = false;
2088  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2089  // Fake-request the condition; otherwise the intrinsic might be completely
2090  // optimized away.
2091  unsigned TmpReg = getRegForValue(Cond);
2092  if (TmpReg == 0)
2093  return false;
2094 
2095  NeedTest = false;
2096  }
2097 
2098  if (NeedTest) {
2099  // Selects operate on i1; however, CondReg is 8 bits wide and may contain
2100  // garbage. Only the least significant bit is supposed to be accurate. If
2101  // we read more than the lsb, we may see non-zero values even though the
2102  // lsb is zero. Therefore, we have to truncate CondReg to i1 for the
2103  // select. This is achieved by performing a TEST against 1.
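      // Concretely, the TEST8ri below is "test $1, %cond8", so only bit 0 of
      // CondReg influences EFLAGS and therefore the CMOV.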
2104  unsigned CondReg = getRegForValue(Cond);
2105  if (CondReg == 0)
2106  return false;
2107  bool CondIsKill = hasTrivialKill(Cond);
2108 
2109  // In case CondReg is a K register, COPY to a GPR
2110  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2111  unsigned KCondReg = CondReg;
2112  CondReg = createResultReg(&X86::GR32RegClass);
2113  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2114  TII.get(TargetOpcode::COPY), CondReg)
2115  .addReg(KCondReg, getKillRegState(CondIsKill));
2116  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2117  X86::sub_8bit);
2118  }
2119  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2120  .addReg(CondReg, getKillRegState(CondIsKill))
2121  .addImm(1);
2122  }
2123 
2124  const Value *LHS = I->getOperand(1);
2125  const Value *RHS = I->getOperand(2);
2126 
2127  unsigned RHSReg = getRegForValue(RHS);
2128  bool RHSIsKill = hasTrivialKill(RHS);
2129 
2130  unsigned LHSReg = getRegForValue(LHS);
2131  bool LHSIsKill = hasTrivialKill(LHS);
2132 
2133  if (!LHSReg || !RHSReg)
2134  return false;
2135 
2136  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2137  unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
2138  unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
2139  LHSReg, LHSIsKill);
2140  updateValueMap(I, ResultReg);
2141  return true;
2142 }
2143 
2144 /// \brief Emit SSE or AVX instructions to lower the select.
2145 ///
2146 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2147 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2148 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
2149 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2150  // Optimize conditions coming from a compare if both instructions are in the
2151  // same basic block (values defined in other basic blocks may not have
2152  // initialized registers).
2153  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2154  if (!CI || (CI->getParent() != I->getParent()))
2155  return false;
2156 
2157  if (I->getType() != CI->getOperand(0)->getType() ||
2158  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2159  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2160  return false;
2161 
2162  const Value *CmpLHS = CI->getOperand(0);
2163  const Value *CmpRHS = CI->getOperand(1);
2164  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2165 
2166  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2167  // We don't have to materialize a zero constant for this case and can just use
2168  // %x again on the RHS.
2169  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2170  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2171  if (CmpRHSC && CmpRHSC->isNullValue())
2172  CmpRHS = CmpLHS;
2173  }
2174 
2175  unsigned CC;
2176  bool NeedSwap;
2177  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2178  if (CC > 7 && !Subtarget->hasAVX())
2179  return false;
2180 
2181  if (NeedSwap)
2182  std::swap(CmpLHS, CmpRHS);
2183 
2184  // Choose the SSE instruction sequence based on data type (float or double).
2185  static const uint16_t OpcTable[2][4] = {
2186  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2187  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2188  };
2189 
2190  const uint16_t *Opc = nullptr;
2191  switch (RetVT.SimpleTy) {
2192  default: return false;
2193  case MVT::f32: Opc = &OpcTable[0][0]; break;
2194  case MVT::f64: Opc = &OpcTable[1][0]; break;
2195  }
2196 
2197  const Value *LHS = I->getOperand(1);
2198  const Value *RHS = I->getOperand(2);
2199 
2200  unsigned LHSReg = getRegForValue(LHS);
2201  bool LHSIsKill = hasTrivialKill(LHS);
2202 
2203  unsigned RHSReg = getRegForValue(RHS);
2204  bool RHSIsKill = hasTrivialKill(RHS);
2205 
2206  unsigned CmpLHSReg = getRegForValue(CmpLHS);
2207  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
2208 
2209  unsigned CmpRHSReg = getRegForValue(CmpRHS);
2210  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
2211 
2212  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2213  return false;
2214 
2215  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2216  unsigned ResultReg;
2217 
2218  if (Subtarget->hasAVX512()) {
2219  // If we have AVX512 we can use a mask compare and masked movss/sd.
2220  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2221  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2222 
2223  unsigned CmpOpcode =
2224  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2225  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
2226  CmpRHSReg, CmpRHSIsKill, CC);
2227 
2228  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2229  // bits of the result register since it's not based on any of the inputs.
2230  unsigned ImplicitDefReg = createResultReg(VR128X);
2231  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2232  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2233 
2234  // Place RHSReg in the passthru of the masked movss/sd operation and put
2235  // LHSReg in the input. The mask input comes from the compare.
2236  unsigned MovOpcode =
2237  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2238  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
2239  CmpReg, true, ImplicitDefReg, true,
2240  LHSReg, LHSIsKill);
2241 
2242  ResultReg = createResultReg(RC);
2243  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2244  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2245 
2246  } else if (Subtarget->hasAVX()) {
2247  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2248 
2249  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2250  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2251  // uses XMM0 as the selection register. That may need just as many
2252  // instructions as the AND/ANDN/OR sequence due to register moves, so
2253  // don't bother.
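      // Roughly, for f32 the sequence built below is (operand order as in the
      // instruction builders, register names illustrative):
      //   VCMPSSrr    %mask, %cmplhs, %cmprhs, $CC  ; all-ones/all-zeros mask
      //   VBLENDVPSrr %res, %rhs, %lhs, %mask       ; picks LHS where mask set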
2254  unsigned CmpOpcode =
2255  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2256  unsigned BlendOpcode =
2257  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2258 
2259  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
2260  CmpRHSReg, CmpRHSIsKill, CC);
2261  unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
2262  LHSReg, LHSIsKill, CmpReg, true);
2263  ResultReg = createResultReg(RC);
2264  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2265  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2266  } else {
2267  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2268  unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
2269  CmpRHSReg, CmpRHSIsKill, CC);
2270  unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
2271  LHSReg, LHSIsKill);
2272  unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
2273  RHSReg, RHSIsKill);
2274  unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
2275  AndReg, /*IsKill=*/true);
2276  ResultReg = createResultReg(RC);
2277  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2278  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2279  }
2280  updateValueMap(I, ResultReg);
2281  return true;
2282 }
2283 
2284 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2285  // These are pseudo CMOV instructions and will be later expanded into control-
2286  // flow.
2287  unsigned Opc;
2288  switch (RetVT.SimpleTy) {
2289  default: return false;
2290  case MVT::i8: Opc = X86::CMOV_GR8; break;
2291  case MVT::i16: Opc = X86::CMOV_GR16; break;
2292  case MVT::i32: Opc = X86::CMOV_GR32; break;
2293  case MVT::f32: Opc = X86::CMOV_FR32; break;
2294  case MVT::f64: Opc = X86::CMOV_FR64; break;
2295  }
2296 
2297  const Value *Cond = I->getOperand(0);
2298  X86::CondCode CC = X86::COND_NE;
2299 
2300  // Optimize conditions coming from a compare if both instructions are in the
2301  // same basic block (values defined in other basic blocks may not have
2302  // initialized registers).
2303  const auto *CI = dyn_cast<CmpInst>(Cond);
2304  if (CI && (CI->getParent() == I->getParent())) {
2305  bool NeedSwap;
2306  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2307  if (CC > X86::LAST_VALID_COND)
2308  return false;
2309 
2310  const Value *CmpLHS = CI->getOperand(0);
2311  const Value *CmpRHS = CI->getOperand(1);
2312 
2313  if (NeedSwap)
2314  std::swap(CmpLHS, CmpRHS);
2315 
2316  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2317  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2318  return false;
2319  } else {
2320  unsigned CondReg = getRegForValue(Cond);
2321  if (CondReg == 0)
2322  return false;
2323  bool CondIsKill = hasTrivialKill(Cond);
2324 
2325  // In case CondReg is a K register, COPY to a GPR
2326  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2327  unsigned KCondReg = CondReg;
2328  CondReg = createResultReg(&X86::GR32RegClass);
2329  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2330  TII.get(TargetOpcode::COPY), CondReg)
2331  .addReg(KCondReg, getKillRegState(CondIsKill));
2332  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2333  X86::sub_8bit);
2334  }
2335  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2336  .addReg(CondReg, getKillRegState(CondIsKill))
2337  .addImm(1);
2338  }
2339 
2340  const Value *LHS = I->getOperand(1);
2341  const Value *RHS = I->getOperand(2);
2342 
2343  unsigned LHSReg = getRegForValue(LHS);
2344  bool LHSIsKill = hasTrivialKill(LHS);
2345 
2346  unsigned RHSReg = getRegForValue(RHS);
2347  bool RHSIsKill = hasTrivialKill(RHS);
2348 
2349  if (!LHSReg || !RHSReg)
2350  return false;
2351 
2352  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2353 
2354  unsigned ResultReg =
2355  fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
2356  updateValueMap(I, ResultReg);
2357  return true;
2358 }
2359 
2360 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2361  MVT RetVT;
2362  if (!isTypeLegal(I->getType(), RetVT))
2363  return false;
2364 
2365  // Check if we can fold the select.
2366  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2367  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2368  const Value *Opnd = nullptr;
2369  switch (Predicate) {
2370  default: break;
2371  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2372  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2373  }
2374  // No need for a select anymore - this is an unconditional move.
2375  if (Opnd) {
2376  unsigned OpReg = getRegForValue(Opnd);
2377  if (OpReg == 0)
2378  return false;
2379  bool OpIsKill = hasTrivialKill(Opnd);
2380  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2381  unsigned ResultReg = createResultReg(RC);
2382  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2383  TII.get(TargetOpcode::COPY), ResultReg)
2384  .addReg(OpReg, getKillRegState(OpIsKill));
2385  updateValueMap(I, ResultReg);
2386  return true;
2387  }
2388  }
2389 
2390  // First try to use real conditional move instructions.
2391  if (X86FastEmitCMoveSelect(RetVT, I))
2392  return true;
2393 
2394  // Try to use a sequence of SSE instructions to simulate a conditional move.
2395  if (X86FastEmitSSESelect(RetVT, I))
2396  return true;
2397 
2398  // Fall back to pseudo conditional move instructions, which will be later
2399  // converted to control-flow.
2400  if (X86FastEmitPseudoSelect(RetVT, I))
2401  return true;
2402 
2403  return false;
2404 }
2405 
2406 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2407  // The target-independent selection algorithm in FastISel already knows how
2408  // to select a SINT_TO_FP if the target is SSE but not AVX.
2409  // Early exit if the subtarget doesn't have AVX.
2410  if (!Subtarget->hasAVX())
2411  return false;
2412 
2413  Type *InTy = I->getOperand(0)->getType();
2414  if (!InTy->isIntegerTy(32) && !InTy->isIntegerTy(64))
2415  return false;
2416 
2417  // Select integer to float/double conversion.
2418  unsigned OpReg = getRegForValue(I->getOperand(0));
2419  if (OpReg == 0)
2420  return false;
2421 
2422  const TargetRegisterClass *RC = nullptr;
2423  unsigned Opcode;
2424 
2425  if (I->getType()->isDoubleTy()) {
2426  // sitofp int -> double
2427  Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SD64rr : X86::VCVTSI2SDrr;
2428  RC = &X86::FR64RegClass;
2429  } else if (I->getType()->isFloatTy()) {
2430  // sitofp int -> float
2431  Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SS64rr : X86::VCVTSI2SSrr;
2432  RC = &X86::FR32RegClass;
2433  } else
2434  return false;
2435 
2436  unsigned ImplicitDefReg = createResultReg(RC);
2437  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2438  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2439  unsigned ResultReg =
2440  fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
2441  updateValueMap(I, ResultReg);
2442  return true;
2443 }
2444 
2445 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2446 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2447  unsigned TargetOpc,
2448  const TargetRegisterClass *RC) {
2449  assert((I->getOpcode() == Instruction::FPExt ||
2450  I->getOpcode() == Instruction::FPTrunc) &&
2451  "Instruction must be an FPExt or FPTrunc!");
2452 
2453  unsigned OpReg = getRegForValue(I->getOperand(0));
2454  if (OpReg == 0)
2455  return false;
2456 
2457  unsigned ImplicitDefReg;
2458  if (Subtarget->hasAVX()) {
2459  ImplicitDefReg = createResultReg(RC);
2460  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2461  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2462 
2463  }
2464 
2465  unsigned ResultReg = createResultReg(RC);
2466  MachineInstrBuilder MIB;
2467  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2468  ResultReg);
2469 
2470  if (Subtarget->hasAVX())
2471  MIB.addReg(ImplicitDefReg);
2472 
2473  MIB.addReg(OpReg);
2474  updateValueMap(I, ResultReg);
2475  return true;
2476 }
2477 
2478 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2479  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2480  I->getOperand(0)->getType()->isFloatTy()) {
2481  bool HasAVX512 = Subtarget->hasAVX512();
2482  // fpext from float to double.
2483  unsigned Opc =
2484  HasAVX512 ? X86::VCVTSS2SDZrr
2485  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2486  return X86SelectFPExtOrFPTrunc(
2487  I, Opc, HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass);
2488  }
2489 
2490  return false;
2491 }
2492 
2493 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2494  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2495  I->getOperand(0)->getType()->isDoubleTy()) {
2496  bool HasAVX512 = Subtarget->hasAVX512();
2497  // fptrunc from double to float.
2498  unsigned Opc =
2499  HasAVX512 ? X86::VCVTSD2SSZrr
2500  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2501  return X86SelectFPExtOrFPTrunc(
2502  I, Opc, HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass);
2503  }
2504 
2505  return false;
2506 }
2507 
2508 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2509  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2510  EVT DstVT = TLI.getValueType(DL, I->getType());
2511 
2512  // This code only handles truncation to byte.
2513  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2514  return false;
2515  if (!TLI.isTypeLegal(SrcVT))
2516  return false;
2517 
2518  unsigned InputReg = getRegForValue(I->getOperand(0));
2519  if (!InputReg)
2520  // Unhandled operand. Halt "fast" selection and bail.
2521  return false;
2522 
2523  if (SrcVT == MVT::i8) {
2524  // Truncate from i8 to i1; no code needed.
2525  updateValueMap(I, InputReg);
2526  return true;
2527  }
2528 
2529  // Issue an extract_subreg.
2530  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2531  InputReg, false,
2532  X86::sub_8bit);
2533  if (!ResultReg)
2534  return false;
2535 
2536  updateValueMap(I, ResultReg);
2537  return true;
2538 }
2539 
2540 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2541  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2542 }
2543 
2544 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2545  X86AddressMode SrcAM, uint64_t Len) {
2546 
2547  // Make sure we don't bloat code by inlining very large memcpy's.
2548  if (!IsMemcpySmall(Len))
2549  return false;
2550 
2551  bool i64Legal = Subtarget->is64Bit();
2552 
2553  // We don't care about alignment here since we just emit integer accesses.
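      // For example, a 15-byte copy on x86-64 is split into four load/store
      // pairs: i64 (8 bytes) + i32 (4) + i16 (2) + i8 (1).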
2554  while (Len) {
2555  MVT VT;
2556  if (Len >= 8 && i64Legal)
2557  VT = MVT::i64;
2558  else if (Len >= 4)
2559  VT = MVT::i32;
2560  else if (Len >= 2)
2561  VT = MVT::i16;
2562  else
2563  VT = MVT::i8;
2564 
2565  unsigned Reg;
2566  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2567  RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2568  assert(RV && "Failed to emit load or store??");
2569 
2570  unsigned Size = VT.getSizeInBits()/8;
2571  Len -= Size;
2572  DestAM.Disp += Size;
2573  SrcAM.Disp += Size;
2574  }
2575 
2576  return true;
2577 }
2578 
2579 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2580  // FIXME: Handle more intrinsics.
2581  switch (II->getIntrinsicID()) {
2582  default: return false;
2583  case Intrinsic::convert_from_fp16:
2584  case Intrinsic::convert_to_fp16: {
2585  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2586  return false;
2587 
2588  const Value *Op = II->getArgOperand(0);
2589  unsigned InputReg = getRegForValue(Op);
2590  if (InputReg == 0)
2591  return false;
2592 
2593  // F16C only allows converting from float to half and from half to float.
2594  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2595  if (IsFloatToHalf) {
2596  if (!Op->getType()->isFloatTy())
2597  return false;
2598  } else {
2599  if (!II->getType()->isFloatTy())
2600  return false;
2601  }
2602 
2603  unsigned ResultReg = 0;
2604  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2605  if (IsFloatToHalf) {
2606  // 'InputReg' is implicitly promoted from register class FR32 to
2607  // register class VR128 by method 'constrainOperandRegClass' which is
2608  // directly called by 'fastEmitInst_ri'.
2609  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2610  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2611  // It's consistent with the other FP instructions, which are usually
2612  // controlled by MXCSR.
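      // Sketch of the float->half path built below (register names
      // illustrative):
      //   VCVTPS2PHrr  %xmm_tmp, %xmm_in, 4   ; convert, rounding per MXCSR.RC
      //   VMOVPDI2DIrr %gr32, %xmm_tmp        ; move the low 32 bits to a GPR
      //   extract sub_16bit                   ; the i16 half value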
2613  InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
2614 
2615  // Move the lower 32-bits of ResultReg to another register of class GR32.
2616  ResultReg = createResultReg(&X86::GR32RegClass);
2617  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2618  TII.get(X86::VMOVPDI2DIrr), ResultReg)
2619  .addReg(InputReg, RegState::Kill);
2620 
2621  // The result value is in the lower 16-bits of ResultReg.
2622  unsigned RegIdx = X86::sub_16bit;
2623  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
2624  } else {
2625  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2626  // Explicitly sign-extend the input to 32-bit.
2627  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
2628  /*Kill=*/false);
2629 
2630  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2631  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2632  InputReg, /*Kill=*/true);
2633 
2634  InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
2635 
2636  // The result value is in the lower 32-bits of ResultReg.
2637  // Emit an explicit copy from register class VR128 to register class FR32.
2638  ResultReg = createResultReg(&X86::FR32RegClass);
2639  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2640  TII.get(TargetOpcode::COPY), ResultReg)
2641  .addReg(InputReg, RegState::Kill);
2642  }
2643 
2644  updateValueMap(II, ResultReg);
2645  return true;
2646  }
2647  case Intrinsic::frameaddress: {
2648  MachineFunction *MF = FuncInfo.MF;
2649  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2650  return false;
2651 
2652  Type *RetTy = II->getCalledFunction()->getReturnType();
2653 
2654  MVT VT;
2655  if (!isTypeLegal(RetTy, VT))
2656  return false;
2657 
2658  unsigned Opc;
2659  const TargetRegisterClass *RC = nullptr;
2660 
2661  switch (VT.SimpleTy) {
2662  default: llvm_unreachable("Invalid result type for frameaddress.");
2663  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2664  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2665  }
2666 
2667  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2668  // we get the wrong frame register.
2669  MachineFrameInfo &MFI = MF->getFrameInfo();
2670  MFI.setFrameAddressIsTaken(true);
2671 
2672  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2673  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2674  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2675  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2676  "Invalid Frame Register!");
2677 
2678  // Always make a copy of the frame register to a vreg first, so that we
2679  // never directly reference the frame register (the TwoAddressInstruction-
2680  // Pass doesn't like that).
2681  unsigned SrcReg = createResultReg(RC);
2682  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2683  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2684 
2685  // Now recursively load from the frame address.
2686  // movq (%rbp), %rax
2687  // movq (%rax), %rax
2688  // movq (%rax), %rax
2689  // ...
2690  unsigned DestReg;
2691  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2692  while (Depth--) {
2693  DestReg = createResultReg(RC);
2694  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2695  TII.get(Opc), DestReg), SrcReg);
2696  SrcReg = DestReg;
2697  }
2698 
2699  updateValueMap(II, SrcReg);
2700  return true;
2701  }
2702  case Intrinsic::memcpy: {
2703  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2704  // Don't handle volatile or variable length memcpys.
2705  if (MCI->isVolatile())
2706  return false;
2707 
2708  if (isa<ConstantInt>(MCI->getLength())) {
2709  // Small memcpy's are common enough that we want to do them
2710  // without a call if possible.
2711  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2712  if (IsMemcpySmall(Len)) {
2713  X86AddressMode DestAM, SrcAM;
2714  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2715  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2716  return false;
2717  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2718  return true;
2719  }
2720  }
2721 
2722  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2723  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2724  return false;
2725 
2726  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2727  return false;
2728 
2729  return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2);
2730  }
2731  case Intrinsic::memset: {
2732  const MemSetInst *MSI = cast<MemSetInst>(II);
2733 
2734  if (MSI->isVolatile())
2735  return false;
2736 
2737  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2738  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2739  return false;
2740 
2741  if (MSI->getDestAddressSpace() > 255)
2742  return false;
2743 
2744  return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
2745  }
2746  case Intrinsic::stackprotector: {
2747  // Emit code to store the stack guard onto the stack.
2748  EVT PtrTy = TLI.getPointerTy(DL);
2749 
2750  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2751  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2752 
2753  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2754 
2755  // Grab the frame index.
2756  X86AddressMode AM;
2757  if (!X86SelectAddress(Slot, AM)) return false;
2758  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2759  return true;
2760  }
2761  case Intrinsic::dbg_declare: {
2762  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2763  X86AddressMode AM;
2764  assert(DI->getAddress() && "Null address should be checked earlier!");
2765  if (!X86SelectAddress(DI->getAddress(), AM))
2766  return false;
2767  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2768  // FIXME may need to add RegState::Debug to any registers produced,
2769  // although ESP/EBP should be the only ones at the moment.
2771  "Expected inlined-at fields to agree");
2772  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2773  .addImm(0)
2774  .addMetadata(DI->getVariable())
2775  .addMetadata(DI->getExpression());
2776  return true;
2777  }
2778  case Intrinsic::trap: {
2779  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2780  return true;
2781  }
2782  case Intrinsic::sqrt: {
2783  if (!Subtarget->hasSSE1())
2784  return false;
2785 
2786  Type *RetTy = II->getCalledFunction()->getReturnType();
2787 
2788  MVT VT;
2789  if (!isTypeLegal(RetTy, VT))
2790  return false;
2791 
2792  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2793  // is not generated by FastISel yet.
2794  // FIXME: Update this code once tablegen can handle it.
2795  static const uint16_t SqrtOpc[2][2] = {
2796  {X86::SQRTSSr, X86::VSQRTSSr},
2797  {X86::SQRTSDr, X86::VSQRTSDr}
2798  };
2799  bool HasAVX = Subtarget->hasAVX();
2800  unsigned Opc;
2801  const TargetRegisterClass *RC;
2802  switch (VT.SimpleTy) {
2803  default: return false;
2804  case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
2805  case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
2806  }
2807 
2808  const Value *SrcVal = II->getArgOperand(0);
2809  unsigned SrcReg = getRegForValue(SrcVal);
2810 
2811  if (SrcReg == 0)
2812  return false;
2813 
2814  unsigned ImplicitDefReg = 0;
2815  if (HasAVX) {
2816  ImplicitDefReg = createResultReg(RC);
2817  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2818  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2819  }
2820 
2821  unsigned ResultReg = createResultReg(RC);
2822  MachineInstrBuilder MIB;
2823  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2824  ResultReg);
2825 
2826  if (ImplicitDefReg)
2827  MIB.addReg(ImplicitDefReg);
2828 
2829  MIB.addReg(SrcReg);
2830 
2831  updateValueMap(II, ResultReg);
2832  return true;
2833  }
2834  case Intrinsic::sadd_with_overflow:
2835  case Intrinsic::uadd_with_overflow:
2836  case Intrinsic::ssub_with_overflow:
2837  case Intrinsic::usub_with_overflow:
2838  case Intrinsic::smul_with_overflow:
2839  case Intrinsic::umul_with_overflow: {
2840  // This implements the basic lowering of the xalu with overflow intrinsics
2841  // into add/sub/mul followed by either seto or setb.
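      // e.g. llvm.sadd.with.overflow.i32(%a, %b) becomes roughly:
      //   ADD32rr %a, %b       ; first result, the value
      //   SETOr   %ovf8        ; second result, the overflow bit as an i8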
2842  const Function *Callee = II->getCalledFunction();
2843  auto *Ty = cast<StructType>(Callee->getReturnType());
2844  Type *RetTy = Ty->getTypeAtIndex(0U);
2845  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2846  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2847  "Overflow value expected to be an i1");
2848 
2849  MVT VT;
2850  if (!isTypeLegal(RetTy, VT))
2851  return false;
2852 
2853  if (VT < MVT::i8 || VT > MVT::i64)
2854  return false;
2855 
2856  const Value *LHS = II->getArgOperand(0);
2857  const Value *RHS = II->getArgOperand(1);
2858 
2859  // Canonicalize immediate to the RHS.
2860  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2861  isCommutativeIntrinsic(II))
2862  std::swap(LHS, RHS);
2863 
2864  bool UseIncDec = false;
2865  if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
2866  UseIncDec = true;
2867 
2868  unsigned BaseOpc, CondOpc;
2869  switch (II->getIntrinsicID()) {
2870  default: llvm_unreachable("Unexpected intrinsic!");
2871  case Intrinsic::sadd_with_overflow:
2872  BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
2873  CondOpc = X86::SETOr;
2874  break;
2875  case Intrinsic::uadd_with_overflow:
2876  BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
2877  case Intrinsic::ssub_with_overflow:
2878  BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
2879  CondOpc = X86::SETOr;
2880  break;
2881  case Intrinsic::usub_with_overflow:
2882  BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
2883  case Intrinsic::smul_with_overflow:
2884  BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
2885  case Intrinsic::umul_with_overflow:
2886  BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
2887  }
2888 
2889  unsigned LHSReg = getRegForValue(LHS);
2890  if (LHSReg == 0)
2891  return false;
2892  bool LHSIsKill = hasTrivialKill(LHS);
2893 
2894  unsigned ResultReg = 0;
2895  // Check if we have an immediate version.
2896  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2897  static const uint16_t Opc[2][4] = {
2898  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2899  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2900  };
2901 
2902  if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
2903  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2904  bool IsDec = BaseOpc == X86ISD::DEC;
2905  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2906  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2907  .addReg(LHSReg, getKillRegState(LHSIsKill));
2908  } else
2909  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2910  CI->getZExtValue());
2911  }
2912 
2913  unsigned RHSReg;
2914  bool RHSIsKill;
2915  if (!ResultReg) {
2916  RHSReg = getRegForValue(RHS);
2917  if (RHSReg == 0)
2918  return false;
2919  RHSIsKill = hasTrivialKill(RHS);
2920  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2921  RHSIsKill);
2922  }
2923 
2924  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2925  // it manually.
2926  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2927  static const uint16_t MULOpc[] =
2928  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2929  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2930  // First copy the first operand into RAX, which is an implicit input to
2931  // the X86::MUL*r instruction.
2932  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2933  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2934  .addReg(LHSReg, getKillRegState(LHSIsKill));
2935  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2936  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2937  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2938  static const uint16_t MULOpc[] =
2939  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2940  if (VT == MVT::i8) {
2941  // Copy the first operand into AL, which is an implicit input to the
2942  // X86::IMUL8r instruction.
2943  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2944  TII.get(TargetOpcode::COPY), X86::AL)
2945  .addReg(LHSReg, getKillRegState(LHSIsKill));
2946  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2947  RHSIsKill);
2948  } else
2949  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2950  TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2951  RHSReg, RHSIsKill);
2952  }
2953 
2954  if (!ResultReg)
2955  return false;
2956 
2957  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2958  unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
2959  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2960  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
2961  ResultReg2);
2962 
2963  updateValueMap(II, ResultReg, 2);
2964  return true;
2965  }
2966  case Intrinsic::x86_sse_cvttss2si:
2967  case Intrinsic::x86_sse_cvttss2si64:
2968  case Intrinsic::x86_sse2_cvttsd2si:
2969  case Intrinsic::x86_sse2_cvttsd2si64: {
2970  bool IsInputDouble;
2971  switch (II->getIntrinsicID()) {
2972  default: llvm_unreachable("Unexpected intrinsic.");
2973  case Intrinsic::x86_sse_cvttss2si:
2974  case Intrinsic::x86_sse_cvttss2si64:
2975  if (!Subtarget->hasSSE1())
2976  return false;
2977  IsInputDouble = false;
2978  break;
2979  case Intrinsic::x86_sse2_cvttsd2si:
2980  case Intrinsic::x86_sse2_cvttsd2si64:
2981  if (!Subtarget->hasSSE2())
2982  return false;
2983  IsInputDouble = true;
2984  break;
2985  }
2986 
2987  Type *RetTy = II->getCalledFunction()->getReturnType();
2988  MVT VT;
2989  if (!isTypeLegal(RetTy, VT))
2990  return false;
2991 
2992  static const uint16_t CvtOpc[2][2][2] = {
2993  { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr },
2994  { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } },
2995  { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr },
2996  { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } }
2997  };
2998  bool HasAVX = Subtarget->hasAVX();
2999  unsigned Opc;
3000  switch (VT.SimpleTy) {
3001  default: llvm_unreachable("Unexpected result type.");
3002  case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
3003  case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
3004  }
3005 
3006  // Check if we can fold insertelement instructions into the convert.
3007  const Value *Op = II->getArgOperand(0);
3008  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3009  const Value *Index = IE->getOperand(2);
3010  if (!isa<ConstantInt>(Index))
3011  break;
3012  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3013 
3014  if (Idx == 0) {
3015  Op = IE->getOperand(1);
3016  break;
3017  }
3018  Op = IE->getOperand(0);
3019  }
3020 
3021  unsigned Reg = getRegForValue(Op);
3022  if (Reg == 0)
3023  return false;
3024 
3025  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3026  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3027  .addReg(Reg);
3028 
3029  updateValueMap(II, ResultReg);
3030  return true;
3031  }
3032  }
3033 }
3034 
3035 bool X86FastISel::fastLowerArguments() {
3036  if (!FuncInfo.CanLowerReturn)
3037  return false;
3038 
3039  const Function *F = FuncInfo.Fn;
3040  if (F->isVarArg())
3041  return false;
3042 
3043  CallingConv::ID CC = F->getCallingConv();
3044  if (CC != CallingConv::C)
3045  return false;
3046 
3047  if (Subtarget->isCallingConvWin64(CC))
3048  return false;
3049 
3050  if (!Subtarget->is64Bit())
3051  return false;
3052 
3053  if (Subtarget->useSoftFloat())
3054  return false;
3055 
3056  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
3057  unsigned GPRCnt = 0;
3058  unsigned FPRCnt = 0;
3059  for (auto const &Arg : F->args()) {
3060  if (Arg.hasAttribute(Attribute::ByVal) ||
3061  Arg.hasAttribute(Attribute::InReg) ||
3062  Arg.hasAttribute(Attribute::StructRet) ||
3063  Arg.hasAttribute(Attribute::SwiftSelf) ||
3064  Arg.hasAttribute(Attribute::SwiftError) ||
3065  Arg.hasAttribute(Attribute::Nest))
3066  return false;
3067 
3068  Type *ArgTy = Arg.getType();
3069  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3070  return false;
3071 
3072  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3073  if (!ArgVT.isSimple()) return false;
3074  switch (ArgVT.getSimpleVT().SimpleTy) {
3075  default: return false;
3076  case MVT::i32:
3077  case MVT::i64:
3078  ++GPRCnt;
3079  break;
3080  case MVT::f32:
3081  case MVT::f64:
3082  if (!Subtarget->hasSSE1())
3083  return false;
3084  ++FPRCnt;
3085  break;
3086  }
3087 
3088  if (GPRCnt > 6)
3089  return false;
3090 
3091  if (FPRCnt > 8)
3092  return false;
3093  }
3094 
3095  static const MCPhysReg GPR32ArgRegs[] = {
3096  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3097  };
3098  static const MCPhysReg GPR64ArgRegs[] = {
3099  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3100  };
3101  static const MCPhysReg XMMArgRegs[] = {
3102  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3103  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3104  };
3105 
3106  unsigned GPRIdx = 0;
3107  unsigned FPRIdx = 0;
3108  for (auto const &Arg : F->args()) {
3109  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3110  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3111  unsigned SrcReg;
3112  switch (VT.SimpleTy) {
3113  default: llvm_unreachable("Unexpected value type.");
3114  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3115  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3116  case MVT::f32: LLVM_FALLTHROUGH;
3117  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3118  }
3119  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3120  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3121  // Without this, EmitLiveInCopies may eliminate the livein if its only
3122  // use is a bitcast (which isn't turned into an instruction).
3123  unsigned ResultReg = createResultReg(RC);
3124  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3125  TII.get(TargetOpcode::COPY), ResultReg)
3126  .addReg(DstReg, getKillRegState(true));
3127  updateValueMap(&Arg, ResultReg);
3128  }
3129  return true;
3130 }
3131 
3132 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3133  CallingConv::ID CC,
3134  ImmutableCallSite *CS) {
3135  if (Subtarget->is64Bit())
3136  return 0;
3137  if (Subtarget->getTargetTriple().isOSMSVCRT())
3138  return 0;
3139  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3140  CC == CallingConv::HiPE)
3141  return 0;
3142 
3143  if (CS)
3144  if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
3145  CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3146  return 0;
3147 
3148  return 4;
3149 }
3150 
3151 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3152  auto &OutVals = CLI.OutVals;
3153  auto &OutFlags = CLI.OutFlags;
3154  auto &OutRegs = CLI.OutRegs;
3155  auto &Ins = CLI.Ins;
3156  auto &InRegs = CLI.InRegs;
3157  CallingConv::ID CC = CLI.CallConv;
3158  bool &IsTailCall = CLI.IsTailCall;
3159  bool IsVarArg = CLI.IsVarArg;
3160  const Value *Callee = CLI.Callee;
3161  MCSymbol *Symbol = CLI.Symbol;
3162 
3163  bool Is64Bit = Subtarget->is64Bit();
3164  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3165 
3166  const CallInst *CI =
3167  CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
3168  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
3169 
 3170  // Functions using the no_caller_saved_registers attribute need special handling.
3171  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3172  (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
3173  return false;
3174 
 3175  // Handle only C, fastcc, webkit_js, swift and the x86-specific C-like CCs for now.
 3176  switch (CC) {
 3177  default: return false;
 3178  case CallingConv::C:
 3179  case CallingConv::Fast:
 3180  case CallingConv::WebKit_JS:
 3181  case CallingConv::Swift:
 3182  case CallingConv::X86_FastCall:
 3183  case CallingConv::X86_StdCall:
 3184  case CallingConv::X86_ThisCall:
 3185  case CallingConv::Win64:
 3186  case CallingConv::X86_64_SysV:
 3187  break;
 3188  }
3189 
3190  // Allow SelectionDAG isel to handle tail calls.
3191  if (IsTailCall)
3192  return false;
3193 
3194  // fastcc with -tailcallopt is intended to provide a guaranteed
3195  // tail call optimization. Fastisel doesn't know how to do that.
3196  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
3197  return false;
3198 
3199  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3200  // x86-32. Special handling for x86-64 is implemented.
3201  if (IsVarArg && IsWin64)
3202  return false;
3203 
3204  // Don't know about inalloca yet.
3205  if (CLI.CS && CLI.CS->hasInAllocaArgument())
3206  return false;
3207 
3208  for (auto Flag : CLI.OutFlags)
3209  if (Flag.isSwiftError())
3210  return false;
3211 
3212  SmallVector<MVT, 16> OutVTs;
3213  SmallVector<unsigned, 16> ArgRegs;
3214 
3215  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3216  // instruction. This is safe because it is common to all FastISel supported
3217  // calling conventions on x86.
3218  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3219  Value *&Val = OutVals[i];
3220  ISD::ArgFlagsTy Flags = OutFlags[i];
3221  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3222  if (CI->getBitWidth() < 32) {
 3223  if (Flags.isSExt())
 3224  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
 3225  else
 3226  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
 3227  }
3228  }
3229 
3230  // Passing bools around ends up doing a trunc to i1 and passing it.
3231  // Codegen this as an argument + "and 1".
3232  MVT VT;
3233  auto *TI = dyn_cast<TruncInst>(Val);
3234  unsigned ResultReg;
3235  if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
3236  (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
3237  TI->hasOneUse()) {
3238  Value *PrevVal = TI->getOperand(0);
3239  ResultReg = getRegForValue(PrevVal);
3240 
3241  if (!ResultReg)
3242  return false;
3243 
3244  if (!isTypeLegal(PrevVal->getType(), VT))
3245  return false;
3246 
3247  ResultReg =
3248  fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
3249  } else {
3250  if (!isTypeLegal(Val->getType(), VT))
3251  return false;
3252  ResultReg = getRegForValue(Val);
3253  }
3254 
3255  if (!ResultReg)
3256  return false;
3257 
3258  ArgRegs.push_back(ResultReg);
3259  OutVTs.push_back(VT);
3260  }
3261 
 3262  // Analyze operands of the call, assigning locations to each operand.
 3263  SmallVector<CCValAssign, 16> ArgLocs;
 3264  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3265 
3266  // Allocate shadow area for Win64
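// The 32 bytes are the Win64 "shadow space": four 8-byte home slots that the
// caller must reserve for the RCX, RDX, R8 and R9 argument registers, even
// when fewer arguments are actually passed in registers.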
3267  if (IsWin64)
3268  CCInfo.AllocateStack(32, 8);
3269 
3270  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3271 
3272  // Get a count of how many bytes are to be pushed on the stack.
3273  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3274 
3275  // Issue CALLSEQ_START
3276  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3277  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3278  .addImm(NumBytes).addImm(0).addImm(0);
3279 
3280  // Walk the register/memloc assignments, inserting copies/loads.
3281  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3282  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3283  CCValAssign const &VA = ArgLocs[i];
3284  const Value *ArgVal = OutVals[VA.getValNo()];
3285  MVT ArgVT = OutVTs[VA.getValNo()];
3286 
3287  if (ArgVT == MVT::x86mmx)
3288  return false;
3289 
3290  unsigned ArgReg = ArgRegs[VA.getValNo()];
3291 
3292  // Promote the value if needed.
3293  switch (VA.getLocInfo()) {
3294  case CCValAssign::Full: break;
3295  case CCValAssign::SExt: {
3296  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3297  "Unexpected extend");
3298 
3299  if (ArgVT == MVT::i1)
3300  return false;
3301 
3302  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3303  ArgVT, ArgReg);
3304  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3305  ArgVT = VA.getLocVT();
3306  break;
3307  }
3308  case CCValAssign::ZExt: {
3309  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3310  "Unexpected extend");
3311 
3312  // Handle zero-extension from i1 to i8, which is common.
3313  if (ArgVT == MVT::i1) {
3314  // Set the high bits to zero.
3315  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
3316  ArgVT = MVT::i8;
3317 
3318  if (ArgReg == 0)
3319  return false;
3320  }
3321 
3322  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3323  ArgVT, ArgReg);
3324  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3325  ArgVT = VA.getLocVT();
3326  break;
3327  }
3328  case CCValAssign::AExt: {
3329  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3330  "Unexpected extend");
3331  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3332  ArgVT, ArgReg);
3333  if (!Emitted)
3334  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3335  ArgVT, ArgReg);
3336  if (!Emitted)
3337  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3338  ArgVT, ArgReg);
3339 
 3340  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3341  ArgVT = VA.getLocVT();
3342  break;
3343  }
3344  case CCValAssign::BCvt: {
3345  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
3346  /*TODO: Kill=*/false);
3347  assert(ArgReg && "Failed to emit a bitcast!");
3348  ArgVT = VA.getLocVT();
3349  break;
3350  }
3351  case CCValAssign::VExt:
3352  // VExt has not been implemented, so this should be impossible to reach
 3353  // for now. However, fall back to SelectionDAG isel once it is implemented.
 3354  return false;
 3355  case CCValAssign::AExtUpper:
 3356  case CCValAssign::SExtUpper:
 3357  case CCValAssign::ZExtUpper:
 3358  case CCValAssign::FPExt:
3359  llvm_unreachable("Unexpected loc info!");
3360  case CCValAssign::Indirect:
3361  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3362  // support this.
3363  return false;
3364  }
3365 
3366  if (VA.isRegLoc()) {
3367  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3368  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3369  OutRegs.push_back(VA.getLocReg());
3370  } else {
3371  assert(VA.isMemLoc());
3372 
3373  // Don't emit stores for undef values.
3374  if (isa<UndefValue>(ArgVal))
3375  continue;
3376 
3377  unsigned LocMemOffset = VA.getLocMemOffset();
3378  X86AddressMode AM;
3379  AM.Base.Reg = RegInfo->getStackRegister();
3380  AM.Disp = LocMemOffset;
3381  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3382  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3383  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3384  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3385  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3386  if (Flags.isByVal()) {
3387  X86AddressMode SrcAM;
3388  SrcAM.Base.Reg = ArgReg;
3389  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3390  return false;
3391  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3392  // If this is a really simple value, emit this with the Value* version
3393  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3394  // as it can cause us to reevaluate the argument.
3395  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3396  return false;
3397  } else {
3398  bool ValIsKill = hasTrivialKill(ArgVal);
3399  if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
3400  return false;
3401  }
3402  }
3403  }
3404 
 3405  // ELF / PIC requires the GOT pointer to be in the EBX register before
 3406  // function calls made via the PLT.
3407  if (Subtarget->isPICStyleGOT()) {
3408  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3409  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3410  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3411  }
3412 
3413  if (Is64Bit && IsVarArg && !IsWin64) {
3414  // From AMD64 ABI document:
3415  // For calls that may call functions that use varargs or stdargs
3416  // (prototype-less calls or calls to functions containing ellipsis (...) in
 3417  // the declaration) %al is used as a hidden argument to specify the number
 3418  // of SSE registers used. The contents of %al do not need to match exactly
 3419  // the number of registers, but must be an upper bound on the number of SSE
3420  // registers used and is in the range 0 - 8 inclusive.
3421 
3422  // Count the number of XMM registers allocated.
3423  static const MCPhysReg XMMArgRegs[] = {
3424  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3425  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3426  };
3427  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3428  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3429  && "SSE registers cannot be used when SSE is disabled");
3430  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3431  X86::AL).addImm(NumXMMRegs);
3432  }
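// Illustrative example (not from the original source): for a SysV x86-64 call
// such as printf("%f\n", x), the double x is passed in XMM0, so
// getFirstUnallocated returns 1 and the code above emits MOV8ri AL, 1 before
// the call, satisfying the ABI requirement described in the comment above.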
3433 
3434  // Materialize callee address in a register. FIXME: GV address can be
3435  // handled with a CALLpcrel32 instead.
3436  X86AddressMode CalleeAM;
3437  if (!X86SelectCallAddress(Callee, CalleeAM))
3438  return false;
3439 
3440  unsigned CalleeOp = 0;
3441  const GlobalValue *GV = nullptr;
3442  if (CalleeAM.GV != nullptr) {
3443  GV = CalleeAM.GV;
3444  } else if (CalleeAM.Base.Reg != 0) {
3445  CalleeOp = CalleeAM.Base.Reg;
3446  } else
3447  return false;
3448 
3449  // Issue the call.
3450  MachineInstrBuilder MIB;
3451  if (CalleeOp) {
3452  // Register-indirect call.
3453  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3454  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3455  .addReg(CalleeOp);
3456  } else {
3457  // Direct call.
3458  assert(GV && "Not a direct call");
3459  // See if we need any target-specific flags on the GV operand.
3460  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3461  // Ignore NonLazyBind attribute in FastISel
3462  if (OpFlags == X86II::MO_GOTPCREL)
3463  OpFlags = 0;
3464 
3465  // This will be a direct call, or an indirect call through memory for
3466  // NonLazyBind calls or dllimport calls.
3467  bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT;
3468  unsigned CallOpc = NeedLoad
3469  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3470  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3471 
3472  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3473  if (NeedLoad)
3474  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3475  if (Symbol)
3476  MIB.addSym(Symbol, OpFlags);
3477  else
3478  MIB.addGlobalAddress(GV, 0, OpFlags);
3479  if (NeedLoad)
3480  MIB.addReg(0);
3481  }
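// Note: with MO_DLLIMPORT the symbol operand refers to the "__imp_<name>"
// pointer, so the CALL64m/CALL32m form built above loads the actual callee
// address from memory (RIP-relative on x86-64; with a zero base register on
// x86-32).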
3482 
3483  // Add a register mask operand representing the call-preserved registers.
3484  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3485  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3486 
 3487  // Add an implicit use of the GOT pointer in EBX.
 3488  if (Subtarget->isPICStyleGOT())
 3489  MIB.addReg(X86::EBX, RegState::Implicit);
 3490 
 3491  if (Is64Bit && IsVarArg && !IsWin64)
 3492  MIB.addReg(X86::AL, RegState::Implicit);
 3493 
 3494  // Add implicit physical register uses to the call.
 3495  for (auto Reg : OutRegs)
 3496  MIB.addReg(Reg, RegState::Implicit);
 3497 
3498  // Issue CALLSEQ_END
3499  unsigned NumBytesForCalleeToPop =
3500  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3501  TM.Options.GuaranteedTailCallOpt)
3502  ? NumBytes // Callee pops everything.
3503  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
3504  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3505  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3506  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3507 
 3508  // Now handle call return values.
 3509  SmallVector<CCValAssign, 16> RVLocs;
 3510  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3511  CLI.RetTy->getContext());
3512  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3513 
3514  // Copy all of the result registers out of their specified physreg.
3515  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3516  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3517  CCValAssign &VA = RVLocs[i];
3518  EVT CopyVT = VA.getValVT();
3519  unsigned CopyReg = ResultReg + i;
3520  unsigned SrcReg = VA.getLocReg();
3521 
3522  // If this is x86-64, and we disabled SSE, we can't return FP values
3523  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3524  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3525  report_fatal_error("SSE register return with SSE disabled");
3526  }
3527 
3528  // If we prefer to use the value in xmm registers, copy it out as f80 and
3529  // use a truncate to move it from fp stack reg to xmm reg.
3530  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3531  isScalarFPTypeInSSEReg(VA.getValVT())) {
3532  CopyVT = MVT::f80;
3533  CopyReg = createResultReg(&X86::RFP80RegClass);
3534  }
3535 
3536  // Copy out the result.
3537  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3538  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3539  InRegs.push_back(VA.getLocReg());
3540 
3541  // Round the f80 to the right size, which also moves it to the appropriate
3542  // xmm register. This is accomplished by storing the f80 value in memory
3543  // and then loading it back.
3544  if (CopyVT != VA.getValVT()) {
3545  EVT ResVT = VA.getValVT();
3546  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3547  unsigned MemSize = ResVT.getSizeInBits()/8;
3548  int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3549  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3550  TII.get(Opc)), FI)
3551  .addReg(CopyReg);
3552  Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
3553  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3554  TII.get(Opc), ResultReg + i), FI);
3555  }
3556  }
3557 
3558  CLI.ResultReg = ResultReg;
3559  CLI.NumResultRegs = RVLocs.size();
3560  CLI.Call = MIB;
3561 
3562  return true;
3563 }
3564 
3565 bool
3566 X86FastISel::fastSelectInstruction(const Instruction *I) {
3567  switch (I->getOpcode()) {
3568  default: break;
3569  case Instruction::Load:
3570  return X86SelectLoad(I);
3571  case Instruction::Store:
3572  return X86SelectStore(I);
3573  case Instruction::Ret:
3574  return X86SelectRet(I);
3575  case Instruction::ICmp:
3576  case Instruction::FCmp:
3577  return X86SelectCmp(I);
3578  case Instruction::ZExt:
3579  return X86SelectZExt(I);
3580  case Instruction::SExt:
3581  return X86SelectSExt(I);
3582  case Instruction::Br:
3583  return X86SelectBranch(I);
3584  case Instruction::LShr:
3585  case Instruction::AShr:
3586  case Instruction::Shl:
3587  return X86SelectShift(I);
3588  case Instruction::SDiv:
3589  case Instruction::UDiv:
3590  case Instruction::SRem:
3591  case Instruction::URem:
3592  return X86SelectDivRem(I);
3593  case Instruction::Select:
3594  return X86SelectSelect(I);
3595  case Instruction::Trunc:
3596  return X86SelectTrunc(I);
3597  case Instruction::FPExt:
3598  return X86SelectFPExt(I);
3599  case Instruction::FPTrunc:
3600  return X86SelectFPTrunc(I);
3601  case Instruction::SIToFP:
3602  return X86SelectSIToFP(I);
3603  case Instruction::IntToPtr: // Deliberate fall-through.
3604  case Instruction::PtrToInt: {
3605  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3606  EVT DstVT = TLI.getValueType(DL, I->getType());
3607  if (DstVT.bitsGT(SrcVT))
3608  return X86SelectZExt(I);
3609  if (DstVT.bitsLT(SrcVT))
3610  return X86SelectTrunc(I);
3611  unsigned Reg = getRegForValue(I->getOperand(0));
3612  if (Reg == 0) return false;
3613  updateValueMap(I, Reg);
3614  return true;
3615  }
3616  case Instruction::BitCast: {
3617  // Select SSE2/AVX bitcasts between 128/256 bit vector types.
3618  if (!Subtarget->hasSSE2())
3619  return false;
3620 
3621  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3622  EVT DstVT = TLI.getValueType(DL, I->getType());
3623 
3624  if (!SrcVT.isSimple() || !DstVT.isSimple())
3625  return false;
3626 
3627  MVT SVT = SrcVT.getSimpleVT();
3628  MVT DVT = DstVT.getSimpleVT();
3629 
3630  if (!SVT.is128BitVector() &&
3631  !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
3632  !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
3633  (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
3634  DVT.getScalarSizeInBits() >= 32))))
3635  return false;
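// Without AVX-512BW, 512-bit vectors with i8/i16 elements are not legal, which
// is why the 512-bit case above additionally requires hasBWI() or element
// types of at least 32 bits on both sides of the bitcast.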
3636 
3637  unsigned Reg = getRegForValue(I->getOperand(0));
3638  if (Reg == 0)
3639  return false;
3640 
3641  // No instruction is needed for conversion. Reuse the register used by
 3642  // the first operand.
3643  updateValueMap(I, Reg);
3644  return true;
3645  }
3646  }
3647 
3648  return false;
3649 }
3650 
3651 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3652  if (VT > MVT::i64)
3653  return 0;
3654 
3655  uint64_t Imm = CI->getZExtValue();
3656  if (Imm == 0) {
3657  unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3658  switch (VT.SimpleTy) {
3659  default: llvm_unreachable("Unexpected value type");
3660  case MVT::i1:
3661  case MVT::i8:
3662  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
3663  X86::sub_8bit);
3664  case MVT::i16:
3665  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
3666  X86::sub_16bit);
3667  case MVT::i32:
3668  return SrcReg;
3669  case MVT::i64: {
3670  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3671  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3672  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3673  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3674  return ResultReg;
3675  }
3676  }
3677  }
3678 
3679  unsigned Opc = 0;
3680  switch (VT.SimpleTy) {
3681  default: llvm_unreachable("Unexpected value type");
3682  case MVT::i1:
3683  // TODO: Support this properly.
3684  if (Subtarget->hasAVX512())
3685  return 0;
 3686  VT = MVT::i8;
 3687  LLVM_FALLTHROUGH;
 3688  case MVT::i8: Opc = X86::MOV8ri; break;
3689  case MVT::i16: Opc = X86::MOV16ri; break;
3690  case MVT::i32: Opc = X86::MOV32ri; break;
3691  case MVT::i64: {
3692  if (isUInt<32>(Imm))
3693  Opc = X86::MOV32ri;
3694  else if (isInt<32>(Imm))
3695  Opc = X86::MOV64ri32;
3696  else
3697  Opc = X86::MOV64ri;
3698  break;
3699  }
3700  }
3701  if (VT == MVT::i64 && Opc == X86::MOV32ri) {
3702  unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
3703  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3704  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3705  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3706  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3707  return ResultReg;
3708  }
3709  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3710 }
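// Example (sketch): materializing the i64 constant 0x11223344 takes the
// isUInt<32> path above, emitting MOV32ri into a GR32 register followed by
// SUBREG_TO_REG into a GR64 register; this is correct because 32-bit writes
// implicitly zero the upper 32 bits of the destination on x86-64.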
3711 
3712 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3713  if (CFP->isNullValue())
3714  return fastMaterializeFloatZero(CFP);
3715 
3716  // Can't handle alternate code models yet.
3717  CodeModel::Model CM = TM.getCodeModel();
3718  if (CM != CodeModel::Small && CM != CodeModel::Large)
3719  return 0;
3720 
3721  // Get opcode and regclass of the output for the given load instruction.
3722  unsigned Opc = 0;
3723  const TargetRegisterClass *RC = nullptr;
3724  switch (VT.SimpleTy) {
3725  default: return 0;
3726  case MVT::f32:
3727  if (X86ScalarSSEf32) {
3728  Opc = Subtarget->hasAVX512()
3729  ? X86::VMOVSSZrm
3730  : Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
3731  RC = Subtarget->hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3732  } else {
3733  Opc = X86::LD_Fp32m;
3734  RC = &X86::RFP32RegClass;
3735  }
3736  break;
3737  case MVT::f64:
3738  if (X86ScalarSSEf64) {
3739  Opc = Subtarget->hasAVX512()
3740  ? X86::VMOVSDZrm
3741  : Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
3742  RC = Subtarget->hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3743  } else {
3744  Opc = X86::LD_Fp64m;
3745  RC = &X86::RFP64RegClass;
3746  }
3747  break;
3748  case MVT::f80:
3749  // No f80 support yet.
3750  return 0;
3751  }
3752 
3753  // MachineConstantPool wants an explicit alignment.
3754  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
3755  if (Align == 0) {
3756  // Alignment of vector types. FIXME!
3757  Align = DL.getTypeAllocSize(CFP->getType());
3758  }
3759 
3760  // x86-32 PIC requires a PIC base register for constant pools.
3761  unsigned PICBase = 0;
3762  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3763  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3764  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3765  else if (OpFlag == X86II::MO_GOTOFF)
3766  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3767  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3768  PICBase = X86::RIP;
3769 
3770  // Create the load from the constant pool.
3771  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
3772  unsigned ResultReg = createResultReg(RC);
3773 
3774  if (CM == CodeModel::Large) {
3775  unsigned AddrReg = createResultReg(&X86::GR64RegClass);
3776  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3777  AddrReg)
3778  .addConstantPoolIndex(CPI, 0, OpFlag);
3779  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3780  TII.get(Opc), ResultReg);
3781  addDirectMem(MIB, AddrReg);
 3782  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
 3783  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
 3784  MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
3785  MIB->addMemOperand(*FuncInfo.MF, MMO);
3786  return ResultReg;
3787  }
3788 
3789  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3790  TII.get(Opc), ResultReg),
3791  CPI, PICBase, OpFlag);
3792  return ResultReg;
3793 }
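// In the common small-code-model case the constant is placed in the constant
// pool and loaded with MOVSS/MOVSD (or the AVX/AVX-512 forms); the address is
// PIC-base-relative for 32-bit PIC code and RIP-relative on x86-64, following
// the PICBase selection above.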
3794 
3795 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3796  // Can't handle alternate code models yet.
3797  if (TM.getCodeModel() != CodeModel::Small)
3798  return 0;
3799 
3800  // Materialize addresses with LEA/MOV instructions.
3801  X86AddressMode AM;
3802  if (X86SelectAddress(GV, AM)) {
3803  // If the expression is just a basereg, then we're done, otherwise we need
3804  // to emit an LEA.
3805  if (AM.BaseType == X86AddressMode::RegBase &&
3806  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3807  return AM.Base.Reg;
3808 
3809  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3810  if (TM.getRelocationModel() == Reloc::Static &&
3811  TLI.getPointerTy(DL) == MVT::i64) {
 3812  // The displacement could be more than 32 bits away, so we need to use
 3813  // an instruction with a 64-bit immediate.
3814  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3815  ResultReg)
3816  .addGlobalAddress(GV);
3817  } else {
3818  unsigned Opc =
3819  TLI.getPointerTy(DL) == MVT::i32
3820  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3821  : X86::LEA64r;
3822  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3823  TII.get(Opc), ResultReg), AM);
3824  }
3825  return ResultReg;
3826  }
3827  return 0;
3828 }
3829 
3830 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3831  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3832 
3833  // Only handle simple types.
3834  if (!CEVT.isSimple())
3835  return 0;
3836  MVT VT = CEVT.getSimpleVT();
3837 
3838  if (const auto *CI = dyn_cast<ConstantInt>(C))
3839  return X86MaterializeInt(CI, VT);
3840  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
3841  return X86MaterializeFP(CFP, VT);
3842  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
3843  return X86MaterializeGV(GV, VT);
3844 
3845  return 0;
3846 }
3847 
3848 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3849  // Fail on dynamic allocas. At this point, getRegForValue has already
3850  // checked its CSE maps, so if we're here trying to handle a dynamic
3851  // alloca, we're not going to succeed. X86SelectAddress has a
3852  // check for dynamic allocas, because it's called directly from
 3853  // various places, but fastMaterializeAlloca also needs a check
 3854  // in order to avoid recursion between getRegForValue,
 3855  // X86SelectAddress, and fastMaterializeAlloca.
3856  if (!FuncInfo.StaticAllocaMap.count(C))
3857  return 0;
3858  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3859 
3860  X86AddressMode AM;
3861  if (!X86SelectAddress(C, AM))
3862  return 0;
3863  unsigned Opc =
3864  TLI.getPointerTy(DL) == MVT::i32
3865  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3866  : X86::LEA64r;
3867  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3868  unsigned ResultReg = createResultReg(RC);
3869  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3870  TII.get(Opc), ResultReg), AM);
3871  return ResultReg;
3872 }
3873 
3874 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3875  MVT VT;
3876  if (!isTypeLegal(CF->getType(), VT))
3877  return 0;
3878 
3879  // Get opcode and regclass for the given zero.
3880  bool HasAVX512 = Subtarget->hasAVX512();
3881  unsigned Opc = 0;
3882  const TargetRegisterClass *RC = nullptr;
3883  switch (VT.SimpleTy) {
3884  default: return 0;
3885  case MVT::f32:
3886  if (X86ScalarSSEf32) {
3887  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
3888  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
3889  } else {
3890  Opc = X86::LD_Fp032;
3891  RC = &X86::RFP32RegClass;
3892  }
3893  break;
3894  case MVT::f64:
3895  if (X86ScalarSSEf64) {
3896  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
3897  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
3898  } else {
3899  Opc = X86::LD_Fp064;
3900  RC = &X86::RFP64RegClass;
3901  }
3902  break;
3903  case MVT::f80:
3904  // No f80 support yet.
3905  return 0;
3906  }
3907 
3908  unsigned ResultReg = createResultReg(RC);
3909  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3910  return ResultReg;
3911 }
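// FsFLD0SS / FsFLD0SD (and their AVX-512 variants) are pseudo instructions
// that materialize +0.0; they are typically lowered to a self-XOR of the
// destination register, so no constant-pool load is needed for a zero.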
3912 
3913 
3914 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3915  const LoadInst *LI) {
3916  const Value *Ptr = LI->getPointerOperand();
3917  X86AddressMode AM;
3918  if (!X86SelectAddress(Ptr, AM))
3919  return false;
3920 
3921  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3922 
3923  unsigned Size = DL.getTypeAllocSize(LI->getType());
3924  unsigned Alignment = LI->getAlignment();
3925 
3926  if (Alignment == 0) // Ensure that codegen never sees alignment 0
3927  Alignment = DL.getABITypeAlignment(LI->getType());
3928 
 3929  SmallVector<MachineOperand, 8> AddrOps;
 3930  AM.getFullAddress(AddrOps);
3931 
3932  MachineInstr *Result = XII.foldMemoryOperandImpl(
3933  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
3934  /*AllowCommute=*/true);
3935  if (!Result)
3936  return false;
3937 
 3938  // The index register could be in the wrong register class. Unfortunately,
 3939  // foldMemoryOperandImpl could have commuted the instruction, so it is not
 3940  // enough to just look at OpNo + the offset to the index reg. We actually
 3941  // need to scan the instruction to find the index reg and check whether it
 3942  // is in the correct register class.
3943  unsigned OperandNo = 0;
3944  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3945  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3946  MachineOperand &MO = *I;
3947  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3948  continue;
3949  // Found the index reg, now try to rewrite it.
3950  unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
3951  MO.getReg(), OperandNo);
3952  if (IndexReg == MO.getReg())
3953  continue;
3954  MO.setReg(IndexReg);
3955  }
3956 
3957  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3958  MI->eraseFromParent();
3959  return true;
3960 }
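// Example (illustrative, not from the original source): if a compare's only
// register operand is produced by a load, foldMemoryOperandImpl can rewrite
// e.g. CMP32rr into CMP32rm using the load's address operands; the unfolded
// instruction is then erased via MI->eraseFromParent() above.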
3961 
3962 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3963  const TargetRegisterClass *RC,
3964  unsigned Op0, bool Op0IsKill,
3965  unsigned Op1, bool Op1IsKill,
3966  unsigned Op2, bool Op2IsKill,
3967  unsigned Op3, bool Op3IsKill) {
3968  const MCInstrDesc &II = TII.get(MachineInstOpcode);
3969 
3970  unsigned ResultReg = createResultReg(RC);
3971  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3972  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3973  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3974  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3975 
3976  if (II.getNumDefs() >= 1)
3977  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
3978  .addReg(Op0, getKillRegState(Op0IsKill))
3979  .addReg(Op1, getKillRegState(Op1IsKill))
3980  .addReg(Op2, getKillRegState(Op2IsKill))
3981  .addReg(Op3, getKillRegState(Op3IsKill));
3982  else {
3983  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
3984  .addReg(Op0, getKillRegState(Op0IsKill))
3985  .addReg(Op1, getKillRegState(Op1IsKill))
3986  .addReg(Op2, getKillRegState(Op2IsKill))
3987  .addReg(Op3, getKillRegState(Op3IsKill));
3988  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3989  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
3990  }
3991  return ResultReg;
3992 }
3993 
3994 
 3995 namespace llvm {
 3996  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
 3997  const TargetLibraryInfo *libInfo) {
3998  return new X86FastISel(funcInfo, libInfo);
3999  }
4000 }