X86FastISel.cpp
1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the X86-specific support for the FastISel class. Much
10 // of the target-specific code is generated by tablegen in the file
11 // X86GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86CallingConv.h"
17 #include "X86InstrBuilder.h"
18 #include "X86InstrInfo.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86RegisterInfo.h"
21 #include "X86Subtarget.h"
22 #include "X86TargetMachine.h"
24 #include "llvm/CodeGen/FastISel.h"
29 #include "llvm/IR/CallSite.h"
30 #include "llvm/IR/CallingConv.h"
31 #include "llvm/IR/DebugInfo.h"
32 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/GlobalVariable.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/MC/MCAsmInfo.h"
40 #include "llvm/MC/MCSymbol.h"
43 using namespace llvm;
44 
45 namespace {
46 
47 class X86FastISel final : public FastISel {
48  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
49  /// make the right decision when generating code for different targets.
50  const X86Subtarget *Subtarget;
51 
52  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
53  /// floating point ops.
54  /// When SSE is available, use it for f32 operations.
55  /// When SSE2 is available, use it for f64 operations.
56  bool X86ScalarSSEf64;
57  bool X86ScalarSSEf32;
58 
59 public:
60  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
61  const TargetLibraryInfo *libInfo)
62  : FastISel(funcInfo, libInfo) {
63  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
64  X86ScalarSSEf64 = Subtarget->hasSSE2();
65  X86ScalarSSEf32 = Subtarget->hasSSE1();
66  }
67 
68  bool fastSelectInstruction(const Instruction *I) override;
69 
70  /// The specified machine instr operand is a vreg, and that
71  /// vreg is being provided by the specified load instruction. If possible,
72 /// try to fold the load as an operand to the instruction, returning true on
73 /// success.
74  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
75  const LoadInst *LI) override;
76 
77  bool fastLowerArguments() override;
78  bool fastLowerCall(CallLoweringInfo &CLI) override;
79  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
80 
81 #include "X86GenFastISel.inc"
82 
83 private:
84  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
85  const DebugLoc &DL);
86 
87  bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
88  unsigned &ResultReg, unsigned Alignment = 1);
89 
90  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
91  MachineMemOperand *MMO = nullptr, bool Aligned = false);
92  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
93  X86AddressMode &AM,
94  MachineMemOperand *MMO = nullptr, bool Aligned = false);
95 
96  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
97  unsigned &ResultReg);
98 
99  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
100  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
101 
102  bool X86SelectLoad(const Instruction *I);
103 
104  bool X86SelectStore(const Instruction *I);
105 
106  bool X86SelectRet(const Instruction *I);
107 
108  bool X86SelectCmp(const Instruction *I);
109 
110  bool X86SelectZExt(const Instruction *I);
111 
112  bool X86SelectSExt(const Instruction *I);
113 
114  bool X86SelectBranch(const Instruction *I);
115 
116  bool X86SelectShift(const Instruction *I);
117 
118  bool X86SelectDivRem(const Instruction *I);
119 
120  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
121 
122  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
123 
124  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
125 
126  bool X86SelectSelect(const Instruction *I);
127 
128  bool X86SelectTrunc(const Instruction *I);
129 
130  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
131  const TargetRegisterClass *RC);
132 
133  bool X86SelectFPExt(const Instruction *I);
134  bool X86SelectFPTrunc(const Instruction *I);
135  bool X86SelectSIToFP(const Instruction *I);
136  bool X86SelectUIToFP(const Instruction *I);
137  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
138 
139  const X86InstrInfo *getInstrInfo() const {
140  return Subtarget->getInstrInfo();
141  }
142  const X86TargetMachine *getTargetMachine() const {
143  return static_cast<const X86TargetMachine *>(&TM);
144  }
145 
146  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
147 
148  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
149  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
150  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
151  unsigned fastMaterializeConstant(const Constant *C) override;
152 
153  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
154 
155  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
156 
157  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
158  /// computed in an SSE register, not on the X87 floating point stack.
159  bool isScalarFPTypeInSSEReg(EVT VT) const {
160  return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
161  (VT == MVT::f32 && X86ScalarSSEf32); // f32 when SSE1 is available
162  }
163 
164  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
165 
166  bool IsMemcpySmall(uint64_t Len);
167 
168  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
169  X86AddressMode SrcAM, uint64_t Len);
170 
171  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
172  const Value *Cond);
173 
173 
174  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
175                                            X86AddressMode &AM);
176 
177  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
178  const TargetRegisterClass *RC, unsigned Op0,
179  bool Op0IsKill, unsigned Op1, bool Op1IsKill,
180  unsigned Op2, bool Op2IsKill, unsigned Op3,
181  bool Op3IsKill);
182 };
183 
184 } // end anonymous namespace.
185 
186 static std::pair<unsigned, bool>
187 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
188  unsigned CC;
189  bool NeedSwap = false;
190 
191  // SSE Condition code mapping:
192  // 0 - EQ
193  // 1 - LT
194  // 2 - LE
195  // 3 - UNORD
196  // 4 - NEQ
197  // 5 - NLT
198  // 6 - NLE
199  // 7 - ORD
200  switch (Predicate) {
201  default: llvm_unreachable("Unexpected predicate");
202  case CmpInst::FCMP_OEQ: CC = 0; break;
203  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
204  case CmpInst::FCMP_OLT: CC = 1; break;
205  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
206  case CmpInst::FCMP_OLE: CC = 2; break;
207  case CmpInst::FCMP_UNO: CC = 3; break;
208  case CmpInst::FCMP_UNE: CC = 4; break;
209  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
210  case CmpInst::FCMP_UGE: CC = 5; break;
211  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
212  case CmpInst::FCMP_UGT: CC = 6; break;
213  case CmpInst::FCMP_ORD: CC = 7; break;
214  case CmpInst::FCMP_UEQ: CC = 8; break;
215  case CmpInst::FCMP_ONE: CC = 12; break;
216  }
217 
218  return std::make_pair(CC, NeedSwap);
219 }
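// A brief illustration of the mapping above: FCMP_OGT returns {CC = 1 (LT),
// NeedSwap = true}, i.e. the caller swaps the operands and emits the
// "less than" encoding, since a > b is equivalent to b < a. The immediates
// 8 (EQ_UQ) and 12 (NEQ_OQ) returned for FCMP_UEQ and FCMP_ONE are only
// encodable with AVX's expanded comparison predicates, so callers should
// verify AVX support before using them.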
220 
221 /// Adds a complex addressing mode to the given machine instr builder.
222 /// Note, this will constrain the index register. If it's not possible to
223 /// constrain the given index register, then a new one will be created. The
224 /// IndexReg field of the addressing mode will be updated to match in this case.
225 const MachineInstrBuilder &
226 X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
227                             X86AddressMode &AM) {
228  // First constrain the index register. It needs to be a GR64_NOSP.
229  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
230                                         MIB->getNumOperands() +
231                                         X86::AddrIndexReg);
232  return ::addFullAddress(MIB, AM);
233 }
234 
235 /// Check if it is possible to fold the condition from the XALU intrinsic
236 /// into the user. The condition code will only be updated on success.
237 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
238  const Value *Cond) {
239  if (!isa<ExtractValueInst>(Cond))
240  return false;
241 
242  const auto *EV = cast<ExtractValueInst>(Cond);
243  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
244  return false;
245 
246  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
247  MVT RetVT;
248  const Function *Callee = II->getCalledFunction();
249  Type *RetTy =
250  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
251  if (!isTypeLegal(RetTy, RetVT))
252  return false;
253 
254  if (RetVT != MVT::i32 && RetVT != MVT::i64)
255  return false;
256 
257  X86::CondCode TmpCC;
258  switch (II->getIntrinsicID()) {
259  default: return false;
260  case Intrinsic::sadd_with_overflow:
261  case Intrinsic::ssub_with_overflow:
262  case Intrinsic::smul_with_overflow:
263  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
264  case Intrinsic::uadd_with_overflow:
265  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
266  }
267 
268  // Check if both instructions are in the same basic block.
269  if (II->getParent() != I->getParent())
270  return false;
271 
272  // Make sure nothing is in the way.
273  BasicBlock::const_iterator Start(I);
274  BasicBlock::const_iterator End(II);
275  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
276  // We only expect extractvalue instructions between the intrinsic and the
277  // instruction to be selected.
278  if (!isa<ExtractValueInst>(Itr))
279  return false;
280 
281  // Check that the extractvalue operand comes from the intrinsic.
282  const auto *EVI = cast<ExtractValueInst>(Itr);
283  if (EVI->getAggregateOperand() != II)
284  return false;
285  }
286 
287  CC = TmpCC;
288  return true;
289 }
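// For example, this lets an IR pattern such as the following fold the overflow
// flag straight into its user (assuming both instructions are in the same
// basic block, as checked above):
//   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum  = extractvalue { i32, i1 } %res, 0
//   %obit = extractvalue { i32, i1 } %res, 1
//   br i1 %obit, label %overflow, label %normal
// Here the branch can test X86::COND_O set by the add instead of first
// materializing %obit into a register.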
290 
291 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
292  EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
293  if (evt == MVT::Other || !evt.isSimple())
294  // Unhandled type. Halt "fast" selection and bail.
295  return false;
296 
297  VT = evt.getSimpleVT();
298  // For now, require SSE/SSE2 for performing floating-point operations,
299  // since x87 requires additional work.
300  if (VT == MVT::f64 && !X86ScalarSSEf64)
301  return false;
302  if (VT == MVT::f32 && !X86ScalarSSEf32)
303  return false;
304  // Similarly, no f80 support yet.
305  if (VT == MVT::f80)
306  return false;
307  // We only handle legal types. For example, on x86-32 the instruction
308  // selector contains all of the 64-bit instructions from x86-64,
309  // under the assumption that i64 won't be used if the target doesn't
310  // support it.
311  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
312 }
313 
314 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
315 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
316 /// Return true and the result register by reference if it is possible.
317 bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
318  MachineMemOperand *MMO, unsigned &ResultReg,
319  unsigned Alignment) {
320  bool HasSSE41 = Subtarget->hasSSE41();
321  bool HasAVX = Subtarget->hasAVX();
322  bool HasAVX2 = Subtarget->hasAVX2();
323  bool HasAVX512 = Subtarget->hasAVX512();
324  bool HasVLX = Subtarget->hasVLX();
325  bool IsNonTemporal = MMO && MMO->isNonTemporal();
326 
327  // Treat i1 loads the same as i8 loads. Masking will be done when storing.
328  if (VT == MVT::i1)
329  VT = MVT::i8;
330 
331  // Get opcode and regclass of the output for the given load instruction.
332  unsigned Opc = 0;
333  switch (VT.SimpleTy) {
334  default: return false;
335  case MVT::i8:
336  Opc = X86::MOV8rm;
337  break;
338  case MVT::i16:
339  Opc = X86::MOV16rm;
340  break;
341  case MVT::i32:
342  Opc = X86::MOV32rm;
343  break;
344  case MVT::i64:
345  // Must be in x86-64 mode.
346  Opc = X86::MOV64rm;
347  break;
348  case MVT::f32:
349  if (X86ScalarSSEf32)
350  Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
351  HasAVX ? X86::VMOVSSrm_alt :
352  X86::MOVSSrm_alt;
353  else
354  Opc = X86::LD_Fp32m;
355  break;
356  case MVT::f64:
357  if (X86ScalarSSEf64)
358  Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
359  HasAVX ? X86::VMOVSDrm_alt :
360  X86::MOVSDrm_alt;
361  else
362  Opc = X86::LD_Fp64m;
363  break;
364  case MVT::f80:
365  // No f80 support yet.
366  return false;
367  case MVT::v4f32:
368  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
369  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
370  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
371  else if (Alignment >= 16)
372  Opc = HasVLX ? X86::VMOVAPSZ128rm :
373  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
374  else
375  Opc = HasVLX ? X86::VMOVUPSZ128rm :
376  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
377  break;
378  case MVT::v2f64:
379  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
380  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
381  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
382  else if (Alignment >= 16)
383  Opc = HasVLX ? X86::VMOVAPDZ128rm :
384  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
385  else
386  Opc = HasVLX ? X86::VMOVUPDZ128rm :
387  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
388  break;
389  case MVT::v4i32:
390  case MVT::v2i64:
391  case MVT::v8i16:
392  case MVT::v16i8:
393  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
394  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
395  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
396  else if (Alignment >= 16)
397  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
398  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
399  else
400  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
401  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
402  break;
403  case MVT::v8f32:
404  assert(HasAVX);
405  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
406  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
407  else if (IsNonTemporal && Alignment >= 16)
408  return false; // Force split for X86::VMOVNTDQArm
409  else if (Alignment >= 32)
410  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
411  else
412  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
413  break;
414  case MVT::v4f64:
415  assert(HasAVX);
416  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
417  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
418  else if (IsNonTemporal && Alignment >= 16)
419  return false; // Force split for X86::VMOVNTDQArm
420  else if (Alignment >= 32)
421  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
422  else
423  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
424  break;
425  case MVT::v8i32:
426  case MVT::v4i64:
427  case MVT::v16i16:
428  case MVT::v32i8:
429  assert(HasAVX);
430  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
431  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
432  else if (IsNonTemporal && Alignment >= 16)
433  return false; // Force split for X86::VMOVNTDQArm
434  else if (Alignment >= 32)
435  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
436  else
437  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
438  break;
439  case MVT::v16f32:
440  assert(HasAVX512);
441  if (IsNonTemporal && Alignment >= 64)
442  Opc = X86::VMOVNTDQAZrm;
443  else
444  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
445  break;
446  case MVT::v8f64:
447  assert(HasAVX512);
448  if (IsNonTemporal && Alignment >= 64)
449  Opc = X86::VMOVNTDQAZrm;
450  else
451  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
452  break;
453  case MVT::v8i64:
454  case MVT::v16i32:
455  case MVT::v32i16:
456  case MVT::v64i8:
457  assert(HasAVX512);
458  // Note: There are a lot more choices based on type with AVX-512, but
459  // there's really no advantage when the load isn't masked.
460  if (IsNonTemporal && Alignment >= 64)
461  Opc = X86::VMOVNTDQAZrm;
462  else
463  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
464  break;
465  }
466 
467  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
468 
469  ResultReg = createResultReg(RC);
470  MachineInstrBuilder MIB =
471  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
472  addFullAddress(MIB, AM);
473  if (MMO)
474  MIB->addMemOperand(*FuncInfo.MF, MMO);
475  return true;
476 }
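// As a concrete example of the opcode selection above: a 16-byte-aligned
// non-temporal load of a v4f32 value selects MOVNTDQA (VMOVNTDQArm with AVX,
// VMOVNTDQAZ128rm with VLX) when SSE4.1 is available, an ordinary aligned load
// selects the MOVAPS/VMOVAPS family, and anything less aligned falls back to
// MOVUPS/VMOVUPS.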
477 
478 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
479 /// type VT. The address is either pre-computed, consisting of a base ptr, Ptr,
480 /// and a displacement offset, or a GlobalAddress,
481 /// i.e. V. Return true if it is possible.
482 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
483  X86AddressMode &AM,
484  MachineMemOperand *MMO, bool Aligned) {
485  bool HasSSE1 = Subtarget->hasSSE1();
486  bool HasSSE2 = Subtarget->hasSSE2();
487  bool HasSSE4A = Subtarget->hasSSE4A();
488  bool HasAVX = Subtarget->hasAVX();
489  bool HasAVX512 = Subtarget->hasAVX512();
490  bool HasVLX = Subtarget->hasVLX();
491  bool IsNonTemporal = MMO && MMO->isNonTemporal();
492 
493  // Get opcode and regclass of the output for the given store instruction.
494  unsigned Opc = 0;
495  switch (VT.getSimpleVT().SimpleTy) {
496  case MVT::f80: // No f80 support yet.
497  default: return false;
498  case MVT::i1: {
499  // Mask out all but lowest bit.
500  unsigned AndResult = createResultReg(&X86::GR8RegClass);
501  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
502  TII.get(X86::AND8ri), AndResult)
503  .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
504  ValReg = AndResult;
505  LLVM_FALLTHROUGH; // handle i1 as i8.
506  }
507  case MVT::i8: Opc = X86::MOV8mr; break;
508  case MVT::i16: Opc = X86::MOV16mr; break;
509  case MVT::i32:
510  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
511  break;
512  case MVT::i64:
513  // Must be in x86-64 mode.
514  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
515  break;
516  case MVT::f32:
517  if (X86ScalarSSEf32) {
518  if (IsNonTemporal && HasSSE4A)
519  Opc = X86::MOVNTSS;
520  else
521  Opc = HasAVX512 ? X86::VMOVSSZmr :
522  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
523  } else
524  Opc = X86::ST_Fp32m;
525  break;
526  case MVT::f64:
527  if (X86ScalarSSEf64) {
528  if (IsNonTemporal && HasSSE4A)
529  Opc = X86::MOVNTSD;
530  else
531  Opc = HasAVX512 ? X86::VMOVSDZmr :
532  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
533  } else
534  Opc = X86::ST_Fp64m;
535  break;
536  case MVT::x86mmx:
537  Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
538  break;
539  case MVT::v4f32:
540  if (Aligned) {
541  if (IsNonTemporal)
542  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
543  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
544  else
545  Opc = HasVLX ? X86::VMOVAPSZ128mr :
546  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
547  } else
548  Opc = HasVLX ? X86::VMOVUPSZ128mr :
549  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
550  break;
551  case MVT::v2f64:
552  if (Aligned) {
553  if (IsNonTemporal)
554  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
555  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
556  else
557  Opc = HasVLX ? X86::VMOVAPDZ128mr :
558  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
559  } else
560  Opc = HasVLX ? X86::VMOVUPDZ128mr :
561  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
562  break;
563  case MVT::v4i32:
564  case MVT::v2i64:
565  case MVT::v8i16:
566  case MVT::v16i8:
567  if (Aligned) {
568  if (IsNonTemporal)
569  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
570  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
571  else
572  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
573  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
574  } else
575  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
576  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
577  break;
578  case MVT::v8f32:
579  assert(HasAVX);
580  if (Aligned) {
581  if (IsNonTemporal)
582  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
583  else
584  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
585  } else
586  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
587  break;
588  case MVT::v4f64:
589  assert(HasAVX);
590  if (Aligned) {
591  if (IsNonTemporal)
592  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
593  else
594  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
595  } else
596  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
597  break;
598  case MVT::v8i32:
599  case MVT::v4i64:
600  case MVT::v16i16:
601  case MVT::v32i8:
602  assert(HasAVX);
603  if (Aligned) {
604  if (IsNonTemporal)
605  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
606  else
607  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
608  } else
609  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
610  break;
611  case MVT::v16f32:
612  assert(HasAVX512);
613  if (Aligned)
614  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
615  else
616  Opc = X86::VMOVUPSZmr;
617  break;
618  case MVT::v8f64:
619  assert(HasAVX512);
620  if (Aligned) {
621  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
622  } else
623  Opc = X86::VMOVUPDZmr;
624  break;
625  case MVT::v8i64:
626  case MVT::v16i32:
627  case MVT::v32i16:
628  case MVT::v64i8:
629  assert(HasAVX512);
630  // Note: There are a lot more choices based on type with AVX-512, but
631  // there's really no advantage when the store isn't masked.
632  if (Aligned)
633  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
634  else
635  Opc = X86::VMOVDQU64Zmr;
636  break;
637  }
638 
639  const MCInstrDesc &Desc = TII.get(Opc);
640  // Some of the instructions in the previous switch use FR128 instead
641  // of FR32 for ValReg. Make sure the register we feed the instruction
642  // matches its register class constraints.
643  // Note: It is fine to do a copy from FR32 to FR128; these are the
644  // same registers behind the scenes, which is why this did not trigger
645  // any bugs before.
646  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
647  MachineInstrBuilder MIB =
648  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
649  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
650  if (MMO)
651  MIB->addMemOperand(*FuncInfo.MF, MMO);
652 
653  return true;
654 }
655 
656 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
657  X86AddressMode &AM,
658  MachineMemOperand *MMO, bool Aligned) {
659  // Handle 'null' like i32/i64 0.
660  if (isa<ConstantPointerNull>(Val))
661  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
662 
663  // If this is a store of a simple constant, fold the constant into the store.
664  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
665  unsigned Opc = 0;
666  bool Signed = true;
667  switch (VT.getSimpleVT().SimpleTy) {
668  default: break;
669  case MVT::i1:
670  Signed = false;
671  LLVM_FALLTHROUGH; // Handle as i8.
672  case MVT::i8: Opc = X86::MOV8mi; break;
673  case MVT::i16: Opc = X86::MOV16mi; break;
674  case MVT::i32: Opc = X86::MOV32mi; break;
675  case MVT::i64:
676  // Must be a 32-bit sign extended value.
677  if (isInt<32>(CI->getSExtValue()))
678  Opc = X86::MOV64mi32;
679  break;
680  }
681 
682  if (Opc) {
683  MachineInstrBuilder MIB =
684  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
685  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
686  : CI->getZExtValue());
687  if (MMO)
688  MIB->addMemOperand(*FuncInfo.MF, MMO);
689  return true;
690  }
691  }
692 
693  unsigned ValReg = getRegForValue(Val);
694  if (ValReg == 0)
695  return false;
696 
697  bool ValKill = hasTrivialKill(Val);
698  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
699 }
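// For example, "store i32 42, i32* %p" is emitted directly as a MOV32mi with
// the constant folded into the instruction, while a store of an i64 constant
// only folds via MOV64mi32 when the value fits in a sign-extended 32-bit
// immediate; otherwise the value is materialized into a register and the
// register form above is used.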
700 
701 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
702 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
703 /// ISD::SIGN_EXTEND).
704 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
705  unsigned Src, EVT SrcVT,
706  unsigned &ResultReg) {
707  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
708  Src, /*TODO: Kill=*/false);
709  if (RR == 0)
710  return false;
711 
712  ResultReg = RR;
713  return true;
714 }
715 
716 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
717  // Handle constant address.
718  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
719  // Can't handle alternate code models yet.
720  if (TM.getCodeModel() != CodeModel::Small)
721  return false;
722 
723  // Can't handle TLS yet.
724  if (GV->isThreadLocal())
725  return false;
726 
727  // Can't handle !absolute_symbol references yet.
728  if (GV->isAbsoluteSymbolRef())
729  return false;
730 
731  // RIP-relative addresses can't have additional register operands, so if
732  // we've already folded stuff into the addressing mode, just force the
733  // global value into its own register, which we can use as the basereg.
734  if (!Subtarget->isPICStyleRIPRel() ||
735  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
736  // Okay, we've committed to selecting this global. Set up the address.
737  AM.GV = GV;
738 
739  // Allow the subtarget to classify the global.
740  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
741 
742  // If this reference is relative to the pic base, set it now.
743  if (isGlobalRelativeToPICBase(GVFlags)) {
744  // FIXME: How do we know Base.Reg is free??
745  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
746  }
747 
748  // Unless the ABI requires an extra load, return a direct reference to
749  // the global.
750  if (!isGlobalStubReference(GVFlags)) {
751  if (Subtarget->isPICStyleRIPRel()) {
752  // Use rip-relative addressing if we can. Above we verified that the
753  // base and index registers are unused.
754  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
755  AM.Base.Reg = X86::RIP;
756  }
757  AM.GVOpFlags = GVFlags;
758  return true;
759  }
760 
761  // Ok, we need to do a load from a stub. If we've already loaded from
762  // this stub, reuse the loaded pointer, otherwise emit the load now.
763  DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
764  unsigned LoadReg;
765  if (I != LocalValueMap.end() && I->second != 0) {
766  LoadReg = I->second;
767  } else {
768  // Issue load from stub.
769  unsigned Opc = 0;
770  const TargetRegisterClass *RC = nullptr;
771  X86AddressMode StubAM;
772  StubAM.Base.Reg = AM.Base.Reg;
773  StubAM.GV = GV;
774  StubAM.GVOpFlags = GVFlags;
775 
776  // Prepare for inserting code in the local-value area.
777  SavePoint SaveInsertPt = enterLocalValueArea();
778 
779  if (TLI.getPointerTy(DL) == MVT::i64) {
780  Opc = X86::MOV64rm;
781  RC = &X86::GR64RegClass;
782 
783  if (Subtarget->isPICStyleRIPRel())
784  StubAM.Base.Reg = X86::RIP;
785  } else {
786  Opc = X86::MOV32rm;
787  RC = &X86::GR32RegClass;
788  }
789 
790  LoadReg = createResultReg(RC);
791  MachineInstrBuilder LoadMI =
792  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
793  addFullAddress(LoadMI, StubAM);
794 
795  // Ok, back to normal mode.
796  leaveLocalValueArea(SaveInsertPt);
797 
798  // Prevent loading GV stub multiple times in same MBB.
799  LocalValueMap[V] = LoadReg;
800  }
801 
802  // Now construct the final address. Note that the Disp, Scale,
803  // and Index values may already be set here.
804  AM.Base.Reg = LoadReg;
805  AM.GV = nullptr;
806  return true;
807  }
808  }
809 
810  // If all else fails, try to materialize the value in a register.
811  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
812  if (AM.Base.Reg == 0) {
813  AM.Base.Reg = getRegForValue(V);
814  return AM.Base.Reg != 0;
815  }
816  if (AM.IndexReg == 0) {
817  assert(AM.Scale == 1 && "Scale with no index!");
818  AM.IndexReg = getRegForValue(V);
819  return AM.IndexReg != 0;
820  }
821  }
822 
823  return false;
824 }
825 
826 /// X86SelectAddress - Attempt to fill in an address from the given value.
827 ///
828 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
829  SmallVector<const Value *, 32> GEPs;
830 redo_gep:
831  const User *U = nullptr;
832  unsigned Opcode = Instruction::UserOp1;
833  if (const Instruction *I = dyn_cast<Instruction>(V)) {
834  // Don't walk into other basic blocks; it's possible we haven't
835  // visited them yet, so the instructions may not yet be assigned
836  // virtual registers.
837  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
838  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
839  Opcode = I->getOpcode();
840  U = I;
841  }
842  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
843  Opcode = C->getOpcode();
844  U = C;
845  }
846 
847  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
848  if (Ty->getAddressSpace() > 255)
849  // Fast instruction selection doesn't support the special
850  // address spaces.
851  return false;
852 
853  switch (Opcode) {
854  default: break;
855  case Instruction::BitCast:
856  // Look past bitcasts.
857  return X86SelectAddress(U->getOperand(0), AM);
858 
859  case Instruction::IntToPtr:
860  // Look past no-op inttoptrs.
861  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
862  TLI.getPointerTy(DL))
863  return X86SelectAddress(U->getOperand(0), AM);
864  break;
865 
866  case Instruction::PtrToInt:
867  // Look past no-op ptrtoints.
868  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
869  return X86SelectAddress(U->getOperand(0), AM);
870  break;
871 
872  case Instruction::Alloca: {
873  // Do static allocas.
874  const AllocaInst *A = cast<AllocaInst>(V);
875  DenseMap<const AllocaInst *, int>::iterator SI =
876    FuncInfo.StaticAllocaMap.find(A);
877  if (SI != FuncInfo.StaticAllocaMap.end()) {
878  AM.BaseType = X86AddressMode::FrameIndexBase;
879  AM.Base.FrameIndex = SI->second;
880  return true;
881  }
882  break;
883  }
884 
885  case Instruction::Add: {
886  // Adds of constants are common and easy enough.
887  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
888  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
889  // They have to fit in the 32-bit signed displacement field though.
890  if (isInt<32>(Disp)) {
891  AM.Disp = (uint32_t)Disp;
892  return X86SelectAddress(U->getOperand(0), AM);
893  }
894  }
895  break;
896  }
897 
898  case Instruction::GetElementPtr: {
899  X86AddressMode SavedAM = AM;
900 
901  // Pattern-match simple GEPs.
902  uint64_t Disp = (int32_t)AM.Disp;
903  unsigned IndexReg = AM.IndexReg;
904  unsigned Scale = AM.Scale;
905  gep_type_iterator GTI = gep_type_begin(U);
906  // Iterate through the indices, folding what we can. Constants can be
907  // folded, and one dynamic index can be handled, if the scale is supported.
908  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
909  i != e; ++i, ++GTI) {
910  const Value *Op = *i;
911  if (StructType *STy = GTI.getStructTypeOrNull()) {
912  const StructLayout *SL = DL.getStructLayout(STy);
913  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
914  continue;
915  }
916 
917  // An array/variable index is always of the form i*S where S is the
918  // constant scale size. See if we can push the scale into immediates.
919  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
920  for (;;) {
921  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
922  // Constant-offset addressing.
923  Disp += CI->getSExtValue() * S;
924  break;
925  }
926  if (canFoldAddIntoGEP(U, Op)) {
927  // A compatible add with a constant operand. Fold the constant.
928  ConstantInt *CI =
929  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
930  Disp += CI->getSExtValue() * S;
931  // Iterate on the other operand.
932  Op = cast<AddOperator>(Op)->getOperand(0);
933  continue;
934  }
935  if (IndexReg == 0 &&
936  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
937  (S == 1 || S == 2 || S == 4 || S == 8)) {
938  // Scaled-index addressing.
939  Scale = S;
940  IndexReg = getRegForGEPIndex(Op).first;
941  if (IndexReg == 0)
942  return false;
943  break;
944  }
945  // Unsupported.
946  goto unsupported_gep;
947  }
948  }
949 
950  // Check for displacement overflow.
951  if (!isInt<32>(Disp))
952  break;
953 
954  AM.IndexReg = IndexReg;
955  AM.Scale = Scale;
956  AM.Disp = (uint32_t)Disp;
957  GEPs.push_back(V);
958 
959  if (const GetElementPtrInst *GEP =
960  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
961  // Ok, the GEP indices were covered by constant-offset and scaled-index
962  // addressing. Update the address state and move on to examining the base.
963  V = GEP;
964  goto redo_gep;
965  } else if (X86SelectAddress(U->getOperand(0), AM)) {
966  return true;
967  }
968 
969  // If we couldn't merge the gep value into this addr mode, revert to
970  // our address and just match the value instead of completely failing.
971  AM = SavedAM;
972 
973  for (const Value *I : reverse(GEPs))
974  if (handleConstantAddresses(I, AM))
975  return true;
976 
977  return false;
978  unsupported_gep:
979  // Ok, the GEP indices weren't all covered.
980  break;
981  }
982  }
983 
984  return handleConstantAddresses(V, AM);
985 }
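// As an example of the GEP matching above, an access such as
//   %p = getelementptr inbounds [16 x i32], [16 x i32]* %buf, i64 0, i64 %i
// folds into a single X86AddressMode with the register for %buf as the base,
// the register for %i as the index, a scale of 4 (the i32 allocation size),
// and a zero displacement, so a following load or store can use
// [base + index*4 + 0] directly.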
986 
987 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
988 ///
989 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
990  const User *U = nullptr;
991  unsigned Opcode = Instruction::UserOp1;
992  const Instruction *I = dyn_cast<Instruction>(V);
993  // Record if the value is defined in the same basic block.
994  //
995  // This information is crucial to know whether or not folding an
996  // operand is valid.
997  // Indeed, FastISel generates or reuses a virtual register for all
998  // operands of all instructions it selects. Obviously, the definition and
999  // its uses must use the same virtual register otherwise the produced
1000  // code is incorrect.
1001  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
1002  // registers for values that are alive across basic blocks. This ensures
1003  // that the values are set consistently across basic blocks, even
1004  // if different instruction selection mechanisms are used (e.g., a mix of
1005  // SDISel and FastISel).
1006  // For values local to a basic block, the instruction selection process
1007  // generates these virtual registers with whatever method is appropriate
1008  // for its needs. In particular, FastISel and SDISel do not share the way
1009  // local virtual registers are set.
1010  // Therefore, it is impossible (or at least unsafe) to share values
1011  // between basic blocks unless they use the same instruction selection
1012  // method, which is not guaranteed for X86.
1013  // Moreover, things like hasOneUse cannot be used accurately if we
1014  // allow references to values across basic blocks when they are not
1015  // alive across basic blocks to begin with.
1016  bool InMBB = true;
1017  if (I) {
1018  Opcode = I->getOpcode();
1019  U = I;
1020  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1021  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1022  Opcode = C->getOpcode();
1023  U = C;
1024  }
1025 
1026  switch (Opcode) {
1027  default: break;
1028  case Instruction::BitCast:
1029  // Look past bitcasts if its operand is in the same BB.
1030  if (InMBB)
1031  return X86SelectCallAddress(U->getOperand(0), AM);
1032  break;
1033 
1034  case Instruction::IntToPtr:
1035  // Look past no-op inttoptrs if its operand is in the same BB.
1036  if (InMBB &&
1037  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1038  TLI.getPointerTy(DL))
1039  return X86SelectCallAddress(U->getOperand(0), AM);
1040  break;
1041 
1042  case Instruction::PtrToInt:
1043  // Look past no-op ptrtoints if its operand is in the same BB.
1044  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1045  return X86SelectCallAddress(U->getOperand(0), AM);
1046  break;
1047  }
1048 
1049  // Handle constant address.
1050  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1051  // Can't handle alternate code models yet.
1052  if (TM.getCodeModel() != CodeModel::Small)
1053  return false;
1054 
1055  // RIP-relative addresses can't have additional register operands.
1056  if (Subtarget->isPICStyleRIPRel() &&
1057  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1058  return false;
1059 
1060  // Can't handle TLS.
1061  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1062  if (GVar->isThreadLocal())
1063  return false;
1064 
1065  // Okay, we've committed to selecting this global. Set up the basic address.
1066  AM.GV = GV;
1067 
1068  // Return a direct reference to the global. Fastisel can handle calls to
1069  // functions that require loads, such as dllimport and nonlazybind
1070  // functions.
1071  if (Subtarget->isPICStyleRIPRel()) {
1072  // Use rip-relative addressing if we can. Above we verified that the
1073  // base and index registers are unused.
1074  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1075  AM.Base.Reg = X86::RIP;
1076  } else {
1077  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1078  }
1079 
1080  return true;
1081  }
1082 
1083  // If all else fails, try to materialize the value in a register.
1084  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1085  if (AM.Base.Reg == 0) {
1086  AM.Base.Reg = getRegForValue(V);
1087  return AM.Base.Reg != 0;
1088  }
1089  if (AM.IndexReg == 0) {
1090  assert(AM.Scale == 1 && "Scale with no index!");
1091  AM.IndexReg = getRegForValue(V);
1092  return AM.IndexReg != 0;
1093  }
1094  }
1095 
1096  return false;
1097 }
1098 
1099 
1100 /// X86SelectStore - Select and emit code to implement store instructions.
1101 bool X86FastISel::X86SelectStore(const Instruction *I) {
1102  // Atomic stores need special handling.
1103  const StoreInst *S = cast<StoreInst>(I);
1104 
1105  if (S->isAtomic())
1106  return false;
1107 
1108  const Value *PtrV = I->getOperand(1);
1109  if (TLI.supportSwiftError()) {
1110  // Swifterror values can come from either a function parameter with
1111  // swifterror attribute or an alloca with swifterror attribute.
1112  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1113  if (Arg->hasSwiftErrorAttr())
1114  return false;
1115  }
1116 
1117  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1118  if (Alloca->isSwiftError())
1119  return false;
1120  }
1121  }
1122 
1123  const Value *Val = S->getValueOperand();
1124  const Value *Ptr = S->getPointerOperand();
1125 
1126  MVT VT;
1127  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1128  return false;
1129 
1130  unsigned Alignment = S->getAlignment();
1131  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
1132  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1133  Alignment = ABIAlignment;
1134  bool Aligned = Alignment >= ABIAlignment;
1135 
1136  X86AddressMode AM;
1137  if (!X86SelectAddress(Ptr, AM))
1138  return false;
1139 
1140  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1141 }
1142 
1143 /// X86SelectRet - Select and emit code to implement ret instructions.
1144 bool X86FastISel::X86SelectRet(const Instruction *I) {
1145  const ReturnInst *Ret = cast<ReturnInst>(I);
1146  const Function &F = *I->getParent()->getParent();
1147  const X86MachineFunctionInfo *X86MFInfo =
1148  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1149 
1150  if (!FuncInfo.CanLowerReturn)
1151  return false;
1152 
1153  if (TLI.supportSwiftError() &&
1154  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1155  return false;
1156 
1157  if (TLI.supportSplitCSR(FuncInfo.MF))
1158  return false;
1159 
1160  CallingConv::ID CC = F.getCallingConv();
1161  if (CC != CallingConv::C &&
1162  CC != CallingConv::Fast &&
1163  CC != CallingConv::X86_FastCall &&
1164  CC != CallingConv::X86_StdCall &&
1165  CC != CallingConv::X86_ThisCall &&
1166  CC != CallingConv::X86_64_SysV &&
1167  CC != CallingConv::Win64)
1168  return false;
1169 
1170  // Don't handle popping bytes if they don't fit the ret's immediate.
1171  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1172  return false;
1173 
1174  // fastcc with -tailcallopt is intended to provide a guaranteed
1175  // tail call optimization. Fastisel doesn't know how to do that.
1176  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
1177  return false;
1178 
1179  // Let SDISel handle vararg functions.
1180  if (F.isVarArg())
1181  return false;
1182 
1183  // Build a list of return value registers.
1184  SmallVector<unsigned, 4> RetRegs;
1185 
1186  if (Ret->getNumOperands() > 0) {
1187  SmallVector<ISD::OutputArg, 4> Outs;
1188  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1189 
1190  // Analyze operands of the call, assigning locations to each operand.
1191  SmallVector<CCValAssign, 16> ValLocs;
1192  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1193  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1194 
1195  const Value *RV = Ret->getOperand(0);
1196  unsigned Reg = getRegForValue(RV);
1197  if (Reg == 0)
1198  return false;
1199 
1200  // Only handle a single return value for now.
1201  if (ValLocs.size() != 1)
1202  return false;
1203 
1204  CCValAssign &VA = ValLocs[0];
1205 
1206  // Don't bother handling odd stuff for now.
1207  if (VA.getLocInfo() != CCValAssign::Full)
1208  return false;
1209  // Only handle register returns for now.
1210  if (!VA.isRegLoc())
1211  return false;
1212 
1213  // The calling-convention tables for x87 returns don't tell
1214  // the whole story.
1215  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1216  return false;
1217 
1218  unsigned SrcReg = Reg + VA.getValNo();
1219  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1220  EVT DstVT = VA.getValVT();
1221  // Special handling for extended integers.
1222  if (SrcVT != DstVT) {
1223  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1224  return false;
1225 
1226  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1227  return false;
1228 
1229  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1230 
1231  if (SrcVT == MVT::i1) {
1232  if (Outs[0].Flags.isSExt())
1233  return false;
1234  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
1235  SrcVT = MVT::i8;
1236  }
1237  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1238                                         ISD::SIGN_EXTEND;
1239  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
1240  SrcReg, /*TODO: Kill=*/false);
1241  }
1242 
1243  // Make the copy.
1244  Register DstReg = VA.getLocReg();
1245  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1246  // Avoid a cross-class copy. This is very unlikely.
1247  if (!SrcRC->contains(DstReg))
1248  return false;
1249  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1250  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1251 
1252  // Add register to return instruction.
1253  RetRegs.push_back(VA.getLocReg());
1254  }
1255 
1256  // Swift calling convention does not require we copy the sret argument
1257  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1258 
1259  // All x86 ABIs require that for returning structs by value we copy
1260  // the sret argument into %rax/%eax (depending on ABI) for the return.
1261  // We saved the argument into a virtual register in the entry block,
1262  // so now we copy the value out and into %rax/%eax.
1263  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
1264  unsigned Reg = X86MFInfo->getSRetReturnReg();
1265  assert(Reg &&
1266  "SRetReturnReg should have been set in LowerFormalArguments()!");
1267  unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1268  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1269  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1270  RetRegs.push_back(RetReg);
1271  }
1272 
1273  // Now emit the RET.
1274  MachineInstrBuilder MIB;
1275  if (X86MFInfo->getBytesToPopOnReturn()) {
1276  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1277  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
1278  .addImm(X86MFInfo->getBytesToPopOnReturn());
1279  } else {
1280  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1281  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
1282  }
1283  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1284  MIB.addReg(RetRegs[i], RegState::Implicit);
1285  return true;
1286 }
1287 
1288 /// X86SelectLoad - Select and emit code to implement load instructions.
1289 ///
1290 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1291  const LoadInst *LI = cast<LoadInst>(I);
1292 
1293  // Atomic loads need special handling.
1294  if (LI->isAtomic())
1295  return false;
1296 
1297  const Value *SV = I->getOperand(0);
1298  if (TLI.supportSwiftError()) {
1299  // Swifterror values can come from either a function parameter with
1300  // swifterror attribute or an alloca with swifterror attribute.
1301  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1302  if (Arg->hasSwiftErrorAttr())
1303  return false;
1304  }
1305 
1306  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1307  if (Alloca->isSwiftError())
1308  return false;
1309  }
1310  }
1311 
1312  MVT VT;
1313  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1314  return false;
1315 
1316  const Value *Ptr = LI->getPointerOperand();
1317 
1318  X86AddressMode AM;
1319  if (!X86SelectAddress(Ptr, AM))
1320  return false;
1321 
1322  unsigned Alignment = LI->getAlignment();
1323  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
1324  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1325  Alignment = ABIAlignment;
1326 
1327  unsigned ResultReg = 0;
1328  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1329  Alignment))
1330  return false;
1331 
1332  updateValueMap(I, ResultReg);
1333  return true;
1334 }
1335 
1336 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1337  bool HasAVX512 = Subtarget->hasAVX512();
1338  bool HasAVX = Subtarget->hasAVX();
1339  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
1340  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
1341 
1342  switch (VT.getSimpleVT().SimpleTy) {
1343  default: return 0;
1344  case MVT::i8: return X86::CMP8rr;
1345  case MVT::i16: return X86::CMP16rr;
1346  case MVT::i32: return X86::CMP32rr;
1347  case MVT::i64: return X86::CMP64rr;
1348  case MVT::f32:
1349  return X86ScalarSSEf32
1350  ? (HasAVX512 ? X86::VUCOMISSZrr
1351  : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
1352  : 0;
1353  case MVT::f64:
1354  return X86ScalarSSEf64
1355  ? (HasAVX512 ? X86::VUCOMISDZrr
1356  : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
1357  : 0;
1358  }
1359 }
1360 
1361 /// If we have a comparison with a constant as the RHS of the comparison, return
1362 /// an opcode that works for the compare (e.g. CMP32ri); otherwise return 0.
1363 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1364  int64_t Val = RHSC->getSExtValue();
1365  switch (VT.getSimpleVT().SimpleTy) {
1366  // Otherwise, we can't fold the immediate into this comparison.
1367  default:
1368  return 0;
1369  case MVT::i8:
1370  return X86::CMP8ri;
1371  case MVT::i16:
1372  if (isInt<8>(Val))
1373  return X86::CMP16ri8;
1374  return X86::CMP16ri;
1375  case MVT::i32:
1376  if (isInt<8>(Val))
1377  return X86::CMP32ri8;
1378  return X86::CMP32ri;
1379  case MVT::i64:
1380  if (isInt<8>(Val))
1381  return X86::CMP64ri8;
1382  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1383  // field.
1384  if (isInt<32>(Val))
1385  return X86::CMP64ri32;
1386  return 0;
1387  }
1388 }
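// For example, comparing an i32 against 5 selects CMP32ri8 (the immediate fits
// in a sign-extended 8-bit field), comparing it against 100000 selects CMP32ri,
// and comparing an i64 against a constant that does not fit in a sign-extended
// 32-bit field returns 0, so the caller falls back to a register-register CMP.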
1389 
1390 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1391  const DebugLoc &CurDbgLoc) {
1392  unsigned Op0Reg = getRegForValue(Op0);
1393  if (Op0Reg == 0) return false;
1394 
1395  // Handle 'null' like i32/i64 0.
1396  if (isa<ConstantPointerNull>(Op1))
1397  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1398 
1399  // We have two options: compare with register or immediate. If the RHS of
1400  // the compare is an immediate that we can fold into this compare, use
1401  // CMPri, otherwise use CMPrr.
1402  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1403  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1404  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1405  .addReg(Op0Reg)
1406  .addImm(Op1C->getSExtValue());
1407  return true;
1408  }
1409  }
1410 
1411  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1412  if (CompareOpc == 0) return false;
1413 
1414  unsigned Op1Reg = getRegForValue(Op1);
1415  if (Op1Reg == 0) return false;
1416  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1417  .addReg(Op0Reg)
1418  .addReg(Op1Reg);
1419 
1420  return true;
1421 }
1422 
1423 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1424  const CmpInst *CI = cast<CmpInst>(I);
1425 
1426  MVT VT;
1427  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1428  return false;
1429 
1430  // Try to optimize or fold the cmp.
1431  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1432  unsigned ResultReg = 0;
1433  switch (Predicate) {
1434  default: break;
1435  case CmpInst::FCMP_FALSE: {
1436  ResultReg = createResultReg(&X86::GR32RegClass);
1437  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1438  ResultReg);
1439  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
1440  X86::sub_8bit);
1441  if (!ResultReg)
1442  return false;
1443  break;
1444  }
1445  case CmpInst::FCMP_TRUE: {
1446  ResultReg = createResultReg(&X86::GR8RegClass);
1447  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1448  ResultReg).addImm(1);
1449  break;
1450  }
1451  }
1452 
1453  if (ResultReg) {
1454  updateValueMap(I, ResultReg);
1455  return true;
1456  }
1457 
1458  const Value *LHS = CI->getOperand(0);
1459  const Value *RHS = CI->getOperand(1);
1460 
1461  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1462  // We don't have to materialize a zero constant for this case and can just use
1463  // %x again on the RHS.
1464  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1465  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1466  if (RHSC && RHSC->isNullValue())
1467  RHS = LHS;
1468  }
1469 
1470  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1471  static const uint16_t SETFOpcTable[2][3] = {
1472  { X86::COND_E, X86::COND_NP, X86::AND8rr },
1473  { X86::COND_NE, X86::COND_P, X86::OR8rr }
1474  };
1475  const uint16_t *SETFOpc = nullptr;
1476  switch (Predicate) {
1477  default: break;
1478  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1479  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1480  }
1481 
1482  ResultReg = createResultReg(&X86::GR8RegClass);
1483  if (SETFOpc) {
1484  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1485  return false;
1486 
1487  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1488  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1489  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1490  FlagReg1).addImm(SETFOpc[0]);
1491  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1492  FlagReg2).addImm(SETFOpc[1]);
1493  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1494  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1495  updateValueMap(I, ResultReg);
1496  return true;
1497  }
1498 
1499  X86::CondCode CC;
1500  bool SwapArgs;
1501  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1502  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1503 
1504  if (SwapArgs)
1505  std::swap(LHS, RHS);
1506 
1507  // Emit a compare of LHS/RHS.
1508  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1509  return false;
1510 
1511  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1512  ResultReg).addImm(CC);
1513  updateValueMap(I, ResultReg);
1514  return true;
1515 }
1516 
1517 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1518  EVT DstVT = TLI.getValueType(DL, I->getType());
1519  if (!TLI.isTypeLegal(DstVT))
1520  return false;
1521 
1522  unsigned ResultReg = getRegForValue(I->getOperand(0));
1523  if (ResultReg == 0)
1524  return false;
1525 
1526  // Handle zero-extension from i1 to i8, which is common.
1527  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1528  if (SrcVT == MVT::i1) {
1529  // Set the high bits to zero.
1530  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1531  SrcVT = MVT::i8;
1532 
1533  if (ResultReg == 0)
1534  return false;
1535  }
1536 
1537  if (DstVT == MVT::i64) {
1538  // Handle extension to 64-bits via sub-register shenanigans.
1539  unsigned MovInst;
1540 
1541  switch (SrcVT.SimpleTy) {
1542  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1543  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1544  case MVT::i32: MovInst = X86::MOV32rr; break;
1545  default: llvm_unreachable("Unexpected zext to i64 source type");
1546  }
1547 
1548  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1549  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1550  .addReg(ResultReg);
1551 
1552  ResultReg = createResultReg(&X86::GR64RegClass);
1553  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1554  ResultReg)
1555  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1556  } else if (DstVT == MVT::i16) {
1557  // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1558  // extend to 32-bits and then extract down to 16-bits.
1559  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1560  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1561  Result32).addReg(ResultReg);
1562 
1563  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1564  X86::sub_16bit);
1565  } else if (DstVT != MVT::i8) {
1566  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1567  ResultReg, /*Kill=*/true);
1568  if (ResultReg == 0)
1569  return false;
1570  }
1571 
1572  updateValueMap(I, ResultReg);
1573  return true;
1574 }
1575 
1576 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1577  EVT DstVT = TLI.getValueType(DL, I->getType());
1578  if (!TLI.isTypeLegal(DstVT))
1579  return false;
1580 
1581  unsigned ResultReg = getRegForValue(I->getOperand(0));
1582  if (ResultReg == 0)
1583  return false;
1584 
1585  // Handle sign-extension from i1 to i8.
1586  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1587  if (SrcVT == MVT::i1) {
1588  // Set the high bits to zero.
1589  unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
1590  /*TODO: Kill=*/false);
1591  if (ZExtReg == 0)
1592  return false;
1593 
1594  // Negate the result to make an 8-bit sign extended value.
1595  ResultReg = createResultReg(&X86::GR8RegClass);
1596  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1597  ResultReg).addReg(ZExtReg);
1598 
1599  SrcVT = MVT::i8;
1600  }
1601 
1602  if (DstVT == MVT::i16) {
1603  // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1604  // extend to 32-bits and then extract down to 16-bits.
1605  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1606  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1607  Result32).addReg(ResultReg);
1608 
1609  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1610  X86::sub_16bit);
1611  } else if (DstVT != MVT::i8) {
1612  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1613  ResultReg, /*Kill=*/true);
1614  if (ResultReg == 0)
1615  return false;
1616  }
1617 
1618  updateValueMap(I, ResultReg);
1619  return true;
1620 }
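// Note on the i1 path above: a "sext i1 %b to iN" is emitted as a zero
// extension of %b to i8 followed by NEG8r, which turns 1 into 0xFF and leaves
// 0 unchanged, producing a correct 8-bit sign-extended value that is then
// widened as needed.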
1621 
1622 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1623  // Unconditional branches are selected by tablegen-generated code.
1624  // Handle a conditional branch.
1625  const BranchInst *BI = cast<BranchInst>(I);
1626  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1627  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1628 
1629  // Fold the common case of a conditional branch with a comparison
1630  // in the same block (values defined on other blocks may not have
1631  // initialized registers).
1632  X86::CondCode CC;
1633  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1634  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1635  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1636 
1637  // Try to optimize or fold the cmp.
1638  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1639  switch (Predicate) {
1640  default: break;
1641  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1642  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1643  }
1644 
1645  const Value *CmpLHS = CI->getOperand(0);
1646  const Value *CmpRHS = CI->getOperand(1);
1647 
1648  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1649  // 0.0.
1650  // We don't have to materialize a zero constant for this case and can just
1651  // use %x again on the RHS.
1652  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1653  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1654  if (CmpRHSC && CmpRHSC->isNullValue())
1655  CmpRHS = CmpLHS;
1656  }
1657 
1658  // Try to take advantage of fallthrough opportunities.
1659  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1660  std::swap(TrueMBB, FalseMBB);
1661  Predicate = CmpInst::getInversePredicate(Predicate);
1662  }
1663 
1664  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1665  // code check. Instead two branch instructions are required to check all
1666  // the flags. First we change the predicate to a supported condition code,
1667  // which will be the first branch. Later on we will emit the second
1668  // branch.
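  // For example, "br (fcmp une float %a, %b), label %T, label %F" ends up
  // roughly as (registers are illustrative):
  //   ucomiss %xmm1, %xmm0
  //   jne %T                     ; the ONE half of the predicate
  //   jp  %T                     ; the unordered (NaN) half
  //   jmp %F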
1669  bool NeedExtraBranch = false;
1670  switch (Predicate) {
1671  default: break;
1672  case CmpInst::FCMP_OEQ:
1673  std::swap(TrueMBB, FalseMBB);
1674  LLVM_FALLTHROUGH;
1675  case CmpInst::FCMP_UNE:
1676  NeedExtraBranch = true;
1677  Predicate = CmpInst::FCMP_ONE;
1678  break;
1679  }
1680 
1681  bool SwapArgs;
1682  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1683  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1684 
1685  if (SwapArgs)
1686  std::swap(CmpLHS, CmpRHS);
1687 
1688  // Emit a compare of the LHS and RHS, setting the flags.
1689  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1690  return false;
1691 
1692  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1693  .addMBB(TrueMBB).addImm(CC);
1694 
1695  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1696  // to UNE above).
1697  if (NeedExtraBranch) {
1698  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1699  .addMBB(TrueMBB).addImm(X86::COND_P);
1700  }
1701 
1702  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1703  return true;
1704  }
1705  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1706  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1707  // typically happen for _Bool and C++ bools.
1708  MVT SourceVT;
1709  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1710  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1711  unsigned TestOpc = 0;
1712  switch (SourceVT.SimpleTy) {
1713  default: break;
1714  case MVT::i8: TestOpc = X86::TEST8ri; break;
1715  case MVT::i16: TestOpc = X86::TEST16ri; break;
1716  case MVT::i32: TestOpc = X86::TEST32ri; break;
1717  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1718  }
1719  if (TestOpc) {
1720  unsigned OpReg = getRegForValue(TI->getOperand(0));
1721  if (OpReg == 0) return false;
1722 
1723  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1724  .addReg(OpReg).addImm(1);
1725 
1726  unsigned JmpCond = X86::COND_NE;
1727  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1728  std::swap(TrueMBB, FalseMBB);
1729  JmpCond = X86::COND_E;
1730  }
1731 
1732  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1733  .addMBB(TrueMBB).addImm(JmpCond);
1734 
1735  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1736  return true;
1737  }
1738  }
1739  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1740  // Fake-request the condition; otherwise the intrinsic might be completely
1741  // optimized away.
1742  unsigned TmpReg = getRegForValue(BI->getCondition());
1743  if (TmpReg == 0)
1744  return false;
1745 
1746  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1747  .addMBB(TrueMBB).addImm(CC);
1748  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1749  return true;
1750  }
1751 
1752  // Otherwise do a clumsy setcc and re-test it.
1753  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1754  // in an explicit cast, so make sure to handle that correctly.
1755  unsigned OpReg = getRegForValue(BI->getCondition());
1756  if (OpReg == 0) return false;
1757 
1758  // In case OpReg is a K register, COPY to a GPR
1759  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1760  unsigned KOpReg = OpReg;
1761  OpReg = createResultReg(&X86::GR32RegClass);
1762  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1763  TII.get(TargetOpcode::COPY), OpReg)
1764  .addReg(KOpReg);
1765  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
1766  X86::sub_8bit);
1767  }
1768  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1769  .addReg(OpReg)
1770  .addImm(1);
1771  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1772  .addMBB(TrueMBB).addImm(X86::COND_NE);
1773  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1774  return true;
1775 }
1776 
1777 bool X86FastISel::X86SelectShift(const Instruction *I) {
1778  unsigned CReg = 0, OpReg = 0;
1779  const TargetRegisterClass *RC = nullptr;
1780  if (I->getType()->isIntegerTy(8)) {
1781  CReg = X86::CL;
1782  RC = &X86::GR8RegClass;
1783  switch (I->getOpcode()) {
1784  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1785  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1786  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1787  default: return false;
1788  }
1789  } else if (I->getType()->isIntegerTy(16)) {
1790  CReg = X86::CX;
1791  RC = &X86::GR16RegClass;
1792  switch (I->getOpcode()) {
1793  default: llvm_unreachable("Unexpected shift opcode");
1794  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1795  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1796  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1797  }
1798  } else if (I->getType()->isIntegerTy(32)) {
1799  CReg = X86::ECX;
1800  RC = &X86::GR32RegClass;
1801  switch (I->getOpcode()) {
1802  default: llvm_unreachable("Unexpected shift opcode");
1803  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1804  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1805  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1806  }
1807  } else if (I->getType()->isIntegerTy(64)) {
1808  CReg = X86::RCX;
1809  RC = &X86::GR64RegClass;
1810  switch (I->getOpcode()) {
1811  default: llvm_unreachable("Unexpected shift opcode");
1812  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1813  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1814  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1815  }
1816  } else {
1817  return false;
1818  }
1819 
1820  MVT VT;
1821  if (!isTypeLegal(I->getType(), VT))
1822  return false;
1823 
1824  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1825  if (Op0Reg == 0) return false;
1826 
1827  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1828  if (Op1Reg == 0) return false;
1829  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1830  CReg).addReg(Op1Reg);
1831 
1832  // The shift instruction uses X86::CL. If we defined a super-register
1833  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
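  // For a 32-bit shift, for instance, the emitted sequence is roughly:
  //   %ecx = COPY %amt
  //   %cl  = KILL %ecx           ; only CL is actually read by the shift
  //   %res = SHL32rCL %val       ; shift count implicitly in CL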
1834  if (CReg != X86::CL)
1835  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1836  TII.get(TargetOpcode::KILL), X86::CL)
1837  .addReg(CReg, RegState::Kill);
1838 
1839  unsigned ResultReg = createResultReg(RC);
1840  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1841  .addReg(Op0Reg);
1842  updateValueMap(I, ResultReg);
1843  return true;
1844 }
1845 
1846 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1847  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1848  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1849  const static bool S = true; // IsSigned
1850  const static bool U = false; // !IsSigned
1851  const static unsigned Copy = TargetOpcode::COPY;
1852  // For the X86 DIV/IDIV instruction, in most cases the dividend
1853  // (numerator) must be in a specific register pair highreg:lowreg,
1854  // producing the quotient in lowreg and the remainder in highreg.
1855  // For most data types, to set up the instruction, the dividend is
1856  // copied into lowreg, and lowreg is sign-extended or zero-extended
1857  // into highreg. The exception is i8, where the dividend is defined
1858  // as a single register rather than a register pair, and we
1859  // therefore directly sign-extend or zero-extend the dividend into
1860  // lowreg, instead of copying, and ignore the highreg.
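  // For example, a 32-bit sdiv/srem is set up roughly as:
  //   %eax = COPY %dividend
  //   cdq                        ; sign-extend EAX into EDX
  //   idivl %divisor             ; quotient in EAX, remainder in EDX
  // while the unsigned forms zero EDX with a MOV32r0 instead of the CDQ.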
1861  const static struct DivRemEntry {
1862  // The following portion depends only on the data type.
1863  const TargetRegisterClass *RC;
1864  unsigned LowInReg; // low part of the register pair
1865  unsigned HighInReg; // high part of the register pair
1866  // The following portion depends on both the data type and the operation.
1867  struct DivRemResult {
1868  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1869  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1870  // highreg, or copying a zero into highreg.
1871  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1872  // zero/sign-extending into lowreg for i8.
1873  unsigned DivRemResultReg; // Register containing the desired result.
1874  bool IsOpSigned; // Whether to use signed or unsigned form.
1875  } ResultTable[NumOps];
1876  } OpTable[NumTypes] = {
1877  { &X86::GR8RegClass, X86::AX, 0, {
1878  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1879  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1880  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1881  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1882  }
1883  }, // i8
1884  { &X86::GR16RegClass, X86::AX, X86::DX, {
1885  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1886  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1887  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1888  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1889  }
1890  }, // i16
1891  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1892  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1893  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1894  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1895  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1896  }
1897  }, // i32
1898  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1899  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1900  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1901  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1902  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1903  }
1904  }, // i64
1905  };
1906 
1907  MVT VT;
1908  if (!isTypeLegal(I->getType(), VT))
1909  return false;
1910 
1911  unsigned TypeIndex, OpIndex;
1912  switch (VT.SimpleTy) {
1913  default: return false;
1914  case MVT::i8: TypeIndex = 0; break;
1915  case MVT::i16: TypeIndex = 1; break;
1916  case MVT::i32: TypeIndex = 2; break;
1917  case MVT::i64: TypeIndex = 3;
1918  if (!Subtarget->is64Bit())
1919  return false;
1920  break;
1921  }
1922 
1923  switch (I->getOpcode()) {
1924  default: llvm_unreachable("Unexpected div/rem opcode");
1925  case Instruction::SDiv: OpIndex = 0; break;
1926  case Instruction::SRem: OpIndex = 1; break;
1927  case Instruction::UDiv: OpIndex = 2; break;
1928  case Instruction::URem: OpIndex = 3; break;
1929  }
1930 
1931  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1932  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1933  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1934  if (Op0Reg == 0)
1935  return false;
1936  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1937  if (Op1Reg == 0)
1938  return false;
1939 
1940  // Move op0 into low-order input register.
1941  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1942  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1943  // Zero-extend or sign-extend into high-order input register.
1944  if (OpEntry.OpSignExtend) {
1945  if (OpEntry.IsOpSigned)
1946  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1947  TII.get(OpEntry.OpSignExtend));
1948  else {
1949  unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1950  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1951  TII.get(X86::MOV32r0), Zero32);
1952 
1953  // Copy the zero into the appropriate sub/super/identical physical
1954  // register. Unfortunately the operations needed are not uniform enough
1955  // to fit neatly into the table above.
1956  if (VT == MVT::i16) {
1957  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1958  TII.get(Copy), TypeEntry.HighInReg)
1959  .addReg(Zero32, 0, X86::sub_16bit);
1960  } else if (VT == MVT::i32) {
1961  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1962  TII.get(Copy), TypeEntry.HighInReg)
1963  .addReg(Zero32);
1964  } else if (VT == MVT::i64) {
1965  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1966  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1967  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1968  }
1969  }
1970  }
1971  // Generate the DIV/IDIV instruction.
1972  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1973  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1974  // For i8 remainder, we can't reference AH directly, as we'll end
1975  // up with bogus copies like %r9b = COPY %ah. Reference AX
1976  // instead to prevent AH references in a REX-prefixed instruction.
1977  //
1978  // The current assumption of the fast register allocator is that isel
1979  // won't generate explicit references to the GR8_NOREX registers. If
1980  // the allocator and/or the backend get enhanced to be more robust in
1981  // that regard, this can be, and should be, removed.
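  // For example, an i8 urem on x86-64 extracts the remainder roughly as:
  //   %tmp16 = COPY %ax
  //   %res16 = SHR16ri %tmp16, 8 ; move AH's bits into the low byte
  //   %res8  = sub_8bit of %res16
  // rather than copying straight out of AH.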
1982  unsigned ResultReg = 0;
1983  if ((I->getOpcode() == Instruction::SRem ||
1984  I->getOpcode() == Instruction::URem) &&
1985  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
1986  unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
1987  unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
1988  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1989  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
1990 
1991  // Shift AX right by 8 bits instead of using AH.
1992  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
1993  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
1994 
1995  // Now reference the 8-bit subreg of the result.
1996  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
1997  /*Kill=*/true, X86::sub_8bit);
1998  }
1999  // Copy the result out of the physreg if we haven't already.
2000  if (!ResultReg) {
2001  ResultReg = createResultReg(TypeEntry.RC);
2002  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
2003  .addReg(OpEntry.DivRemResultReg);
2004  }
2005  updateValueMap(I, ResultReg);
2006 
2007  return true;
2008 }
2009 
2010 /// Emit a conditional move instruction (if they are supported) to lower
2011 /// the select.
2012 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2013  // Check if the subtarget supports these instructions.
2014  if (!Subtarget->hasCMov())
2015  return false;
2016 
2017  // FIXME: Add support for i8.
2018  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2019  return false;
2020 
2021  const Value *Cond = I->getOperand(0);
2022  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2023  bool NeedTest = true;
2024  X86::CondCode CC = X86::COND_NE;
2025 
2026  // Optimize conditions coming from a compare if both instructions are in the
2027  // same basic block (values defined in other basic blocks may not have
2028  // initialized registers).
2029  const auto *CI = dyn_cast<CmpInst>(Cond);
2030  if (CI && (CI->getParent() == I->getParent())) {
2031  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2032 
2033  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2034  static const uint16_t SETFOpcTable[2][3] = {
2035  { X86::COND_NP, X86::COND_E, X86::TEST8rr },
2036  { X86::COND_P, X86::COND_NE, X86::OR8rr }
2037  };
2038  const uint16_t *SETFOpc = nullptr;
2039  switch (Predicate) {
2040  default: break;
2041  case CmpInst::FCMP_OEQ:
2042  SETFOpc = &SETFOpcTable[0][0];
2043  Predicate = CmpInst::ICMP_NE;
2044  break;
2045  case CmpInst::FCMP_UNE:
2046  SETFOpc = &SETFOpcTable[1][0];
2047  Predicate = CmpInst::ICMP_NE;
2048  break;
2049  }
2050 
2051  bool NeedSwap;
2052  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2053  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2054 
2055  const Value *CmpLHS = CI->getOperand(0);
2056  const Value *CmpRHS = CI->getOperand(1);
2057  if (NeedSwap)
2058  std::swap(CmpLHS, CmpRHS);
2059 
2060  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2061  // Emit a compare of the LHS and RHS, setting the flags.
2062  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2063  return false;
2064 
2065  if (SETFOpc) {
2066  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
2067  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
2068  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2069  FlagReg1).addImm(SETFOpc[0]);
2070  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2071  FlagReg2).addImm(SETFOpc[1]);
2072  auto const &II = TII.get(SETFOpc[2]);
2073  if (II.getNumDefs()) {
2074  unsigned TmpReg = createResultReg(&X86::GR8RegClass);
2075  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2076  .addReg(FlagReg2).addReg(FlagReg1);
2077  } else {
2078  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2079  .addReg(FlagReg2).addReg(FlagReg1);
2080  }
2081  }
2082  NeedTest = false;
2083  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2084  // Fake-request the condition; otherwise the intrinsic might be completely
2085  // optimized away.
2086  unsigned TmpReg = getRegForValue(Cond);
2087  if (TmpReg == 0)
2088  return false;
2089 
2090  NeedTest = false;
2091  }
2092 
2093  if (NeedTest) {
2094  // Selects operate on i1, but CondReg is 8 bits wide and may contain
2095  // garbage: only the least significant bit is guaranteed to be
2096  // accurate. If we read more than the LSB, we may see non-zero values
2097  // where the LSB is zero. Therefore, we have to truncate CondReg to i1 for
2098  // the select. This is achieved by performing TEST against 1.
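  // For example, "select i1 %c, i32 %a, i32 %b" without a foldable compare
  // becomes roughly (registers are illustrative):
  //   testb   $1, %cl            ; only the low bit of the i8 condition counts
  //   movl    %ebx, %eax         ; start from the false value %b
  //   cmovnel %edi, %eax         ; overwrite with the true value %a if set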
2099  unsigned CondReg = getRegForValue(Cond);
2100  if (CondReg == 0)
2101  return false;
2102  bool CondIsKill = hasTrivialKill(Cond);
2103 
2104  // In case OpReg is a K register, COPY to a GPR
2105  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2106  unsigned KCondReg = CondReg;
2107  CondReg = createResultReg(&X86::GR32RegClass);
2108  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2109  TII.get(TargetOpcode::COPY), CondReg)
2110  .addReg(KCondReg, getKillRegState(CondIsKill));
2111  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2112  X86::sub_8bit);
2113  }
2114  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2115  .addReg(CondReg, getKillRegState(CondIsKill))
2116  .addImm(1);
2117  }
2118 
2119  const Value *LHS = I->getOperand(1);
2120  const Value *RHS = I->getOperand(2);
2121 
2122  unsigned RHSReg = getRegForValue(RHS);
2123  bool RHSIsKill = hasTrivialKill(RHS);
2124 
2125  unsigned LHSReg = getRegForValue(LHS);
2126  bool LHSIsKill = hasTrivialKill(LHS);
2127 
2128  if (!LHSReg || !RHSReg)
2129  return false;
2130 
2131  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2132  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8);
2133  unsigned ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill,
2134  LHSReg, LHSIsKill, CC);
2135  updateValueMap(I, ResultReg);
2136  return true;
2137 }
2138 
2139 /// Emit SSE or AVX instructions to lower the select.
2140 ///
2141 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2142 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2143 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
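///
/// For example, with plain SSE2 a "select (fcmp olt double %x, %y), %a, %b"
/// is simulated roughly as:
///   mask = CMPSD %x, %y, lt    ; all-ones if %x < %y, else all-zeros
///   t1   = ANDPD  mask, %a
///   t2   = ANDNPD mask, %b     ; ~mask & %b
///   res  = ORPD   t1, t2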
2144 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2145  // Optimize conditions coming from a compare if both instructions are in the
2146  // same basic block (values defined in other basic blocks may not have
2147  // initialized registers).
2148  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2149  if (!CI || (CI->getParent() != I->getParent()))
2150  return false;
2151 
2152  if (I->getType() != CI->getOperand(0)->getType() ||
2153  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2154  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2155  return false;
2156 
2157  const Value *CmpLHS = CI->getOperand(0);
2158  const Value *CmpRHS = CI->getOperand(1);
2159  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2160 
2161  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2162  // We don't have to materialize a zero constant for this case and can just use
2163  // %x again on the RHS.
2164  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2165  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2166  if (CmpRHSC && CmpRHSC->isNullValue())
2167  CmpRHS = CmpLHS;
2168  }
2169 
2170  unsigned CC;
2171  bool NeedSwap;
2172  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2173  if (CC > 7 && !Subtarget->hasAVX())
2174  return false;
2175 
2176  if (NeedSwap)
2177  std::swap(CmpLHS, CmpRHS);
2178 
2179  const Value *LHS = I->getOperand(1);
2180  const Value *RHS = I->getOperand(2);
2181 
2182  unsigned LHSReg = getRegForValue(LHS);
2183  bool LHSIsKill = hasTrivialKill(LHS);
2184 
2185  unsigned RHSReg = getRegForValue(RHS);
2186  bool RHSIsKill = hasTrivialKill(RHS);
2187 
2188  unsigned CmpLHSReg = getRegForValue(CmpLHS);
2189  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
2190 
2191  unsigned CmpRHSReg = getRegForValue(CmpRHS);
2192  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
2193 
2194  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2195  return false;
2196 
2197  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2198  unsigned ResultReg;
2199 
2200  if (Subtarget->hasAVX512()) {
2201  // If we have AVX512 we can use a mask compare and masked movss/sd.
2202  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2203  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2204 
2205  unsigned CmpOpcode =
2206  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2207  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
2208  CmpRHSReg, CmpRHSIsKill, CC);
2209 
2210  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2211  // bits of the result register since it's not based on any of the inputs.
2212  unsigned ImplicitDefReg = createResultReg(VR128X);
2213  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2214  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2215 
2216  // Place RHSReg as the passthru of the masked movss/sd operation and put
2217  // LHS in the input. The mask input comes from the compare.
2218  unsigned MovOpcode =
2219  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2220  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
2221  CmpReg, true, ImplicitDefReg, true,
2222  LHSReg, LHSIsKill);
2223 
2224  ResultReg = createResultReg(RC);
2225  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2226  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2227 
2228  } else if (Subtarget->hasAVX()) {
2229  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2230 
2231  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2232  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2233  // uses XMM0 as the selection register. That may need just as many
2234  // instructions as the AND/ANDN/OR sequence due to register moves, so
2235  // don't bother.
2236  unsigned CmpOpcode =
2237  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2238  unsigned BlendOpcode =
2239  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2240 
2241  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
2242  CmpRHSReg, CmpRHSIsKill, CC);
2243  unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
2244  LHSReg, LHSIsKill, CmpReg, true);
2245  ResultReg = createResultReg(RC);
2246  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2247  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2248  } else {
2249  // Choose the SSE instruction sequence based on data type (float or double).
2250  static const uint16_t OpcTable[2][4] = {
2251  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2252  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2253  };
2254 
2255  const uint16_t *Opc = nullptr;
2256  switch (RetVT.SimpleTy) {
2257  default: return false;
2258  case MVT::f32: Opc = &OpcTable[0][0]; break;
2259  case MVT::f64: Opc = &OpcTable[1][0]; break;
2260  }
2261 
2262  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2263  unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
2264  CmpRHSReg, CmpRHSIsKill, CC);
2265  unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
2266  LHSReg, LHSIsKill);
2267  unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
2268  RHSReg, RHSIsKill);
2269  unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
2270  AndReg, /*IsKill=*/true);
2271  ResultReg = createResultReg(RC);
2272  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2273  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2274  }
2275  updateValueMap(I, ResultReg);
2276  return true;
2277 }
2278 
2279 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2280  // These are pseudo CMOV instructions and will be later expanded into control-
2281  // flow.
2282  unsigned Opc;
2283  switch (RetVT.SimpleTy) {
2284  default: return false;
2285  case MVT::i8: Opc = X86::CMOV_GR8; break;
2286  case MVT::i16: Opc = X86::CMOV_GR16; break;
2287  case MVT::i32: Opc = X86::CMOV_GR32; break;
2288  case MVT::f32: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X
2289  : X86::CMOV_FR32; break;
2290  case MVT::f64: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X
2291  : X86::CMOV_FR64; break;
2292  }
2293 
2294  const Value *Cond = I->getOperand(0);
2295  X86::CondCode CC = X86::COND_NE;
2296 
2297  // Optimize conditions coming from a compare if both instructions are in the
2298  // same basic block (values defined in other basic blocks may not have
2299  // initialized registers).
2300  const auto *CI = dyn_cast<CmpInst>(Cond);
2301  if (CI && (CI->getParent() == I->getParent())) {
2302  bool NeedSwap;
2303  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2304  if (CC > X86::LAST_VALID_COND)
2305  return false;
2306 
2307  const Value *CmpLHS = CI->getOperand(0);
2308  const Value *CmpRHS = CI->getOperand(1);
2309 
2310  if (NeedSwap)
2311  std::swap(CmpLHS, CmpRHS);
2312 
2313  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2314  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2315  return false;
2316  } else {
2317  unsigned CondReg = getRegForValue(Cond);
2318  if (CondReg == 0)
2319  return false;
2320  bool CondIsKill = hasTrivialKill(Cond);
2321 
2322  // In case OpReg is a K register, COPY to a GPR
2323  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2324  unsigned KCondReg = CondReg;
2325  CondReg = createResultReg(&X86::GR32RegClass);
2326  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2327  TII.get(TargetOpcode::COPY), CondReg)
2328  .addReg(KCondReg, getKillRegState(CondIsKill));
2329  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2330  X86::sub_8bit);
2331  }
2332  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2333  .addReg(CondReg, getKillRegState(CondIsKill))
2334  .addImm(1);
2335  }
2336 
2337  const Value *LHS = I->getOperand(1);
2338  const Value *RHS = I->getOperand(2);
2339 
2340  unsigned LHSReg = getRegForValue(LHS);
2341  bool LHSIsKill = hasTrivialKill(LHS);
2342 
2343  unsigned RHSReg = getRegForValue(RHS);
2344  bool RHSIsKill = hasTrivialKill(RHS);
2345 
2346  if (!LHSReg || !RHSReg)
2347  return false;
2348 
2349  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2350 
2351  unsigned ResultReg =
2352  fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
2353  updateValueMap(I, ResultReg);
2354  return true;
2355 }
2356 
2357 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2358  MVT RetVT;
2359  if (!isTypeLegal(I->getType(), RetVT))
2360  return false;
2361 
2362  // Check if we can fold the select.
2363  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2364  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2365  const Value *Opnd = nullptr;
2366  switch (Predicate) {
2367  default: break;
2368  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2369  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2370  }
2371  // No need for a select anymore - this is an unconditional move.
2372  if (Opnd) {
2373  unsigned OpReg = getRegForValue(Opnd);
2374  if (OpReg == 0)
2375  return false;
2376  bool OpIsKill = hasTrivialKill(Opnd);
2377  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2378  unsigned ResultReg = createResultReg(RC);
2379  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2380  TII.get(TargetOpcode::COPY), ResultReg)
2381  .addReg(OpReg, getKillRegState(OpIsKill));
2382  updateValueMap(I, ResultReg);
2383  return true;
2384  }
2385  }
2386 
2387  // First try to use real conditional move instructions.
2388  if (X86FastEmitCMoveSelect(RetVT, I))
2389  return true;
2390 
2391  // Try to use a sequence of SSE instructions to simulate a conditional move.
2392  if (X86FastEmitSSESelect(RetVT, I))
2393  return true;
2394 
2395  // Fall-back to pseudo conditional move instructions, which will be later
2396  // converted to control-flow.
2397  if (X86FastEmitPseudoSelect(RetVT, I))
2398  return true;
2399 
2400  return false;
2401 }
2402 
2403 // Common code for X86SelectSIToFP and X86SelectUIToFP.
2404 bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2405  // The target-independent selection algorithm in FastISel already knows how
2406  // to select a SINT_TO_FP if the target is SSE but not AVX.
2407  // Early exit if the subtarget doesn't have AVX.
2408  // Unsigned conversion requires avx512.
2409  bool HasAVX512 = Subtarget->hasAVX512();
2410  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2411  return false;
2412 
2413  // TODO: We could sign extend narrower types.
2414  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2415  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
2416  return false;
2417 
2418  // Select integer to float/double conversion.
2419  unsigned OpReg = getRegForValue(I->getOperand(0));
2420  if (OpReg == 0)
2421  return false;
2422 
2423  unsigned Opcode;
2424 
2425  static const uint16_t SCvtOpc[2][2][2] = {
2426  { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2427  { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2428  { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2429  { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2430  };
2431  static const uint16_t UCvtOpc[2][2] = {
2432  { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2433  { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2434  };
2435  bool Is64Bit = SrcVT == MVT::i64;
2436 
2437  if (I->getType()->isDoubleTy()) {
2438  // s/uitofp int -> double
2439  Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2440  } else if (I->getType()->isFloatTy()) {
2441  // s/uitofp int -> float
2442  Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2443  } else
2444  return false;
2445 
2446  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
2447  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
2448  unsigned ImplicitDefReg = createResultReg(RC);
2449  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2450  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2451  unsigned ResultReg =
2452  fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
2453  updateValueMap(I, ResultReg);
2454  return true;
2455 }
2456 
2457 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2458  return X86SelectIntToFP(I, /*IsSigned*/true);
2459 }
2460 
2461 bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2462  return X86SelectIntToFP(I, /*IsSigned*/false);
2463 }
2464 
2465 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2466 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2467  unsigned TargetOpc,
2468  const TargetRegisterClass *RC) {
2469  assert((I->getOpcode() == Instruction::FPExt ||
2470  I->getOpcode() == Instruction::FPTrunc) &&
2471  "Instruction must be an FPExt or FPTrunc!");
2472  bool HasAVX = Subtarget->hasAVX();
2473 
2474  unsigned OpReg = getRegForValue(I->getOperand(0));
2475  if (OpReg == 0)
2476  return false;
2477 
2478  unsigned ImplicitDefReg;
2479  if (HasAVX) {
2480  ImplicitDefReg = createResultReg(RC);
2481  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2482  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2483 
2484  }
2485 
2486  unsigned ResultReg = createResultReg(RC);
2487  MachineInstrBuilder MIB;
2488  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2489  ResultReg);
2490 
2491  if (HasAVX)
2492  MIB.addReg(ImplicitDefReg);
2493 
2494  MIB.addReg(OpReg);
2495  updateValueMap(I, ResultReg);
2496  return true;
2497 }
2498 
2499 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2500  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2501  I->getOperand(0)->getType()->isFloatTy()) {
2502  bool HasAVX512 = Subtarget->hasAVX512();
2503  // fpext from float to double.
2504  unsigned Opc =
2505  HasAVX512 ? X86::VCVTSS2SDZrr
2506  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2507  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
2508  }
2509 
2510  return false;
2511 }
2512 
2513 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2514  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2515  I->getOperand(0)->getType()->isDoubleTy()) {
2516  bool HasAVX512 = Subtarget->hasAVX512();
2517  // fptrunc from double to float.
2518  unsigned Opc =
2519  HasAVX512 ? X86::VCVTSD2SSZrr
2520  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2521  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
2522  }
2523 
2524  return false;
2525 }
2526 
2527 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2528  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2529  EVT DstVT = TLI.getValueType(DL, I->getType());
2530 
2531  // This code only handles truncation to byte.
2532  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2533  return false;
2534  if (!TLI.isTypeLegal(SrcVT))
2535  return false;
2536 
2537  unsigned InputReg = getRegForValue(I->getOperand(0));
2538  if (!InputReg)
2539  // Unhandled operand. Halt "fast" selection and bail.
2540  return false;
2541 
2542  if (SrcVT == MVT::i8) {
2543  // Truncate from i8 to i1; no code needed.
2544  updateValueMap(I, InputReg);
2545  return true;
2546  }
2547 
2548  // Issue an extract_subreg.
2549  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2550  InputReg, false,
2551  X86::sub_8bit);
2552  if (!ResultReg)
2553  return false;
2554 
2555  updateValueMap(I, ResultReg);
2556  return true;
2557 }
2558 
2559 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2560  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2561 }
2562 
2563 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2564  X86AddressMode SrcAM, uint64_t Len) {
2565 
2566  // Make sure we don't bloat code by inlining very large memcpy's.
2567  if (!IsMemcpySmall(Len))
2568  return false;
2569 
2570  bool i64Legal = Subtarget->is64Bit();
2571 
2572  // We don't care about alignment here since we just emit integer accesses.
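  // For example, a 13-byte copy on x86-64 is split into an i64, then an i32,
  // then an i8 load/store pair.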
2573  while (Len) {
2574  MVT VT;
2575  if (Len >= 8 && i64Legal)
2576  VT = MVT::i64;
2577  else if (Len >= 4)
2578  VT = MVT::i32;
2579  else if (Len >= 2)
2580  VT = MVT::i16;
2581  else
2582  VT = MVT::i8;
2583 
2584  unsigned Reg;
2585  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2586  RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2587  assert(RV && "Failed to emit load or store??");
2588 
2589  unsigned Size = VT.getSizeInBits()/8;
2590  Len -= Size;
2591  DestAM.Disp += Size;
2592  SrcAM.Disp += Size;
2593  }
2594 
2595  return true;
2596 }
2597 
2598 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2599  // FIXME: Handle more intrinsics.
2600  switch (II->getIntrinsicID()) {
2601  default: return false;
2602  case Intrinsic::convert_from_fp16:
2603  case Intrinsic::convert_to_fp16: {
2604  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2605  return false;
2606 
2607  const Value *Op = II->getArgOperand(0);
2608  unsigned InputReg = getRegForValue(Op);
2609  if (InputReg == 0)
2610  return false;
2611 
2612  // F16C only allows converting from float to half and from half to float.
2613  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2614  if (IsFloatToHalf) {
2615  if (!Op->getType()->isFloatTy())
2616  return false;
2617  } else {
2618  if (!II->getType()->isFloatTy())
2619  return false;
2620  }
2621 
2622  unsigned ResultReg = 0;
2623  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2624  if (IsFloatToHalf) {
2625  // 'InputReg' is implicitly promoted from register class FR32 to
2626  // register class VR128 by method 'constrainOperandRegClass' which is
2627  // directly called by 'fastEmitInst_ri'.
2628  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2629  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2630  // It's consistent with the other FP instructions, which are usually
2631  // controlled by MXCSR.
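  // The float->half path therefore boils down to roughly:
  //   vcvtps2ph $4, %xmm0, %xmm0 ; imm 0b100 = round using MXCSR.RC
  //   vmovd     %xmm0, %eax      ; the VMOVPDI2DIrr below
  // with the i16 result taken from the low word of that GR32.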
2632  InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
2633 
2634  // Move the lower 32-bits of ResultReg to another register of class GR32.
2635  ResultReg = createResultReg(&X86::GR32RegClass);
2636  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2637  TII.get(X86::VMOVPDI2DIrr), ResultReg)
2638  .addReg(InputReg, RegState::Kill);
2639 
2640  // The result value is in the lower 16-bits of ResultReg.
2641  unsigned RegIdx = X86::sub_16bit;
2642  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
2643  } else {
2644  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2645  // Explicitly sign-extend the input to 32-bit.
2646  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
2647  /*Kill=*/false);
2648 
2649  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2650  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2651  InputReg, /*Kill=*/true);
2652 
2653  InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
2654 
2655  // The result value is in the lower 32-bits of ResultReg.
2656  // Emit an explicit copy from register class VR128 to register class FR32.
2657  ResultReg = createResultReg(&X86::FR32RegClass);
2658  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2659  TII.get(TargetOpcode::COPY), ResultReg)
2660  .addReg(InputReg, RegState::Kill);
2661  }
2662 
2663  updateValueMap(II, ResultReg);
2664  return true;
2665  }
2666  case Intrinsic::frameaddress: {
2667  MachineFunction *MF = FuncInfo.MF;
2668  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2669  return false;
2670 
2671  Type *RetTy = II->getCalledFunction()->getReturnType();
2672 
2673  MVT VT;
2674  if (!isTypeLegal(RetTy, VT))
2675  return false;
2676 
2677  unsigned Opc;
2678  const TargetRegisterClass *RC = nullptr;
2679 
2680  switch (VT.SimpleTy) {
2681  default: llvm_unreachable("Invalid result type for frameaddress.");
2682  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2683  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2684  }
2685 
2686  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2687  // we get the wrong frame register.
2688  MachineFrameInfo &MFI = MF->getFrameInfo();
2689  MFI.setFrameAddressIsTaken(true);
2690 
2691  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2692  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2693  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2694  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2695  "Invalid Frame Register!");
2696 
2697  // Always make a copy of the frame register to a vreg first, so that we
2698  // never directly reference the frame register (the TwoAddressInstruction-
2699  // Pass doesn't like that).
2700  unsigned SrcReg = createResultReg(RC);
2701  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2702  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2703 
2704  // Now recursively load from the frame address.
2705  // movq (%rbp), %rax
2706  // movq (%rax), %rax
2707  // movq (%rax), %rax
2708  // ...
2709  unsigned DestReg;
2710  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2711  while (Depth--) {
2712  DestReg = createResultReg(RC);
2713  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2714  TII.get(Opc), DestReg), SrcReg);
2715  SrcReg = DestReg;
2716  }
2717 
2718  updateValueMap(II, SrcReg);
2719  return true;
2720  }
2721  case Intrinsic::memcpy: {
2722  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2723  // Don't handle volatile or variable length memcpys.
2724  if (MCI->isVolatile())
2725  return false;
2726 
2727  if (isa<ConstantInt>(MCI->getLength())) {
2728  // Small memcpy's are common enough that we want to do them
2729  // without a call if possible.
2730  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2731  if (IsMemcpySmall(Len)) {
2732  X86AddressMode DestAM, SrcAM;
2733  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2734  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2735  return false;
2736  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2737  return true;
2738  }
2739  }
2740 
2741  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2742  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2743  return false;
2744 
2745  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2746  return false;
2747 
2748  return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
2749  }
2750  case Intrinsic::memset: {
2751  const MemSetInst *MSI = cast<MemSetInst>(II);
2752 
2753  if (MSI->isVolatile())
2754  return false;
2755 
2756  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2757  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2758  return false;
2759 
2760  if (MSI->getDestAddressSpace() > 255)
2761  return false;
2762 
2763  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
2764  }
2765  case Intrinsic::stackprotector: {
2766  // Emit code to store the stack guard onto the stack.
2767  EVT PtrTy = TLI.getPointerTy(DL);
2768 
2769  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2770  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2771 
2772  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2773 
2774  // Grab the frame index.
2775  X86AddressMode AM;
2776  if (!X86SelectAddress(Slot, AM)) return false;
2777  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2778  return true;
2779  }
2780  case Intrinsic::dbg_declare: {
2781  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2782  X86AddressMode AM;
2783  assert(DI->getAddress() && "Null address should be checked earlier!");
2784  if (!X86SelectAddress(DI->getAddress(), AM))
2785  return false;
2786  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2787  // FIXME may need to add RegState::Debug to any registers produced,
2788  // although ESP/EBP should be the only ones at the moment.
2790  "Expected inlined-at fields to agree");
2791  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2792  .addImm(0)
2793  .addMetadata(DI->getVariable())
2794  .addMetadata(DI->getExpression());
2795  return true;
2796  }
2797  case Intrinsic::trap: {
2798  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2799  return true;
2800  }
2801  case Intrinsic::sqrt: {
2802  if (!Subtarget->hasSSE1())
2803  return false;
2804 
2805  Type *RetTy = II->getCalledFunction()->getReturnType();
2806 
2807  MVT VT;
2808  if (!isTypeLegal(RetTy, VT))
2809  return false;
2810 
2811  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2812  // is not generated by FastISel yet.
2813  // FIXME: Update this code once tablegen can handle it.
2814  static const uint16_t SqrtOpc[3][2] = {
2815  { X86::SQRTSSr, X86::SQRTSDr },
2816  { X86::VSQRTSSr, X86::VSQRTSDr },
2817  { X86::VSQRTSSZr, X86::VSQRTSDZr },
2818  };
2819  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2820  Subtarget->hasAVX() ? 1 :
2821  0;
2822  unsigned Opc;
2823  switch (VT.SimpleTy) {
2824  default: return false;
2825  case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2826  case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2827  }
2828 
2829  const Value *SrcVal = II->getArgOperand(0);
2830  unsigned SrcReg = getRegForValue(SrcVal);
2831 
2832  if (SrcReg == 0)
2833  return false;
2834 
2835  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2836  unsigned ImplicitDefReg = 0;
2837  if (AVXLevel > 0) {
2838  ImplicitDefReg = createResultReg(RC);
2839  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2840  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2841  }
2842 
2843  unsigned ResultReg = createResultReg(RC);
2844  MachineInstrBuilder MIB;
2845  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2846  ResultReg);
2847 
2848  if (ImplicitDefReg)
2849  MIB.addReg(ImplicitDefReg);
2850 
2851  MIB.addReg(SrcReg);
2852 
2853  updateValueMap(II, ResultReg);
2854  return true;
2855  }
2856  case Intrinsic::sadd_with_overflow:
2857  case Intrinsic::uadd_with_overflow:
2858  case Intrinsic::ssub_with_overflow:
2859  case Intrinsic::usub_with_overflow:
2860  case Intrinsic::smul_with_overflow:
2861  case Intrinsic::umul_with_overflow: {
2862  // This implements the basic lowering of the xalu with overflow intrinsics
2863  // into add/sub/mul followed by either seto or setb.
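  // For example, "llvm.sadd.with.overflow.i32(%a, %b)" lowers roughly to:
  //   addl %b_reg, %a_reg        ; first struct element (the sum)
  //   seto %cl                   ; second struct element (the overflow bit)
  // with the unsigned add/sub variants using setb instead of seto.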
2864  const Function *Callee = II->getCalledFunction();
2865  auto *Ty = cast<StructType>(Callee->getReturnType());
2866  Type *RetTy = Ty->getTypeAtIndex(0U);
2867  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2868  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2869  "Overflow value expected to be an i1");
2870 
2871  MVT VT;
2872  if (!isTypeLegal(RetTy, VT))
2873  return false;
2874 
2875  if (VT < MVT::i8 || VT > MVT::i64)
2876  return false;
2877 
2878  const Value *LHS = II->getArgOperand(0);
2879  const Value *RHS = II->getArgOperand(1);
2880 
2881  // Canonicalize immediate to the RHS.
2882  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2883  isCommutativeIntrinsic(II))
2884  std::swap(LHS, RHS);
2885 
2886  unsigned BaseOpc, CondCode;
2887  switch (II->getIntrinsicID()) {
2888  default: llvm_unreachable("Unexpected intrinsic!");
2889  case Intrinsic::sadd_with_overflow:
2890  BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
2891  case Intrinsic::uadd_with_overflow:
2892  BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
2893  case Intrinsic::ssub_with_overflow:
2894  BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
2895  case Intrinsic::usub_with_overflow:
2896  BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
2897  case Intrinsic::smul_with_overflow:
2898  BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
2899  case Intrinsic::umul_with_overflow:
2900  BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
2901  }
2902 
2903  unsigned LHSReg = getRegForValue(LHS);
2904  if (LHSReg == 0)
2905  return false;
2906  bool LHSIsKill = hasTrivialKill(LHS);
2907 
2908  unsigned ResultReg = 0;
2909  // Check if we have an immediate version.
2910  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2911  static const uint16_t Opc[2][4] = {
2912  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2913  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2914  };
2915 
2916  if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
2917  CondCode == X86::COND_O) {
2918  // We can use INC/DEC.
2919  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2920  bool IsDec = BaseOpc == ISD::SUB;
2921  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2922  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2923  .addReg(LHSReg, getKillRegState(LHSIsKill));
2924  } else
2925  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2926  CI->getZExtValue());
2927  }
2928 
2929  unsigned RHSReg;
2930  bool RHSIsKill;
2931  if (!ResultReg) {
2932  RHSReg = getRegForValue(RHS);
2933  if (RHSReg == 0)
2934  return false;
2935  RHSIsKill = hasTrivialKill(RHS);
2936  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2937  RHSIsKill);
2938  }
2939 
2940  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2941  // it manually.
2942  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2943  static const uint16_t MULOpc[] =
2944  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2945  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2946  // First copy the first operand into AL/AX/EAX/RAX, which is an implicit
2947  // input to the X86::MUL*r instruction.
2948  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2949  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2950  .addReg(LHSReg, getKillRegState(LHSIsKill));
2951  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2952  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2953  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2954  static const uint16_t MULOpc[] =
2955  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2956  if (VT == MVT::i8) {
2957  // Copy the first operand into AL, which is an implicit input to the
2958  // X86::IMUL8r instruction.
2959  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2960  TII.get(TargetOpcode::COPY), X86::AL)
2961  .addReg(LHSReg, getKillRegState(LHSIsKill));
2962  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2963  RHSIsKill);
2964  } else
2965  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2966  TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2967  RHSReg, RHSIsKill);
2968  }
2969 
2970  if (!ResultReg)
2971  return false;
2972 
2973  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2974  unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
2975  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2976  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2977  ResultReg2).addImm(CondCode);
2978 
2979  updateValueMap(II, ResultReg, 2);
2980  return true;
2981  }
2982  case Intrinsic::x86_sse_cvttss2si:
2983  case Intrinsic::x86_sse_cvttss2si64:
2984  case Intrinsic::x86_sse2_cvttsd2si:
2985  case Intrinsic::x86_sse2_cvttsd2si64: {
2986  bool IsInputDouble;
2987  switch (II->getIntrinsicID()) {
2988  default: llvm_unreachable("Unexpected intrinsic.");
2989  case Intrinsic::x86_sse_cvttss2si:
2990  case Intrinsic::x86_sse_cvttss2si64:
2991  if (!Subtarget->hasSSE1())
2992  return false;
2993  IsInputDouble = false;
2994  break;
2995  case Intrinsic::x86_sse2_cvttsd2si:
2996  case Intrinsic::x86_sse2_cvttsd2si64:
2997  if (!Subtarget->hasSSE2())
2998  return false;
2999  IsInputDouble = true;
3000  break;
3001  }
3002 
3003  Type *RetTy = II->getCalledFunction()->getReturnType();
3004  MVT VT;
3005  if (!isTypeLegal(RetTy, VT))
3006  return false;
3007 
3008  static const uint16_t CvtOpc[3][2][2] = {
3009  { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
3010  { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
3011  { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
3012  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
3013  { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
3014  { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
3015  };
3016  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
3017  Subtarget->hasAVX() ? 1 :
3018  0;
3019  unsigned Opc;
3020  switch (VT.SimpleTy) {
3021  default: llvm_unreachable("Unexpected result type.");
3022  case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3023  case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3024  }
3025 
3026  // Check if we can fold insertelement instructions into the convert.
3027  const Value *Op = II->getArgOperand(0);
3028  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3029  const Value *Index = IE->getOperand(2);
3030  if (!isa<ConstantInt>(Index))
3031  break;
3032  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3033 
3034  if (Idx == 0) {
3035  Op = IE->getOperand(1);
3036  break;
3037  }
3038  Op = IE->getOperand(0);
3039  }
3040 
3041  unsigned Reg = getRegForValue(Op);
3042  if (Reg == 0)
3043  return false;
3044 
3045  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3046  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3047  .addReg(Reg);
3048 
3049  updateValueMap(II, ResultReg);
3050  return true;
3051  }
3052  }
3053 }
3054 
3055 bool X86FastISel::fastLowerArguments() {
3056  if (!FuncInfo.CanLowerReturn)
3057  return false;
3058 
3059  const Function *F = FuncInfo.Fn;
3060  if (F->isVarArg())
3061  return false;
3062 
3063  CallingConv::ID CC = F->getCallingConv();
3064  if (CC != CallingConv::C)
3065  return false;
3066 
3067  if (Subtarget->isCallingConvWin64(CC))
3068  return false;
3069 
3070  if (!Subtarget->is64Bit())
3071  return false;
3072 
3073  if (Subtarget->useSoftFloat())
3074  return false;
3075 
3076  // Only handle simple cases, i.e. up to 6 i32/i64 and 8 f32/f64 scalar arguments.
3077  unsigned GPRCnt = 0;
3078  unsigned FPRCnt = 0;
3079  for (auto const &Arg : F->args()) {
3080  if (Arg.hasAttribute(Attribute::ByVal) ||
3081  Arg.hasAttribute(Attribute::InReg) ||
3082  Arg.hasAttribute(Attribute::StructRet) ||
3083  Arg.hasAttribute(Attribute::SwiftSelf) ||
3084  Arg.hasAttribute(Attribute::SwiftError) ||
3085  Arg.hasAttribute(Attribute::Nest))
3086  return false;
3087 
3088  Type *ArgTy = Arg.getType();
3089  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3090  return false;
3091 
3092  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3093  if (!ArgVT.isSimple()) return false;
3094  switch (ArgVT.getSimpleVT().SimpleTy) {
3095  default: return false;
3096  case MVT::i32:
3097  case MVT::i64:
3098  ++GPRCnt;
3099  break;
3100  case MVT::f32:
3101  case MVT::f64:
3102  if (!Subtarget->hasSSE1())
3103  return false;
3104  ++FPRCnt;
3105  break;
3106  }
3107 
3108  if (GPRCnt > 6)
3109  return false;
3110 
3111  if (FPRCnt > 8)
3112  return false;
3113  }
3114 
3115  static const MCPhysReg GPR32ArgRegs[] = {
3116  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3117  };
3118  static const MCPhysReg GPR64ArgRegs[] = {
3119  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3120  };
3121  static const MCPhysReg XMMArgRegs[] = {
3122  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3123  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3124  };
3125 
3126  unsigned GPRIdx = 0;
3127  unsigned FPRIdx = 0;
3128  for (auto const &Arg : F->args()) {
3129  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3130  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3131  unsigned SrcReg;
3132  switch (VT.SimpleTy) {
3133  default: llvm_unreachable("Unexpected value type.");
3134  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3135  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3136  case MVT::f32: LLVM_FALLTHROUGH;
3137  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3138  }
3139  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3140  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3141  // Without this, EmitLiveInCopies may eliminate the livein if its only
3142  // use is a bitcast (which isn't turned into an instruction).
3143  unsigned ResultReg = createResultReg(RC);
3144  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3145  TII.get(TargetOpcode::COPY), ResultReg)
3146  .addReg(DstReg, getKillRegState(true));
3147  updateValueMap(&Arg, ResultReg);
3148  }
3149  return true;
3150 }
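// Illustrative sketch (not part of X86FastISel.cpp): fastLowerArguments() above
// only handles the SysV x86-64 C convention with at most six i32/i64 and eight
// f32/f64 scalar arguments, assigned in order from the GPR32/GPR64/XMM tables.
// The standalone snippet below (hypothetical names; only the register order is
// taken from those tables) replays that assignment for a mixed signature.
#include <cstdio>
static const char *SysVGPR64[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"};
static const char *SysVXMM[] = {"xmm0", "xmm1", "xmm2", "xmm3",
                                "xmm4", "xmm5", "xmm6", "xmm7"};
int main() {
  // For a signature like f(i64, double, i64, float) this prints:
  //   arg0 -> rdi, arg1 -> xmm0, arg2 -> rsi, arg3 -> xmm1
  const bool ArgIsFP[] = {false, true, false, true};
  unsigned GPRIdx = 0, FPRIdx = 0;
  for (unsigned i = 0; i != 4; ++i)
    std::printf("arg%u -> %s\n", i,
                ArgIsFP[i] ? SysVXMM[FPRIdx++] : SysVGPR64[GPRIdx++]);
  return 0;
}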
3151 
3152 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3153  CallingConv::ID CC,
3154  ImmutableCallSite *CS) {
3155  if (Subtarget->is64Bit())
3156  return 0;
3157  if (Subtarget->getTargetTriple().isOSMSVCRT())
3158  return 0;
3159  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3160  CC == CallingConv::HiPE)
3161  return 0;
3162 
3163  if (CS)
3164  if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
3165  CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3166  return 0;
3167 
3168  return 4;
3169 }
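// Illustrative note (not part of X86FastISel.cpp): on a 32-bit, non-MSVCRT,
// non-MCU target with an ordinary calling convention, a callee that returns
// through an sret pointer removes the hidden 4-byte return-slot argument itself
// (a "retl $4"-style return), which is why the helper above reports 4; the
// value feeds the CALLSEQ_END bookkeeping in fastLowerCall() below, and every
// early-return case yields 0.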
3170 
3171 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3172  auto &OutVals = CLI.OutVals;
3173  auto &OutFlags = CLI.OutFlags;
3174  auto &OutRegs = CLI.OutRegs;
3175  auto &Ins = CLI.Ins;
3176  auto &InRegs = CLI.InRegs;
3177  CallingConv::ID CC = CLI.CallConv;
3178  bool &IsTailCall = CLI.IsTailCall;
3179  bool IsVarArg = CLI.IsVarArg;
3180  const Value *Callee = CLI.Callee;
3181  MCSymbol *Symbol = CLI.Symbol;
3182 
3183  bool Is64Bit = Subtarget->is64Bit();
3184  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3185 
3186  const CallInst *CI =
3187  CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
3188  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
3189 
3190  // Call / invoke instructions with NoCfCheck attribute require special
3191  // handling.
3192  const auto *II =
3193  CLI.CS ? dyn_cast<InvokeInst>(CLI.CS->getInstruction()) : nullptr;
3194  if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()))
3195  return false;
3196 
3197  // Functions with no_caller_saved_registers that need special handling.
3198  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3199  (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
3200  return false;
3201 
3202  // Functions using retpoline for indirect calls need to use SDISel.
3203  if (Subtarget->useRetpolineIndirectCalls())
3204  return false;
3205 
3206  // Handle only C, fastcc, and webkit_js calling conventions for now.
3207  switch (CC) {
3208  default: return false;
3209  case CallingConv::C:
3210  case CallingConv::Fast:
3211  case CallingConv::WebKit_JS:
3212  case CallingConv::Swift:
3213  case CallingConv::X86_FastCall:
3214  case CallingConv::X86_StdCall:
3215  case CallingConv::X86_ThisCall:
3216  case CallingConv::Win64:
3217  case CallingConv::X86_64_SysV:
3218  break;
3219  }
3220 
3221  // Allow SelectionDAG isel to handle tail calls.
3222  if (IsTailCall)
3223  return false;
3224 
3225  // fastcc with -tailcallopt is intended to provide a guaranteed
3226  // tail call optimization. Fastisel doesn't know how to do that.
3227  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
3228  return false;
3229 
3230  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3231  // x86-32. Special handling for x86-64 is implemented.
3232  if (IsVarArg && IsWin64)
3233  return false;
3234 
3235  // Don't know about inalloca yet.
3236  if (CLI.CS && CLI.CS->hasInAllocaArgument())
3237  return false;
3238 
3239  for (auto Flag : CLI.OutFlags)
3240  if (Flag.isSwiftError())
3241  return false;
3242 
3243  SmallVector<MVT, 16> OutVTs;
3244  SmallVector<unsigned, 16> ArgRegs;
3245 
3246  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3247  // instruction. This is safe because it is common to all FastISel supported
3248  // calling conventions on x86.
3249  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3250  Value *&Val = OutVals[i];
3251  ISD::ArgFlagsTy Flags = OutFlags[i];
3252  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3253  if (CI->getBitWidth() < 32) {
3254  if (Flags.isSExt())
3255  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
3256  else
3257  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
3258  }
3259  }
3260 
3261  // Passing bools around ends up doing a trunc to i1 and passing it.
3262  // Codegen this as an argument + "and 1".
3263  MVT VT;
3264  auto *TI = dyn_cast<TruncInst>(Val);
3265  unsigned ResultReg;
3266  if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
3267  (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
3268  TI->hasOneUse()) {
3269  Value *PrevVal = TI->getOperand(0);
3270  ResultReg = getRegForValue(PrevVal);
3271 
3272  if (!ResultReg)
3273  return false;
3274 
3275  if (!isTypeLegal(PrevVal->getType(), VT))
3276  return false;
3277 
3278  ResultReg =
3279  fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
3280  } else {
3281  if (!isTypeLegal(Val->getType(), VT))
3282  return false;
3283  ResultReg = getRegForValue(Val);
3284  }
3285 
3286  if (!ResultReg)
3287  return false;
3288 
3289  ArgRegs.push_back(ResultReg);
3290  OutVTs.push_back(VT);
3291  }
3292 
3293  // Analyze operands of the call, assigning locations to each operand.
3294  SmallVector<CCValAssign, 16> ArgLocs;
3295  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3296 
3297  // Allocate shadow area for Win64
3298  if (IsWin64)
3299  CCInfo.AllocateStack(32, 8);
3300 
3301  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3302 
3303  // Get a count of how many bytes are to be pushed on the stack.
3304  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3305 
3306  // Issue CALLSEQ_START
3307  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3308  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3309  .addImm(NumBytes).addImm(0).addImm(0);
3310 
3311  // Walk the register/memloc assignments, inserting copies/loads.
3312  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3313  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3314  CCValAssign const &VA = ArgLocs[i];
3315  const Value *ArgVal = OutVals[VA.getValNo()];
3316  MVT ArgVT = OutVTs[VA.getValNo()];
3317 
3318  if (ArgVT == MVT::x86mmx)
3319  return false;
3320 
3321  unsigned ArgReg = ArgRegs[VA.getValNo()];
3322 
3323  // Promote the value if needed.
3324  switch (VA.getLocInfo()) {
3325  case CCValAssign::Full: break;
3326  case CCValAssign::SExt: {
3327  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3328  "Unexpected extend");
3329 
3330  if (ArgVT == MVT::i1)
3331  return false;
3332 
3333  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3334  ArgVT, ArgReg);
3335  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3336  ArgVT = VA.getLocVT();
3337  break;
3338  }
3339  case CCValAssign::ZExt: {
3340  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3341  "Unexpected extend");
3342 
3343  // Handle zero-extension from i1 to i8, which is common.
3344  if (ArgVT == MVT::i1) {
3345  // Set the high bits to zero.
3346  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
3347  ArgVT = MVT::i8;
3348 
3349  if (ArgReg == 0)
3350  return false;
3351  }
3352 
3353  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3354  ArgVT, ArgReg);
3355  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3356  ArgVT = VA.getLocVT();
3357  break;
3358  }
3359  case CCValAssign::AExt: {
3360  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3361  "Unexpected extend");
3362  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3363  ArgVT, ArgReg);
3364  if (!Emitted)
3365  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3366  ArgVT, ArgReg);
3367  if (!Emitted)
3368  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3369  ArgVT, ArgReg);
3370 
3371  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3372  ArgVT = VA.getLocVT();
3373  break;
3374  }
3375  case CCValAssign::BCvt: {
3376  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
3377  /*TODO: Kill=*/false);
3378  assert(ArgReg && "Failed to emit a bitcast!");
3379  ArgVT = VA.getLocVT();
3380  break;
3381  }
3382  case CCValAssign::VExt:
3383  // VExt has not been implemented, so this should be impossible to reach
3384  // for now. However, fall back to Selection DAG isel once implemented.
3385  return false;
3386  case CCValAssign::AExtUpper:
3387  case CCValAssign::SExtUpper:
3388  case CCValAssign::ZExtUpper:
3389  case CCValAssign::FPExt:
3390  llvm_unreachable("Unexpected loc info!");
3391  case CCValAssign::Indirect:
3392  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3393  // support this.
3394  return false;
3395  }
3396 
3397  if (VA.isRegLoc()) {
3398  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3399  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3400  OutRegs.push_back(VA.getLocReg());
3401  } else {
3402  assert(VA.isMemLoc());
3403 
3404  // Don't emit stores for undef values.
3405  if (isa<UndefValue>(ArgVal))
3406  continue;
3407 
3408  unsigned LocMemOffset = VA.getLocMemOffset();
3409  X86AddressMode AM;
3410  AM.Base.Reg = RegInfo->getStackRegister();
3411  AM.Disp = LocMemOffset;
3412  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3413  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3414  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3415  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3416  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3417  if (Flags.isByVal()) {
3418  X86AddressMode SrcAM;
3419  SrcAM.Base.Reg = ArgReg;
3420  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3421  return false;
3422  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3423  // If this is a really simple value, emit this with the Value* version
3424  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3425  // as it can cause us to reevaluate the argument.
3426  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3427  return false;
3428  } else {
3429  bool ValIsKill = hasTrivialKill(ArgVal);
3430  if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
3431  return false;
3432  }
3433  }
3434  }
3435 
3436  // ELF / PIC requires GOT in the EBX register before function calls via PLT
3437  // GOT pointer.
3438  if (Subtarget->isPICStyleGOT()) {
3439  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3440  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3441  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3442  }
3443 
3444  if (Is64Bit && IsVarArg && !IsWin64) {
3445  // From AMD64 ABI document:
3446  // For calls that may call functions that use varargs or stdargs
3447  // (prototype-less calls or calls to functions containing ellipsis (...) in
3448  // the declaration) %al is used as hidden argument to specify the number
3449  // of SSE registers used. The contents of %al do not need to match exactly
3450  // the number of registers, but must be an upper bound on the number of SSE
3451  // registers used and is in the range 0 - 8 inclusive.
3452 
3453  // Count the number of XMM registers allocated.
3454  static const MCPhysReg XMMArgRegs[] = {
3455  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3456  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3457  };
3458  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3459  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3460  && "SSE registers cannot be used when SSE is disabled");
3461  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3462  X86::AL).addImm(NumXMMRegs);
3463  }
3464 
3465  // Materialize callee address in a register. FIXME: GV address can be
3466  // handled with a CALLpcrel32 instead.
3467  X86AddressMode CalleeAM;
3468  if (!X86SelectCallAddress(Callee, CalleeAM))
3469  return false;
3470 
3471  unsigned CalleeOp = 0;
3472  const GlobalValue *GV = nullptr;
3473  if (CalleeAM.GV != nullptr) {
3474  GV = CalleeAM.GV;
3475  } else if (CalleeAM.Base.Reg != 0) {
3476  CalleeOp = CalleeAM.Base.Reg;
3477  } else
3478  return false;
3479 
3480  // Issue the call.
3481  MachineInstrBuilder MIB;
3482  if (CalleeOp) {
3483  // Register-indirect call.
3484  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3485  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3486  .addReg(CalleeOp);
3487  } else {
3488  // Direct call.
3489  assert(GV && "Not a direct call");
3490  // See if we need any target-specific flags on the GV operand.
3491  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3492 
3493  // This will be a direct call, or an indirect call through memory for
3494  // NonLazyBind calls or dllimport calls.
3495  bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
3496  OpFlags == X86II::MO_GOTPCREL ||
3497  OpFlags == X86II::MO_COFFSTUB;
3498  unsigned CallOpc = NeedLoad
3499  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3500  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3501 
3502  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3503  if (NeedLoad)
3504  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3505  if (Symbol)
3506  MIB.addSym(Symbol, OpFlags);
3507  else
3508  MIB.addGlobalAddress(GV, 0, OpFlags);
3509  if (NeedLoad)
3510  MIB.addReg(0);
3511  }
3512 
3513  // Add a register mask operand representing the call-preserved registers.
3514  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3515  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3516 
3517  // Add an implicit use GOT pointer in EBX.
3518  if (Subtarget->isPICStyleGOT())
3519  MIB.addReg(X86::EBX, RegState::Implicit);
3520 
3521  if (Is64Bit && IsVarArg && !IsWin64)
3522  MIB.addReg(X86::AL, RegState::Implicit);
3523 
3524  // Add implicit physical register uses to the call.
3525  for (auto Reg : OutRegs)
3526  MIB.addReg(Reg, RegState::Implicit);
3527 
3528  // Issue CALLSEQ_END
3529  unsigned NumBytesForCalleeToPop =
3530  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3531  TM.Options.GuaranteedTailCallOpt)
3532  ? NumBytes // Callee pops everything.
3533  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
3534  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3535  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3536  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3537 
3538  // Now handle call return values.
3539  SmallVector<CCValAssign, 16> RVLocs;
3540  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3541  CLI.RetTy->getContext());
3542  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3543 
3544  // Copy all of the result registers out of their specified physreg.
3545  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3546  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3547  CCValAssign &VA = RVLocs[i];
3548  EVT CopyVT = VA.getValVT();
3549  unsigned CopyReg = ResultReg + i;
3550  Register SrcReg = VA.getLocReg();
3551 
3552  // If this is x86-64, and we disabled SSE, we can't return FP values
3553  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3554  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3555  report_fatal_error("SSE register return with SSE disabled");
3556  }
3557 
3558  // If we prefer to use the value in xmm registers, copy it out as f80 and
3559  // use a truncate to move it from fp stack reg to xmm reg.
3560  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3561  isScalarFPTypeInSSEReg(VA.getValVT())) {
3562  CopyVT = MVT::f80;
3563  CopyReg = createResultReg(&X86::RFP80RegClass);
3564  }
3565 
3566  // Copy out the result.
3567  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3568  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3569  InRegs.push_back(VA.getLocReg());
3570 
3571  // Round the f80 to the right size, which also moves it to the appropriate
3572  // xmm register. This is accomplished by storing the f80 value in memory
3573  // and then loading it back.
3574  if (CopyVT != VA.getValVT()) {
3575  EVT ResVT = VA.getValVT();
3576  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3577  unsigned MemSize = ResVT.getSizeInBits()/8;
3578  int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3579  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3580  TII.get(Opc)), FI)
3581  .addReg(CopyReg);
3582  Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
3583  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3584  TII.get(Opc), ResultReg + i), FI);
3585  }
3586  }
3587 
3588  CLI.ResultReg = ResultReg;
3589  CLI.NumResultRegs = RVLocs.size();
3590  CLI.Call = MIB;
3591 
3592  return true;
3593 }
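// Illustrative note (not part of X86FastISel.cpp): for a simple x86-64 call
// such as g(7) with "int g(int)", the path above produces machine IR of roughly
// this shape (virtual register and symbol names are illustrative only):
//
//   %arg = MOV32ri 7                            ; argument materialized up front
//   ADJCALLSTACKDOWN64 0, 0, 0                  ; CALLSEQ_START, NumBytes == 0
//   $edi = COPY %arg                            ; register location from CC_X86
//   CALL64pcrel32 @g, <regmask>, implicit $edi  ; direct call + call-preserved mask
//   ADJCALLSTACKUP64 0, 0                       ; CALLSEQ_END
//   %ret = COPY $eax                            ; copy the result out of its physreg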
3594 
3595 bool
3596 X86FastISel::fastSelectInstruction(const Instruction *I) {
3597  switch (I->getOpcode()) {
3598  default: break;
3599  case Instruction::Load:
3600  return X86SelectLoad(I);
3601  case Instruction::Store:
3602  return X86SelectStore(I);
3603  case Instruction::Ret:
3604  return X86SelectRet(I);
3605  case Instruction::ICmp:
3606  case Instruction::FCmp:
3607  return X86SelectCmp(I);
3608  case Instruction::ZExt:
3609  return X86SelectZExt(I);
3610  case Instruction::SExt:
3611  return X86SelectSExt(I);
3612  case Instruction::Br:
3613  return X86SelectBranch(I);
3614  case Instruction::LShr:
3615  case Instruction::AShr:
3616  case Instruction::Shl:
3617  return X86SelectShift(I);
3618  case Instruction::SDiv:
3619  case Instruction::UDiv:
3620  case Instruction::SRem:
3621  case Instruction::URem:
3622  return X86SelectDivRem(I);
3623  case Instruction::Select:
3624  return X86SelectSelect(I);
3625  case Instruction::Trunc:
3626  return X86SelectTrunc(I);
3627  case Instruction::FPExt:
3628  return X86SelectFPExt(I);
3629  case Instruction::FPTrunc:
3630  return X86SelectFPTrunc(I);
3631  case Instruction::SIToFP:
3632  return X86SelectSIToFP(I);
3633  case Instruction::UIToFP:
3634  return X86SelectUIToFP(I);
3635  case Instruction::IntToPtr: // Deliberate fall-through.
3636  case Instruction::PtrToInt: {
3637  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3638  EVT DstVT = TLI.getValueType(DL, I->getType());
3639  if (DstVT.bitsGT(SrcVT))
3640  return X86SelectZExt(I);
3641  if (DstVT.bitsLT(SrcVT))
3642  return X86SelectTrunc(I);
3643  unsigned Reg = getRegForValue(I->getOperand(0));
3644  if (Reg == 0) return false;
3645  updateValueMap(I, Reg);
3646  return true;
3647  }
3648  case Instruction::BitCast: {
3649  // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
3650  if (!Subtarget->hasSSE2())
3651  return false;
3652 
3653  MVT SrcVT, DstVT;
3654  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
3655  !isTypeLegal(I->getType(), DstVT))
3656  return false;
3657 
3658  // Only allow vectors that use xmm/ymm/zmm.
3659  if (!SrcVT.isVector() || !DstVT.isVector() ||
3660  SrcVT.getVectorElementType() == MVT::i1 ||
3661  DstVT.getVectorElementType() == MVT::i1)
3662  return false;
3663 
3664  unsigned Reg = getRegForValue(I->getOperand(0));
3665  if (Reg == 0)
3666  return false;
3667 
3668  // No instruction is needed for conversion. Reuse the register used by
3669  // the first operand.
3670  updateValueMap(I, Reg);
3671  return true;
3672  }
3673  }
3674 
3675  return false;
3676 }
3677 
3678 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3679  if (VT > MVT::i64)
3680  return 0;
3681 
3682  uint64_t Imm = CI->getZExtValue();
3683  if (Imm == 0) {
3684  unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3685  switch (VT.SimpleTy) {
3686  default: llvm_unreachable("Unexpected value type");
3687  case MVT::i1:
3688  case MVT::i8:
3689  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
3690  X86::sub_8bit);
3691  case MVT::i16:
3692  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
3693  X86::sub_16bit);
3694  case MVT::i32:
3695  return SrcReg;
3696  case MVT::i64: {
3697  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3698  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3699  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3700  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3701  return ResultReg;
3702  }
3703  }
3704  }
3705 
3706  unsigned Opc = 0;
3707  switch (VT.SimpleTy) {
3708  default: llvm_unreachable("Unexpected value type");
3709  case MVT::i1:
3710  VT = MVT::i8;
3711  LLVM_FALLTHROUGH;
3712  case MVT::i8: Opc = X86::MOV8ri; break;
3713  case MVT::i16: Opc = X86::MOV16ri; break;
3714  case MVT::i32: Opc = X86::MOV32ri; break;
3715  case MVT::i64: {
3716  if (isUInt<32>(Imm))
3717  Opc = X86::MOV32ri64;
3718  else if (isInt<32>(Imm))
3719  Opc = X86::MOV64ri32;
3720  else
3721  Opc = X86::MOV64ri;
3722  break;
3723  }
3724  }
3725  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3726 }
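// Illustrative sketch (not part of X86FastISel.cpp): the 64-bit immediate
// opcode choice in X86MaterializeInt() above, restated as a standalone helper
// with hypothetical names.
#include <cstdint>
#include <limits>
enum class Mov64Opc { MOV32ri64, MOV64ri32, MOV64ri };
inline Mov64Opc chooseMov64Opcode(std::uint64_t Imm) {
  if (Imm <= std::numeric_limits<std::uint32_t>::max())
    return Mov64Opc::MOV32ri64;   // fits zero-extended in 32 bits
  if (static_cast<std::int64_t>(Imm) >= std::numeric_limits<std::int32_t>::min() &&
      static_cast<std::int64_t>(Imm) <= std::numeric_limits<std::int32_t>::max())
    return Mov64Opc::MOV64ri32;   // fits sign-extended in 32 bits
  return Mov64Opc::MOV64ri;       // needs the full 64-bit immediate form
}
// e.g. 0xFFFFFFFF -> MOV32ri64, std::uint64_t(-1) -> MOV64ri32,
//      0x123456789 -> MOV64ri.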
3727 
3728 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3729  if (CFP->isNullValue())
3730  return fastMaterializeFloatZero(CFP);
3731 
3732  // Can't handle alternate code models yet.
3733  CodeModel::Model CM = TM.getCodeModel();
3734  if (CM != CodeModel::Small && CM != CodeModel::Large)
3735  return 0;
3736 
3737  // Get opcode and regclass of the output for the given load instruction.
3738  unsigned Opc = 0;
3739  bool HasAVX = Subtarget->hasAVX();
3740  bool HasAVX512 = Subtarget->hasAVX512();
3741  switch (VT.SimpleTy) {
3742  default: return 0;
3743  case MVT::f32:
3744  if (X86ScalarSSEf32)
3745  Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
3746  HasAVX ? X86::VMOVSSrm_alt :
3747  X86::MOVSSrm_alt;
3748  else
3749  Opc = X86::LD_Fp32m;
3750  break;
3751  case MVT::f64:
3752  if (X86ScalarSSEf64)
3753  Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
3754  HasAVX ? X86::VMOVSDrm_alt :
3755  X86::MOVSDrm_alt;
3756  else
3757  Opc = X86::LD_Fp64m;
3758  break;
3759  case MVT::f80:
3760  // No f80 support yet.
3761  return 0;
3762  }
3763 
3764  // MachineConstantPool wants an explicit alignment.
3765  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
3766  if (Align == 0) {
3767  // Alignment of vector types. FIXME!
3768  Align = DL.getTypeAllocSize(CFP->getType());
3769  }
3770 
3771  // x86-32 PIC requires a PIC base register for constant pools.
3772  unsigned PICBase = 0;
3773  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3774  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3775  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3776  else if (OpFlag == X86II::MO_GOTOFF)
3777  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3778  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3779  PICBase = X86::RIP;
3780 
3781  // Create the load from the constant pool.
3782  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
3783  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
3784 
3785  if (CM == CodeModel::Large) {
3786  unsigned AddrReg = createResultReg(&X86::GR64RegClass);
3787  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3788  AddrReg)
3789  .addConstantPoolIndex(CPI, 0, OpFlag);
3790  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3791  TII.get(Opc), ResultReg);
3792  addDirectMem(MIB, AddrReg);
3793  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3794  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3795  MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
3796  MIB->addMemOperand(*FuncInfo.MF, MMO);
3797  return ResultReg;
3798  }
3799 
3800  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3801  TII.get(Opc), ResultReg),
3802  CPI, PICBase, OpFlag);
3803  return ResultReg;
3804 }
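// Illustrative note (not part of X86FastISel.cpp): for a double constant under
// the small code model on x86-64 with SSE2, the sequence above amounts to a
// RIP-relative constant-pool load, roughly
//   movsd .LCPI0_0(%rip), %xmm0
// whereas under the large code model the pool address is materialized first:
//   movabsq $.LCPI0_0, %rax
//   movsd (%rax), %xmm0
// (label and register names are illustrative only).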
3805 
3806 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3807  // Can't handle alternate code models yet.
3808  if (TM.getCodeModel() != CodeModel::Small)
3809  return 0;
3810 
3811  // Materialize addresses with LEA/MOV instructions.
3812  X86AddressMode AM;
3813  if (X86SelectAddress(GV, AM)) {
3814  // If the expression is just a basereg, then we're done, otherwise we need
3815  // to emit an LEA.
3816  if (AM.BaseType == X86AddressMode::RegBase &&
3817  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3818  return AM.Base.Reg;
3819 
3820  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3821  if (TM.getRelocationModel() == Reloc::Static &&
3822  TLI.getPointerTy(DL) == MVT::i64) {
3823  // The displacement code could be more than 32 bits away so we need to use
3824  // an instruction with a 64 bit immediate
3825  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3826  ResultReg)
3827  .addGlobalAddress(GV);
3828  } else {
3829  unsigned Opc =
3830  TLI.getPointerTy(DL) == MVT::i32
3831  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3832  : X86::LEA64r;
3833  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3834  TII.get(Opc), ResultReg), AM);
3835  }
3836  return ResultReg;
3837  }
3838  return 0;
3839 }
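// Illustrative note (not part of X86FastISel.cpp): with Reloc::Static and
// 64-bit pointers the global's address is materialized as a 64-bit immediate
// ("movabsq $g, %reg"); otherwise the selected X86AddressMode is emitted as an
// LEA, e.g. "leaq g(%rip), %reg" for a local global under the usual
// small-code-model PIC setup.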
3840 
3841 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3842  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3843 
3844  // Only handle simple types.
3845  if (!CEVT.isSimple())
3846  return 0;
3847  MVT VT = CEVT.getSimpleVT();
3848 
3849  if (const auto *CI = dyn_cast<ConstantInt>(C))
3850  return X86MaterializeInt(CI, VT);
3851  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
3852  return X86MaterializeFP(CFP, VT);
3853  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
3854  return X86MaterializeGV(GV, VT);
3855 
3856  return 0;
3857 }
3858 
3859 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3860  // Fail on dynamic allocas. At this point, getRegForValue has already
3861  // checked its CSE maps, so if we're here trying to handle a dynamic
3862  // alloca, we're not going to succeed. X86SelectAddress has a
3863  // check for dynamic allocas, because it's called directly from
3864  // various places, but targetMaterializeAlloca also needs a check
3865  // in order to avoid recursion between getRegForValue,
3866  // X86SelectAddress, and targetMaterializeAlloca.
3867  if (!FuncInfo.StaticAllocaMap.count(C))
3868  return 0;
3869  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3870 
3871  X86AddressMode AM;
3872  if (!X86SelectAddress(C, AM))
3873  return 0;
3874  unsigned Opc =
3875  TLI.getPointerTy(DL) == MVT::i32
3876  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3877  : X86::LEA64r;
3878  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3879  unsigned ResultReg = createResultReg(RC);
3880  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3881  TII.get(Opc), ResultReg), AM);
3882  return ResultReg;
3883 }
3884 
3885 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3886  MVT VT;
3887  if (!isTypeLegal(CF->getType(), VT))
3888  return 0;
3889 
3890  // Get opcode and regclass for the given zero.
3891  bool HasAVX512 = Subtarget->hasAVX512();
3892  unsigned Opc = 0;
3893  switch (VT.SimpleTy) {
3894  default: return 0;
3895  case MVT::f32:
3896  if (X86ScalarSSEf32)
3897  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
3898  else
3899  Opc = X86::LD_Fp032;
3900  break;
3901  case MVT::f64:
3902  if (X86ScalarSSEf64)
3903  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
3904  else
3905  Opc = X86::LD_Fp064;
3906  break;
3907  case MVT::f80:
3908  // No f80 support yet.
3909  return 0;
3910  }
3911 
3912  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3913  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3914  return ResultReg;
3915 }
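// Illustrative note (not part of X86FastISel.cpp): FsFLD0SS / FsFLD0SD (and
// their AVX512 variants) are pseudo-instructions that are later expanded to a
// self-XOR of the destination XMM register, i.e. the idiomatic
//   xorps %xmm0, %xmm0
// way of materializing +0.0, rather than a constant-pool load.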
3916 
3917 
3918 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3919  const LoadInst *LI) {
3920  const Value *Ptr = LI->getPointerOperand();
3921  X86AddressMode AM;
3922  if (!X86SelectAddress(Ptr, AM))
3923  return false;
3924 
3925  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3926 
3927  unsigned Size = DL.getTypeAllocSize(LI->getType());
3928  unsigned Alignment = LI->getAlignment();
3929 
3930  if (Alignment == 0) // Ensure that codegen never sees alignment 0
3931  Alignment = DL.getABITypeAlignment(LI->getType());
3932 
3933  SmallVector<MachineOperand, 8> AddrOps;
3934  AM.getFullAddress(AddrOps);
3935 
3936  MachineInstr *Result = XII.foldMemoryOperandImpl(
3937  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
3938  /*AllowCommute=*/true);
3939  if (!Result)
3940  return false;
3941 
3942  // The index register could be in the wrong register class. Unfortunately,
3943  // foldMemoryOperandImpl could have commuted the instruction, so it's not enough
3944  // to just look at OpNo + the offset to the index reg. We actually need to
3945  // scan the instruction to find the index reg and see if it's in the correct
3946  // register class.
3947  unsigned OperandNo = 0;
3948  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3949  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3950  MachineOperand &MO = *I;
3951  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3952  continue;
3953  // Found the index reg, now try to rewrite it.
3954  unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
3955  MO.getReg(), OperandNo);
3956  if (IndexReg == MO.getReg())
3957  continue;
3958  MO.setReg(IndexReg);
3959  }
3960 
3961  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3962  Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
3963  MachineBasicBlock::iterator I(MI);
3964  removeDeadCode(I, std::next(I));
3965  return true;
3966 }
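// Illustrative note (not part of X86FastISel.cpp): conceptually this folding
// replaces a register-form user of a just-selected load, e.g.
//   %v = MOV32rm <addr>          ; the load feeding operand OpNo
//   %r = ADD32rr %x, %v
// with the equivalent memory form
//   %r = ADD32rm %x, <addr>
// and the removeDeadCode() call above erases the superseded register-form
// instruction (opcode names are illustrative; foldMemoryOperandImpl() picks the
// actual folded opcode).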
3967 
3968 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3969  const TargetRegisterClass *RC,
3970  unsigned Op0, bool Op0IsKill,
3971  unsigned Op1, bool Op1IsKill,
3972  unsigned Op2, bool Op2IsKill,
3973  unsigned Op3, bool Op3IsKill) {
3974  const MCInstrDesc &II = TII.get(MachineInstOpcode);
3975 
3976  unsigned ResultReg = createResultReg(RC);
3977  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3978  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3979  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3980  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3981 
3982  if (II.getNumDefs() >= 1)
3983  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
3984  .addReg(Op0, getKillRegState(Op0IsKill))
3985  .addReg(Op1, getKillRegState(Op1IsKill))
3986  .addReg(Op2, getKillRegState(Op2IsKill))
3987  .addReg(Op3, getKillRegState(Op3IsKill));
3988  else {
3989  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
3990  .addReg(Op0, getKillRegState(Op0IsKill))
3991  .addReg(Op1, getKillRegState(Op1IsKill))
3992  .addReg(Op2, getKillRegState(Op2IsKill))
3993  .addReg(Op3, getKillRegState(Op3IsKill));
3994  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3995  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
3996  }
3997  return ResultReg;
3998 }
3999 
4000 
4001 namespace llvm {
4002  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
4003  const TargetLibraryInfo *libInfo) {
4004  return new X86FastISel(funcInfo, libInfo);
4005  }
4006 }