LLVM  9.0.0svn
X86FastISel.cpp
1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the X86-specific support for the FastISel class. Much
10 // of the target-specific code is generated by tablegen in the file
11 // X86GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86CallingConv.h"
17 #include "X86InstrBuilder.h"
18 #include "X86InstrInfo.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86RegisterInfo.h"
21 #include "X86Subtarget.h"
22 #include "X86TargetMachine.h"
24 #include "llvm/CodeGen/FastISel.h"
25 #include "llvm/CodeGen/FunctionLoweringInfo.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFrameInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/IR/CallSite.h"
30 #include "llvm/IR/CallingConv.h"
31 #include "llvm/IR/DebugInfo.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/GetElementPtrTypeIterator.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/GlobalVariable.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/MC/MCAsmInfo.h"
40 #include "llvm/MC/MCSymbol.h"
43 using namespace llvm;
44 
45 namespace {
46 
47 class X86FastISel final : public FastISel {
48  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
49  /// make the right decision when generating code for different targets.
50  const X86Subtarget *Subtarget;
51 
52  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
53  /// floating point ops.
54  /// When SSE is available, use it for f32 operations.
55  /// When SSE2 is available, use it for f64 operations.
56  bool X86ScalarSSEf64;
57  bool X86ScalarSSEf32;
58 
59 public:
60  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
61  const TargetLibraryInfo *libInfo)
62  : FastISel(funcInfo, libInfo) {
63  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
64  X86ScalarSSEf64 = Subtarget->hasSSE2();
65  X86ScalarSSEf32 = Subtarget->hasSSE1();
66  }
67 
68  bool fastSelectInstruction(const Instruction *I) override;
69 
70  /// The specified machine instr operand is a vreg, and that
71  /// vreg is being provided by the specified load instruction. If possible,
72  /// try to fold the load as an operand to the instruction, returning true
73  /// on success.
74  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
75  const LoadInst *LI) override;
76 
77  bool fastLowerArguments() override;
78  bool fastLowerCall(CallLoweringInfo &CLI) override;
79  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
80 
81 #include "X86GenFastISel.inc"
82 
83 private:
84  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
85  const DebugLoc &DL);
86 
87  bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
88  unsigned &ResultReg, unsigned Alignment = 1);
89 
90  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
91  MachineMemOperand *MMO = nullptr, bool Aligned = false);
92  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
93  X86AddressMode &AM,
94  MachineMemOperand *MMO = nullptr, bool Aligned = false);
95 
96  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
97  unsigned &ResultReg);
98 
99  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
100  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
101 
102  bool X86SelectLoad(const Instruction *I);
103 
104  bool X86SelectStore(const Instruction *I);
105 
106  bool X86SelectRet(const Instruction *I);
107 
108  bool X86SelectCmp(const Instruction *I);
109 
110  bool X86SelectZExt(const Instruction *I);
111 
112  bool X86SelectSExt(const Instruction *I);
113 
114  bool X86SelectBranch(const Instruction *I);
115 
116  bool X86SelectShift(const Instruction *I);
117 
118  bool X86SelectDivRem(const Instruction *I);
119 
120  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
121 
122  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
123 
124  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
125 
126  bool X86SelectSelect(const Instruction *I);
127 
128  bool X86SelectTrunc(const Instruction *I);
129 
130  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
131  const TargetRegisterClass *RC);
132 
133  bool X86SelectFPExt(const Instruction *I);
134  bool X86SelectFPTrunc(const Instruction *I);
135  bool X86SelectSIToFP(const Instruction *I);
136  bool X86SelectUIToFP(const Instruction *I);
137  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
138 
139  const X86InstrInfo *getInstrInfo() const {
140  return Subtarget->getInstrInfo();
141  }
142  const X86TargetMachine *getTargetMachine() const {
143  return static_cast<const X86TargetMachine *>(&TM);
144  }
145 
146  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
147 
148  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
149  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
150  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
151  unsigned fastMaterializeConstant(const Constant *C) override;
152 
153  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
154 
155  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
156 
157  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
158  /// computed in an SSE register, not on the X87 floating point stack.
159  bool isScalarFPTypeInSSEReg(EVT VT) const {
160  return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 needs SSE2
161  (VT == MVT::f32 && X86ScalarSSEf32); // f32 needs SSE1
162  }
163 
164  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
165 
166  bool IsMemcpySmall(uint64_t Len);
167 
168  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
169  X86AddressMode SrcAM, uint64_t Len);
170 
171  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
172  const Value *Cond);
173 
174  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
175  X86AddressMode &AM);
176 
177  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
178  const TargetRegisterClass *RC, unsigned Op0,
179  bool Op0IsKill, unsigned Op1, bool Op1IsKill,
180  unsigned Op2, bool Op2IsKill, unsigned Op3,
181  bool Op3IsKill);
182 };
183 
184 } // end anonymous namespace.
185 
186 static std::pair<unsigned, bool>
187 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
188  unsigned CC;
189  bool NeedSwap = false;
190 
191  // SSE Condition code mapping:
192  // 0 - EQ
193  // 1 - LT
194  // 2 - LE
195  // 3 - UNORD
196  // 4 - NEQ
197  // 5 - NLT
198  // 6 - NLE
199  // 7 - ORD
200  switch (Predicate) {
201  default: llvm_unreachable("Unexpected predicate");
202  case CmpInst::FCMP_OEQ: CC = 0; break;
203  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
204  case CmpInst::FCMP_OLT: CC = 1; break;
205  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
206  case CmpInst::FCMP_OLE: CC = 2; break;
207  case CmpInst::FCMP_UNO: CC = 3; break;
208  case CmpInst::FCMP_UNE: CC = 4; break;
209  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
210  case CmpInst::FCMP_UGE: CC = 5; break;
211  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
212  case CmpInst::FCMP_UGT: CC = 6; break;
213  case CmpInst::FCMP_ORD: CC = 7; break;
214  case CmpInst::FCMP_UEQ: CC = 8; break;
215  case CmpInst::FCMP_ONE: CC = 12; break;
216  }
217 
218  return std::make_pair(CC, NeedSwap);
219 }
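// For illustration (a hypothetical call site; the real callers appear in the
// SSE select/compare lowering further below): FCMP_OGT has no direct SSE
// encoding, so the operands are swapped and the LT encoding is reused:
//   std::pair<unsigned, bool> R = getX86SSEConditionCode(CmpInst::FCMP_OGT);
//   // R.first == 1 (LT), R.second == true (swap LHS and RHS)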
220 
221 /// Adds a complex addressing mode to the given machine instr builder.
222 /// Note, this will constrain the index register. If it's not possible to
223 /// constrain the given index register, then a new one will be created. The
224 /// IndexReg field of the addressing mode will be updated to match in this case.
225 const MachineInstrBuilder &
226 X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
227  X86AddressMode &AM) {
228  // First constrain the index register. It needs to be a GR64_NOSP.
229  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
230  MIB->getNumOperands() +
231  X86::AddrIndexReg);
232  return ::addFullAddress(MIB, AM);
233 }
234 
235 /// Check if it is possible to fold the condition from the XALU intrinsic
236 /// into the user. The condition code will only be updated on success.
237 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
238  const Value *Cond) {
239  if (!isa<ExtractValueInst>(Cond))
240  return false;
241 
242  const auto *EV = cast<ExtractValueInst>(Cond);
243  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
244  return false;
245 
246  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
247  MVT RetVT;
248  const Function *Callee = II->getCalledFunction();
249  Type *RetTy =
250  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
251  if (!isTypeLegal(RetTy, RetVT))
252  return false;
253 
254  if (RetVT != MVT::i32 && RetVT != MVT::i64)
255  return false;
256 
257  X86::CondCode TmpCC;
258  switch (II->getIntrinsicID()) {
259  default: return false;
260  case Intrinsic::sadd_with_overflow:
261  case Intrinsic::ssub_with_overflow:
262  case Intrinsic::smul_with_overflow:
263  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
264  case Intrinsic::uadd_with_overflow:
265  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
266  }
267 
268  // Check if both instructions are in the same basic block.
269  if (II->getParent() != I->getParent())
270  return false;
271 
272  // Make sure nothing is in the way.
273  BasicBlock::const_iterator Start(I);
274  BasicBlock::const_iterator End(II);
275  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
276  // We only expect extractvalue instructions between the intrinsic and the
277  // instruction to be selected.
278  if (!isa<ExtractValueInst>(Itr))
279  return false;
280 
281  // Check that the extractvalue operand comes from the intrinsic.
282  const auto *EVI = cast<ExtractValueInst>(Itr);
283  if (EVI->getAggregateOperand() != II)
284  return false;
285  }
286 
287  CC = TmpCC;
288  return true;
289 }
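// A minimal sketch of the IR shape this routine folds (value and label names
// are illustrative, not taken from a real test case):
//   %s   = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum = extractvalue { i32, i1 } %s, 0
//   %ovf = extractvalue { i32, i1 } %s, 1
//   br i1 %ovf, label %overflow, label %cont
// The branch on %ovf can then reuse EFLAGS set by the add (CC = X86::COND_O)
// instead of first materializing %ovf into a register.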
290 
291 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
292  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
293  if (evt == MVT::Other || !evt.isSimple())
294  // Unhandled type. Halt "fast" selection and bail.
295  return false;
296 
297  VT = evt.getSimpleVT();
298  // For now, require SSE/SSE2 for performing floating-point operations,
299  // since x87 requires additional work.
300  if (VT == MVT::f64 && !X86ScalarSSEf64)
301  return false;
302  if (VT == MVT::f32 && !X86ScalarSSEf32)
303  return false;
304  // Similarly, no f80 support yet.
305  if (VT == MVT::f80)
306  return false;
307  // We only handle legal types. For example, on x86-32 the instruction
308  // selector contains all of the 64-bit instructions from x86-64,
309  // under the assumption that i64 won't be used if the target doesn't
310  // support it.
311  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
312 }
313 
314 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
315 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
316 /// Return true and the result register by reference if it is possible.
317 bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
318  MachineMemOperand *MMO, unsigned &ResultReg,
319  unsigned Alignment) {
320  bool HasSSE41 = Subtarget->hasSSE41();
321  bool HasAVX = Subtarget->hasAVX();
322  bool HasAVX2 = Subtarget->hasAVX2();
323  bool HasAVX512 = Subtarget->hasAVX512();
324  bool HasVLX = Subtarget->hasVLX();
325  bool IsNonTemporal = MMO && MMO->isNonTemporal();
326 
327  // Get opcode and regclass of the output for the given load instruction.
328  unsigned Opc = 0;
329  const TargetRegisterClass *RC = nullptr;
330  switch (VT.getSimpleVT().SimpleTy) {
331  default: return false;
332  case MVT::i1:
333  case MVT::i8:
334  Opc = X86::MOV8rm;
335  RC = &X86::GR8RegClass;
336  break;
337  case MVT::i16:
338  Opc = X86::MOV16rm;
339  RC = &X86::GR16RegClass;
340  break;
341  case MVT::i32:
342  Opc = X86::MOV32rm;
343  RC = &X86::GR32RegClass;
344  break;
345  case MVT::i64:
346  // Must be in x86-64 mode.
347  Opc = X86::MOV64rm;
348  RC = &X86::GR64RegClass;
349  break;
350  case MVT::f32:
351  if (X86ScalarSSEf32) {
352  Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
353  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
354  } else {
355  Opc = X86::LD_Fp32m;
356  RC = &X86::RFP32RegClass;
357  }
358  break;
359  case MVT::f64:
360  if (X86ScalarSSEf64) {
361  Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
362  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
363  } else {
364  Opc = X86::LD_Fp64m;
365  RC = &X86::RFP64RegClass;
366  }
367  break;
368  case MVT::f80:
369  // No f80 support yet.
370  return false;
371  case MVT::v4f32:
372  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
373  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
374  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
375  else if (Alignment >= 16)
376  Opc = HasVLX ? X86::VMOVAPSZ128rm :
377  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
378  else
379  Opc = HasVLX ? X86::VMOVUPSZ128rm :
380  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
381  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
382  break;
383  case MVT::v2f64:
384  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
385  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
386  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
387  else if (Alignment >= 16)
388  Opc = HasVLX ? X86::VMOVAPDZ128rm :
389  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
390  else
391  Opc = HasVLX ? X86::VMOVUPDZ128rm :
392  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
393  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
394  break;
395  case MVT::v4i32:
396  case MVT::v2i64:
397  case MVT::v8i16:
398  case MVT::v16i8:
399  if (IsNonTemporal && Alignment >= 16)
400  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
401  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
402  else if (Alignment >= 16)
403  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
404  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
405  else
406  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
407  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
408  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
409  break;
410  case MVT::v8f32:
411  assert(HasAVX);
412  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
413  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
414  else if (IsNonTemporal && Alignment >= 16)
415  return false; // Force split for X86::VMOVNTDQArm
416  else if (Alignment >= 32)
417  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
418  else
419  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
420  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
421  break;
422  case MVT::v4f64:
423  assert(HasAVX);
424  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
425  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
426  else if (IsNonTemporal && Alignment >= 16)
427  return false; // Force split for X86::VMOVNTDQArm
428  else if (Alignment >= 32)
429  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
430  else
431  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
432  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
433  break;
434  case MVT::v8i32:
435  case MVT::v4i64:
436  case MVT::v16i16:
437  case MVT::v32i8:
438  assert(HasAVX);
439  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
440  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
441  else if (IsNonTemporal && Alignment >= 16)
442  return false; // Force split for X86::VMOVNTDQArm
443  else if (Alignment >= 32)
444  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
445  else
446  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
447  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
448  break;
449  case MVT::v16f32:
450  assert(HasAVX512);
451  if (IsNonTemporal && Alignment >= 64)
452  Opc = X86::VMOVNTDQAZrm;
453  else
454  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
455  RC = &X86::VR512RegClass;
456  break;
457  case MVT::v8f64:
458  assert(HasAVX512);
459  if (IsNonTemporal && Alignment >= 64)
460  Opc = X86::VMOVNTDQAZrm;
461  else
462  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
463  RC = &X86::VR512RegClass;
464  break;
465  case MVT::v8i64:
466  case MVT::v16i32:
467  case MVT::v32i16:
468  case MVT::v64i8:
469  assert(HasAVX512);
470  // Note: There are a lot more choices based on type with AVX-512, but
471  // there's really no advantage when the load isn't masked.
472  if (IsNonTemporal && Alignment >= 64)
473  Opc = X86::VMOVNTDQAZrm;
474  else
475  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
476  RC = &X86::VR512RegClass;
477  break;
478  }
479 
480  ResultReg = createResultReg(RC);
481  MachineInstrBuilder MIB =
482  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
483  addFullAddress(MIB, AM);
484  if (MMO)
485  MIB->addMemOperand(*FuncInfo.MF, MMO);
486  return true;
487 }
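// For example (a hypothetical query, assuming an AVX subtarget without
// AVX-512/VLX): a 16-byte-aligned non-temporal load of MVT::v4f32 selects
// X86::VMOVNTDQArm above, while the same load with only 4-byte alignment falls
// back to the unaligned X86::VMOVUPSrm form.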
488 
489 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
490 /// type VT. The address is either pre-computed, consisting of a base ptr, Ptr,
491 /// and a displacement offset, or a GlobalAddress,
492 /// i.e. V. Return true if it is possible.
493 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
494  X86AddressMode &AM,
495  MachineMemOperand *MMO, bool Aligned) {
496  bool HasSSE1 = Subtarget->hasSSE1();
497  bool HasSSE2 = Subtarget->hasSSE2();
498  bool HasSSE4A = Subtarget->hasSSE4A();
499  bool HasAVX = Subtarget->hasAVX();
500  bool HasAVX512 = Subtarget->hasAVX512();
501  bool HasVLX = Subtarget->hasVLX();
502  bool IsNonTemporal = MMO && MMO->isNonTemporal();
503 
504  // Get opcode and regclass of the output for the given store instruction.
505  unsigned Opc = 0;
506  switch (VT.getSimpleVT().SimpleTy) {
507  case MVT::f80: // No f80 support yet.
508  default: return false;
509  case MVT::i1: {
510  // Mask out all but lowest bit.
511  unsigned AndResult = createResultReg(&X86::GR8RegClass);
512  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
513  TII.get(X86::AND8ri), AndResult)
514  .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
515  ValReg = AndResult;
516  LLVM_FALLTHROUGH; // handle i1 as i8.
517  }
518  case MVT::i8: Opc = X86::MOV8mr; break;
519  case MVT::i16: Opc = X86::MOV16mr; break;
520  case MVT::i32:
521  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
522  break;
523  case MVT::i64:
524  // Must be in x86-64 mode.
525  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
526  break;
527  case MVT::f32:
528  if (X86ScalarSSEf32) {
529  if (IsNonTemporal && HasSSE4A)
530  Opc = X86::MOVNTSS;
531  else
532  Opc = HasAVX512 ? X86::VMOVSSZmr :
533  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
534  } else
535  Opc = X86::ST_Fp32m;
536  break;
537  case MVT::f64:
538  if (X86ScalarSSEf64) {
539  if (IsNonTemporal && HasSSE4A)
540  Opc = X86::MOVNTSD;
541  else
542  Opc = HasAVX512 ? X86::VMOVSDZmr :
543  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
544  } else
545  Opc = X86::ST_Fp64m;
546  break;
547  case MVT::x86mmx:
548  Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
549  break;
550  case MVT::v4f32:
551  if (Aligned) {
552  if (IsNonTemporal)
553  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
554  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
555  else
556  Opc = HasVLX ? X86::VMOVAPSZ128mr :
557  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
558  } else
559  Opc = HasVLX ? X86::VMOVUPSZ128mr :
560  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
561  break;
562  case MVT::v2f64:
563  if (Aligned) {
564  if (IsNonTemporal)
565  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
566  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
567  else
568  Opc = HasVLX ? X86::VMOVAPDZ128mr :
569  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
570  } else
571  Opc = HasVLX ? X86::VMOVUPDZ128mr :
572  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
573  break;
574  case MVT::v4i32:
575  case MVT::v2i64:
576  case MVT::v8i16:
577  case MVT::v16i8:
578  if (Aligned) {
579  if (IsNonTemporal)
580  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
581  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
582  else
583  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
584  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
585  } else
586  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
587  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
588  break;
589  case MVT::v8f32:
590  assert(HasAVX);
591  if (Aligned) {
592  if (IsNonTemporal)
593  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
594  else
595  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
596  } else
597  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
598  break;
599  case MVT::v4f64:
600  assert(HasAVX);
601  if (Aligned) {
602  if (IsNonTemporal)
603  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
604  else
605  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
606  } else
607  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
608  break;
609  case MVT::v8i32:
610  case MVT::v4i64:
611  case MVT::v16i16:
612  case MVT::v32i8:
613  assert(HasAVX);
614  if (Aligned) {
615  if (IsNonTemporal)
616  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
617  else
618  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
619  } else
620  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
621  break;
622  case MVT::v16f32:
623  assert(HasAVX512);
624  if (Aligned)
625  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
626  else
627  Opc = X86::VMOVUPSZmr;
628  break;
629  case MVT::v8f64:
630  assert(HasAVX512);
631  if (Aligned) {
632  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
633  } else
634  Opc = X86::VMOVUPDZmr;
635  break;
636  case MVT::v8i64:
637  case MVT::v16i32:
638  case MVT::v32i16:
639  case MVT::v64i8:
640  assert(HasAVX512);
641  // Note: There are a lot more choices based on type with AVX-512, but
642  // there's really no advantage when the store isn't masked.
643  if (Aligned)
644  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
645  else
646  Opc = X86::VMOVDQU64Zmr;
647  break;
648  }
649 
650  const MCInstrDesc &Desc = TII.get(Opc);
651  // Some of the instructions in the previous switch use FR128 instead
652  // of FR32 for ValReg. Make sure the register we feed the instruction
653  // matches its register class constraints.
654  // Note: It is fine to copy from FR32 to FR128; these are the same
655  // registers behind the scenes, which is actually why this did not trigger
656  // any bugs before.
657  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
658  MachineInstrBuilder MIB =
659  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
660  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
661  if (MMO)
662  MIB->addMemOperand(*FuncInfo.MF, MMO);
663 
664  return true;
665 }
666 
667 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
668  X86AddressMode &AM,
669  MachineMemOperand *MMO, bool Aligned) {
670  // Handle 'null' like i32/i64 0.
671  if (isa<ConstantPointerNull>(Val))
672  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
673 
674  // If this is a store of a simple constant, fold the constant into the store.
675  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
676  unsigned Opc = 0;
677  bool Signed = true;
678  switch (VT.getSimpleVT().SimpleTy) {
679  default: break;
680  case MVT::i1:
681  Signed = false;
682  LLVM_FALLTHROUGH; // Handle as i8.
683  case MVT::i8: Opc = X86::MOV8mi; break;
684  case MVT::i16: Opc = X86::MOV16mi; break;
685  case MVT::i32: Opc = X86::MOV32mi; break;
686  case MVT::i64:
687  // Must be a 32-bit sign extended value.
688  if (isInt<32>(CI->getSExtValue()))
689  Opc = X86::MOV64mi32;
690  break;
691  }
692 
693  if (Opc) {
694  MachineInstrBuilder MIB =
695  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
696  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
697  : CI->getZExtValue());
698  if (MMO)
699  MIB->addMemOperand(*FuncInfo.MF, MMO);
700  return true;
701  }
702  }
703 
704  unsigned ValReg = getRegForValue(Val);
705  if (ValReg == 0)
706  return false;
707 
708  bool ValKill = hasTrivialKill(Val);
709  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
710 }
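// For example (illustrative IR): "store i64 42, i64* %p" folds the constant
// into a single X86::MOV64mi32, since 42 fits in a sign-extended 32-bit
// immediate; a constant that does not fit (e.g. 1 << 40) is instead
// materialized into a register and stored through the register variant above.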
711 
712 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
713 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
714 /// ISD::SIGN_EXTEND).
715 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
716  unsigned Src, EVT SrcVT,
717  unsigned &ResultReg) {
718  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
719  Src, /*TODO: Kill=*/false);
720  if (RR == 0)
721  return false;
722 
723  ResultReg = RR;
724  return true;
725 }
726 
727 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
728  // Handle constant address.
729  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
730  // Can't handle alternate code models yet.
731  if (TM.getCodeModel() != CodeModel::Small)
732  return false;
733 
734  // Can't handle TLS yet.
735  if (GV->isThreadLocal())
736  return false;
737 
738  // Can't handle !absolute_symbol references yet.
739  if (GV->isAbsoluteSymbolRef())
740  return false;
741 
742  // RIP-relative addresses can't have additional register operands, so if
743  // we've already folded stuff into the addressing mode, just force the
744  // global value into its own register, which we can use as the basereg.
745  if (!Subtarget->isPICStyleRIPRel() ||
746  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
747  // Okay, we've committed to selecting this global. Set up the address.
748  AM.GV = GV;
749 
750  // Allow the subtarget to classify the global.
751  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
752 
753  // If this reference is relative to the pic base, set it now.
754  if (isGlobalRelativeToPICBase(GVFlags)) {
755  // FIXME: How do we know Base.Reg is free??
756  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
757  }
758 
759  // Unless the ABI requires an extra load, return a direct reference to
760  // the global.
761  if (!isGlobalStubReference(GVFlags)) {
762  if (Subtarget->isPICStyleRIPRel()) {
763  // Use rip-relative addressing if we can. Above we verified that the
764  // base and index registers are unused.
765  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
766  AM.Base.Reg = X86::RIP;
767  }
768  AM.GVOpFlags = GVFlags;
769  return true;
770  }
771 
772  // Ok, we need to do a load from a stub. If we've already loaded from
773  // this stub, reuse the loaded pointer, otherwise emit the load now.
774  DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
775  unsigned LoadReg;
776  if (I != LocalValueMap.end() && I->second != 0) {
777  LoadReg = I->second;
778  } else {
779  // Issue load from stub.
780  unsigned Opc = 0;
781  const TargetRegisterClass *RC = nullptr;
782  X86AddressMode StubAM;
783  StubAM.Base.Reg = AM.Base.Reg;
784  StubAM.GV = GV;
785  StubAM.GVOpFlags = GVFlags;
786 
787  // Prepare for inserting code in the local-value area.
788  SavePoint SaveInsertPt = enterLocalValueArea();
789 
790  if (TLI.getPointerTy(DL) == MVT::i64) {
791  Opc = X86::MOV64rm;
792  RC = &X86::GR64RegClass;
793 
794  if (Subtarget->isPICStyleRIPRel())
795  StubAM.Base.Reg = X86::RIP;
796  } else {
797  Opc = X86::MOV32rm;
798  RC = &X86::GR32RegClass;
799  }
800 
801  LoadReg = createResultReg(RC);
802  MachineInstrBuilder LoadMI =
803  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
804  addFullAddress(LoadMI, StubAM);
805 
806  // Ok, back to normal mode.
807  leaveLocalValueArea(SaveInsertPt);
808 
809  // Prevent loading GV stub multiple times in same MBB.
810  LocalValueMap[V] = LoadReg;
811  }
812 
813  // Now construct the final address. Note that the Disp, Scale,
814  // and Index values may already be set here.
815  AM.Base.Reg = LoadReg;
816  AM.GV = nullptr;
817  return true;
818  }
819  }
820 
821  // If all else fails, try to materialize the value in a register.
822  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
823  if (AM.Base.Reg == 0) {
824  AM.Base.Reg = getRegForValue(V);
825  return AM.Base.Reg != 0;
826  }
827  if (AM.IndexReg == 0) {
828  assert(AM.Scale == 1 && "Scale with no index!");
829  AM.IndexReg = getRegForValue(V);
830  return AM.IndexReg != 0;
831  }
832  }
833 
834  return false;
835 }
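// A rough sketch of the stub-load path above (exact operand flags depend on
// the subtarget and relocation model): for an external global under x86-64
// PIC, classifyGlobalReference typically yields a GOT-style reference, so a
// "movq sym@GOTPCREL(%rip), %reg" is emitted once in the local-value area and
// %reg then serves as the base register of the final address.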
836 
837 /// X86SelectAddress - Attempt to fill in an address from the given value.
838 ///
839 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
840  SmallVector<const Value *, 32> GEPs;
841 redo_gep:
842  const User *U = nullptr;
843  unsigned Opcode = Instruction::UserOp1;
844  if (const Instruction *I = dyn_cast<Instruction>(V)) {
845  // Don't walk into other basic blocks; it's possible we haven't
846  // visited them yet, so the instructions may not yet be assigned
847  // virtual registers.
848  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
849  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
850  Opcode = I->getOpcode();
851  U = I;
852  }
853  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
854  Opcode = C->getOpcode();
855  U = C;
856  }
857 
858  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
859  if (Ty->getAddressSpace() > 255)
860  // Fast instruction selection doesn't support the special
861  // address spaces.
862  return false;
863 
864  switch (Opcode) {
865  default: break;
866  case Instruction::BitCast:
867  // Look past bitcasts.
868  return X86SelectAddress(U->getOperand(0), AM);
869 
870  case Instruction::IntToPtr:
871  // Look past no-op inttoptrs.
872  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
873  TLI.getPointerTy(DL))
874  return X86SelectAddress(U->getOperand(0), AM);
875  break;
876 
877  case Instruction::PtrToInt:
878  // Look past no-op ptrtoints.
879  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
880  return X86SelectAddress(U->getOperand(0), AM);
881  break;
882 
883  case Instruction::Alloca: {
884  // Do static allocas.
885  const AllocaInst *A = cast<AllocaInst>(V);
886  DenseMap<const AllocaInst *, int>::iterator SI =
887  FuncInfo.StaticAllocaMap.find(A);
888  if (SI != FuncInfo.StaticAllocaMap.end()) {
889  AM.BaseType = X86AddressMode::FrameIndexBase;
890  AM.Base.FrameIndex = SI->second;
891  return true;
892  }
893  break;
894  }
895 
896  case Instruction::Add: {
897  // Adds of constants are common and easy enough.
898  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
899  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
900  // They have to fit in the 32-bit signed displacement field though.
901  if (isInt<32>(Disp)) {
902  AM.Disp = (uint32_t)Disp;
903  return X86SelectAddress(U->getOperand(0), AM);
904  }
905  }
906  break;
907  }
908 
909  case Instruction::GetElementPtr: {
910  X86AddressMode SavedAM = AM;
911 
912  // Pattern-match simple GEPs.
913  uint64_t Disp = (int32_t)AM.Disp;
914  unsigned IndexReg = AM.IndexReg;
915  unsigned Scale = AM.Scale;
916  gep_type_iterator GTI = gep_type_begin(U);
917  // Iterate through the indices, folding what we can. Constants can be
918  // folded, and one dynamic index can be handled, if the scale is supported.
919  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
920  i != e; ++i, ++GTI) {
921  const Value *Op = *i;
922  if (StructType *STy = GTI.getStructTypeOrNull()) {
923  const StructLayout *SL = DL.getStructLayout(STy);
924  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
925  continue;
926  }
927 
928  // An array/variable index is always of the form i*S where S is the
929  // constant scale size. See if we can push the scale into immediates.
930  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
931  for (;;) {
932  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
933  // Constant-offset addressing.
934  Disp += CI->getSExtValue() * S;
935  break;
936  }
937  if (canFoldAddIntoGEP(U, Op)) {
938  // A compatible add with a constant operand. Fold the constant.
939  ConstantInt *CI =
940  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
941  Disp += CI->getSExtValue() * S;
942  // Iterate on the other operand.
943  Op = cast<AddOperator>(Op)->getOperand(0);
944  continue;
945  }
946  if (IndexReg == 0 &&
947  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
948  (S == 1 || S == 2 || S == 4 || S == 8)) {
949  // Scaled-index addressing.
950  Scale = S;
951  IndexReg = getRegForGEPIndex(Op).first;
952  if (IndexReg == 0)
953  return false;
954  break;
955  }
956  // Unsupported.
957  goto unsupported_gep;
958  }
959  }
960 
961  // Check for displacement overflow.
962  if (!isInt<32>(Disp))
963  break;
964 
965  AM.IndexReg = IndexReg;
966  AM.Scale = Scale;
967  AM.Disp = (uint32_t)Disp;
968  GEPs.push_back(V);
969 
970  if (const GetElementPtrInst *GEP =
971  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
972  // Ok, the GEP indices were covered by constant-offset and scaled-index
973  // addressing. Update the address state and move on to examining the base.
974  V = GEP;
975  goto redo_gep;
976  } else if (X86SelectAddress(U->getOperand(0), AM)) {
977  return true;
978  }
979 
980  // If we couldn't merge the gep value into this addr mode, revert back to
981  // our address and just match the value instead of completely failing.
982  AM = SavedAM;
983 
984  for (const Value *I : reverse(GEPs))
985  if (handleConstantAddresses(I, AM))
986  return true;
987 
988  return false;
989  unsupported_gep:
990  // Ok, the GEP indices weren't all covered.
991  break;
992  }
993  }
994 
995  return handleConstantAddresses(V, AM);
996 }
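// A small illustration of the GEP folding above (hypothetical IR):
//   %p = getelementptr inbounds i32, i32* %base, i64 %i
//   %v = load i32, i32* %p
// folds into one addressing mode [%base + 4*%i + 0]: the element size 4
// becomes AM.Scale and %i becomes AM.IndexReg, so no separate address
// arithmetic needs to be emitted.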
997 
998 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
999 ///
1000 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
1001  const User *U = nullptr;
1002  unsigned Opcode = Instruction::UserOp1;
1003  const Instruction *I = dyn_cast<Instruction>(V);
1004  // Record if the value is defined in the same basic block.
1005  //
1006  // This information is crucial to know whether or not folding an
1007  // operand is valid.
1008  // Indeed, FastISel generates or reuses a virtual register for all
1009  // operands of all instructions it selects. Obviously, the definition and
1010  // its uses must use the same virtual register otherwise the produced
1011  // code is incorrect.
1012  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
1013  // registers for values that are alive across basic blocks. This ensures
1014  // that the values are consistently set across basic blocks, even
1015  // if different instruction selection mechanisms are used (e.g., a mix of
1016  // SDISel and FastISel).
1017  // For values local to a basic block, the instruction selection process
1018  // generates these virtual registers with whatever method is appropriate
1019  // for its needs. In particular, FastISel and SDISel do not share the way
1020  // local virtual registers are set.
1021  // Therefore, it is impossible (or at least unsafe) to share values
1022  // between basic blocks unless they use the same instruction selection
1023  // method, which is not guaranteed for X86.
1024  // Moreover, things like hasOneUse could not be used accurately if we
1025  // allowed references to values across basic blocks when they are not
1026  // alive across basic blocks initially.
1027  bool InMBB = true;
1028  if (I) {
1029  Opcode = I->getOpcode();
1030  U = I;
1031  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1032  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1033  Opcode = C->getOpcode();
1034  U = C;
1035  }
1036 
1037  switch (Opcode) {
1038  default: break;
1039  case Instruction::BitCast:
1040  // Look past bitcasts if its operand is in the same BB.
1041  if (InMBB)
1042  return X86SelectCallAddress(U->getOperand(0), AM);
1043  break;
1044 
1045  case Instruction::IntToPtr:
1046  // Look past no-op inttoptrs if its operand is in the same BB.
1047  if (InMBB &&
1048  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1049  TLI.getPointerTy(DL))
1050  return X86SelectCallAddress(U->getOperand(0), AM);
1051  break;
1052 
1053  case Instruction::PtrToInt:
1054  // Look past no-op ptrtoints if its operand is in the same BB.
1055  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1056  return X86SelectCallAddress(U->getOperand(0), AM);
1057  break;
1058  }
1059 
1060  // Handle constant address.
1061  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1062  // Can't handle alternate code models yet.
1063  if (TM.getCodeModel() != CodeModel::Small)
1064  return false;
1065 
1066  // RIP-relative addresses can't have additional register operands.
1067  if (Subtarget->isPICStyleRIPRel() &&
1068  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1069  return false;
1070 
1071  // Can't handle TLS.
1072  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1073  if (GVar->isThreadLocal())
1074  return false;
1075 
1076  // Okay, we've committed to selecting this global. Set up the basic address.
1077  AM.GV = GV;
1078 
1079  // Return a direct reference to the global. Fastisel can handle calls to
1080  // functions that require loads, such as dllimport and nonlazybind
1081  // functions.
1082  if (Subtarget->isPICStyleRIPRel()) {
1083  // Use rip-relative addressing if we can. Above we verified that the
1084  // base and index registers are unused.
1085  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1086  AM.Base.Reg = X86::RIP;
1087  } else {
1088  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1089  }
1090 
1091  return true;
1092  }
1093 
1094  // If all else fails, try to materialize the value in a register.
1095  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1096  if (AM.Base.Reg == 0) {
1097  AM.Base.Reg = getRegForValue(V);
1098  return AM.Base.Reg != 0;
1099  }
1100  if (AM.IndexReg == 0) {
1101  assert(AM.Scale == 1 && "Scale with no index!");
1102  AM.IndexReg = getRegForValue(V);
1103  return AM.IndexReg != 0;
1104  }
1105  }
1106 
1107  return false;
1108 }
1109 
1110 
1111 /// X86SelectStore - Select and emit code to implement store instructions.
1112 bool X86FastISel::X86SelectStore(const Instruction *I) {
1113  // Atomic stores need special handling.
1114  const StoreInst *S = cast<StoreInst>(I);
1115 
1116  if (S->isAtomic())
1117  return false;
1118 
1119  const Value *PtrV = I->getOperand(1);
1120  if (TLI.supportSwiftError()) {
1121  // Swifterror values can come from either a function parameter with
1122  // swifterror attribute or an alloca with swifterror attribute.
1123  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1124  if (Arg->hasSwiftErrorAttr())
1125  return false;
1126  }
1127 
1128  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1129  if (Alloca->isSwiftError())
1130  return false;
1131  }
1132  }
1133 
1134  const Value *Val = S->getValueOperand();
1135  const Value *Ptr = S->getPointerOperand();
1136 
1137  MVT VT;
1138  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1139  return false;
1140 
1141  unsigned Alignment = S->getAlignment();
1142  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
1143  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1144  Alignment = ABIAlignment;
1145  bool Aligned = Alignment >= ABIAlignment;
1146 
1147  X86AddressMode AM;
1148  if (!X86SelectAddress(Ptr, AM))
1149  return false;
1150 
1151  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1152 }
1153 
1154 /// X86SelectRet - Select and emit code to implement ret instructions.
1155 bool X86FastISel::X86SelectRet(const Instruction *I) {
1156  const ReturnInst *Ret = cast<ReturnInst>(I);
1157  const Function &F = *I->getParent()->getParent();
1158  const X86MachineFunctionInfo *X86MFInfo =
1159  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1160 
1161  if (!FuncInfo.CanLowerReturn)
1162  return false;
1163 
1164  if (TLI.supportSwiftError() &&
1165  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1166  return false;
1167 
1168  if (TLI.supportSplitCSR(FuncInfo.MF))
1169  return false;
1170 
1171  CallingConv::ID CC = F.getCallingConv();
1172  if (CC != CallingConv::C &&
1173  CC != CallingConv::Fast &&
1174  CC != CallingConv::X86_FastCall &&
1175  CC != CallingConv::X86_StdCall &&
1176  CC != CallingConv::X86_ThisCall &&
1177  CC != CallingConv::X86_64_SysV &&
1178  CC != CallingConv::Win64)
1179  return false;
1180 
1181  // Don't handle popping bytes if they don't fit the ret's immediate.
1182  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1183  return false;
1184 
1185  // fastcc with -tailcallopt is intended to provide a guaranteed
1186  // tail call optimization. Fastisel doesn't know how to do that.
1187  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
1188  return false;
1189 
1190  // Let SDISel handle vararg functions.
1191  if (F.isVarArg())
1192  return false;
1193 
1194  // Build a list of return value registers.
1195  SmallVector<unsigned, 4> RetRegs;
1196 
1197  if (Ret->getNumOperands() > 0) {
1198  SmallVector<ISD::OutputArg, 4> Outs;
1199  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1200 
1201  // Analyze operands of the call, assigning locations to each operand.
1202  SmallVector<CCValAssign, 16> ValLocs;
1203  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1204  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1205 
1206  const Value *RV = Ret->getOperand(0);
1207  unsigned Reg = getRegForValue(RV);
1208  if (Reg == 0)
1209  return false;
1210 
1211  // Only handle a single return value for now.
1212  if (ValLocs.size() != 1)
1213  return false;
1214 
1215  CCValAssign &VA = ValLocs[0];
1216 
1217  // Don't bother handling odd stuff for now.
1218  if (VA.getLocInfo() != CCValAssign::Full)
1219  return false;
1220  // Only handle register returns for now.
1221  if (!VA.isRegLoc())
1222  return false;
1223 
1224  // The calling-convention tables for x87 returns don't tell
1225  // the whole story.
1226  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1227  return false;
1228 
1229  unsigned SrcReg = Reg + VA.getValNo();
1230  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1231  EVT DstVT = VA.getValVT();
1232  // Special handling for extended integers.
1233  if (SrcVT != DstVT) {
1234  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1235  return false;
1236 
1237  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1238  return false;
1239 
1240  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1241 
1242  if (SrcVT == MVT::i1) {
1243  if (Outs[0].Flags.isSExt())
1244  return false;
1245  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
1246  SrcVT = MVT::i8;
1247  }
1248  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1249  ISD::SIGN_EXTEND;
1250  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
1251  SrcReg, /*TODO: Kill=*/false);
1252  }
1253 
1254  // Make the copy.
1255  unsigned DstReg = VA.getLocReg();
1256  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1257  // Avoid a cross-class copy. This is very unlikely.
1258  if (!SrcRC->contains(DstReg))
1259  return false;
1260  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1261  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1262 
1263  // Add register to return instruction.
1264  RetRegs.push_back(VA.getLocReg());
1265  }
1266 
1267  // Swift calling convention does not require we copy the sret argument
1268  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1269 
1270  // All x86 ABIs require that for returning structs by value we copy
1271  // the sret argument into %rax/%eax (depending on ABI) for the return.
1272  // We saved the argument into a virtual register in the entry block,
1273  // so now we copy the value out and into %rax/%eax.
1274  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
1275  unsigned Reg = X86MFInfo->getSRetReturnReg();
1276  assert(Reg &&
1277  "SRetReturnReg should have been set in LowerFormalArguments()!");
1278  unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1279  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1280  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1281  RetRegs.push_back(RetReg);
1282  }
1283 
1284  // Now emit the RET.
1285  MachineInstrBuilder MIB;
1286  if (X86MFInfo->getBytesToPopOnReturn()) {
1287  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1288  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
1289  .addImm(X86MFInfo->getBytesToPopOnReturn());
1290  } else {
1291  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1292  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
1293  }
1294  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1295  MIB.addReg(RetRegs[i], RegState::Implicit);
1296  return true;
1297 }
1298 
1299 /// X86SelectLoad - Select and emit code to implement load instructions.
1300 ///
1301 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1302  const LoadInst *LI = cast<LoadInst>(I);
1303 
1304  // Atomic loads need special handling.
1305  if (LI->isAtomic())
1306  return false;
1307 
1308  const Value *SV = I->getOperand(0);
1309  if (TLI.supportSwiftError()) {
1310  // Swifterror values can come from either a function parameter with
1311  // swifterror attribute or an alloca with swifterror attribute.
1312  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1313  if (Arg->hasSwiftErrorAttr())
1314  return false;
1315  }
1316 
1317  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1318  if (Alloca->isSwiftError())
1319  return false;
1320  }
1321  }
1322 
1323  MVT VT;
1324  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1325  return false;
1326 
1327  const Value *Ptr = LI->getPointerOperand();
1328 
1329  X86AddressMode AM;
1330  if (!X86SelectAddress(Ptr, AM))
1331  return false;
1332 
1333  unsigned Alignment = LI->getAlignment();
1334  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
1335  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1336  Alignment = ABIAlignment;
1337 
1338  unsigned ResultReg = 0;
1339  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1340  Alignment))
1341  return false;
1342 
1343  updateValueMap(I, ResultReg);
1344  return true;
1345 }
1346 
1347 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1348  bool HasAVX512 = Subtarget->hasAVX512();
1349  bool HasAVX = Subtarget->hasAVX();
1350  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
1351  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
1352 
1353  switch (VT.getSimpleVT().SimpleTy) {
1354  default: return 0;
1355  case MVT::i8: return X86::CMP8rr;
1356  case MVT::i16: return X86::CMP16rr;
1357  case MVT::i32: return X86::CMP32rr;
1358  case MVT::i64: return X86::CMP64rr;
1359  case MVT::f32:
1360  return X86ScalarSSEf32
1361  ? (HasAVX512 ? X86::VUCOMISSZrr
1362  : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
1363  : 0;
1364  case MVT::f64:
1365  return X86ScalarSSEf64
1366  ? (HasAVX512 ? X86::VUCOMISDZrr
1367  : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
1368  : 0;
1369  }
1370 }
1371 
1372 /// If the RHS of the comparison is the constant RHSC, return an opcode that
1373 /// can fold it into the compare (e.g. CMP32ri); otherwise return 0.
1374 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1375  int64_t Val = RHSC->getSExtValue();
1376  switch (VT.getSimpleVT().SimpleTy) {
1377  // Otherwise, we can't fold the immediate into this comparison.
1378  default:
1379  return 0;
1380  case MVT::i8:
1381  return X86::CMP8ri;
1382  case MVT::i16:
1383  if (isInt<8>(Val))
1384  return X86::CMP16ri8;
1385  return X86::CMP16ri;
1386  case MVT::i32:
1387  if (isInt<8>(Val))
1388  return X86::CMP32ri8;
1389  return X86::CMP32ri;
1390  case MVT::i64:
1391  if (isInt<8>(Val))
1392  return X86::CMP64ri8;
1393  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1394  // field.
1395  if (isInt<32>(Val))
1396  return X86::CMP64ri32;
1397  return 0;
1398  }
1399 }
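// For example (illustrative values): comparing an i32 against 5 yields
// X86::CMP32ri8 because 5 fits in a sign-extended 8-bit immediate, while
// comparing against 100000 yields X86::CMP32ri.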
1400 
1401 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1402  const DebugLoc &CurDbgLoc) {
1403  unsigned Op0Reg = getRegForValue(Op0);
1404  if (Op0Reg == 0) return false;
1405 
1406  // Handle 'null' like i32/i64 0.
1407  if (isa<ConstantPointerNull>(Op1))
1408  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1409 
1410  // We have two options: compare with register or immediate. If the RHS of
1411  // the compare is an immediate that we can fold into this compare, use
1412  // CMPri, otherwise use CMPrr.
1413  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1414  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1415  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1416  .addReg(Op0Reg)
1417  .addImm(Op1C->getSExtValue());
1418  return true;
1419  }
1420  }
1421 
1422  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1423  if (CompareOpc == 0) return false;
1424 
1425  unsigned Op1Reg = getRegForValue(Op1);
1426  if (Op1Reg == 0) return false;
1427  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1428  .addReg(Op0Reg)
1429  .addReg(Op1Reg);
1430 
1431  return true;
1432 }
1433 
1434 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1435  const CmpInst *CI = cast<CmpInst>(I);
1436 
1437  MVT VT;
1438  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1439  return false;
1440 
1441  // Try to optimize or fold the cmp.
1442  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1443  unsigned ResultReg = 0;
1444  switch (Predicate) {
1445  default: break;
1446  case CmpInst::FCMP_FALSE: {
1447  ResultReg = createResultReg(&X86::GR32RegClass);
1448  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1449  ResultReg);
1450  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
1451  X86::sub_8bit);
1452  if (!ResultReg)
1453  return false;
1454  break;
1455  }
1456  case CmpInst::FCMP_TRUE: {
1457  ResultReg = createResultReg(&X86::GR8RegClass);
1458  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1459  ResultReg).addImm(1);
1460  break;
1461  }
1462  }
1463 
1464  if (ResultReg) {
1465  updateValueMap(I, ResultReg);
1466  return true;
1467  }
1468 
1469  const Value *LHS = CI->getOperand(0);
1470  const Value *RHS = CI->getOperand(1);
1471 
1472  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1473  // We don't have to materialize a zero constant for this case and can just use
1474  // %x again on the RHS.
1475  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1476  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1477  if (RHSC && RHSC->isNullValue())
1478  RHS = LHS;
1479  }
1480 
1481  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1482  static const uint16_t SETFOpcTable[2][3] = {
1483  { X86::COND_E, X86::COND_NP, X86::AND8rr },
1484  { X86::COND_NE, X86::COND_P, X86::OR8rr }
1485  };
1486  const uint16_t *SETFOpc = nullptr;
1487  switch (Predicate) {
1488  default: break;
1489  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1490  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1491  }
1492 
1493  ResultReg = createResultReg(&X86::GR8RegClass);
1494  if (SETFOpc) {
1495  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1496  return false;
1497 
1498  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1499  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1500  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1501  FlagReg1).addImm(SETFOpc[0]);
1502  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1503  FlagReg2).addImm(SETFOpc[1]);
1504  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1505  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1506  updateValueMap(I, ResultReg);
1507  return true;
1508  }
1509 
1510  X86::CondCode CC;
1511  bool SwapArgs;
1512  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1513  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1514 
1515  if (SwapArgs)
1516  std::swap(LHS, RHS);
1517 
1518  // Emit a compare of LHS/RHS.
1519  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1520  return false;
1521 
1522  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1523  ResultReg).addImm(CC);
1524  updateValueMap(I, ResultReg);
1525  return true;
1526 }
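// For instance (a rough sketch of the emitted sequence, register names
// arbitrary): "fcmp oeq float %a, %b" lowers through the SETFOpc path above to
//   ucomiss %xmm1, %xmm0
//   sete    %cl
//   setnp   %dl
//   andb    %dl, %cl
// since OEQ holds only when ZF is set and PF (unordered) is clear.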
1527 
1528 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1529  EVT DstVT = TLI.getValueType(DL, I->getType());
1530  if (!TLI.isTypeLegal(DstVT))
1531  return false;
1532 
1533  unsigned ResultReg = getRegForValue(I->getOperand(0));
1534  if (ResultReg == 0)
1535  return false;
1536 
1537  // Handle zero-extension from i1 to i8, which is common.
1538  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1539  if (SrcVT == MVT::i1) {
1540  // Set the high bits to zero.
1541  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1542  SrcVT = MVT::i8;
1543 
1544  if (ResultReg == 0)
1545  return false;
1546  }
1547 
1548  if (DstVT == MVT::i64) {
1549  // Handle extension to 64-bits via sub-register shenanigans.
1550  unsigned MovInst;
1551 
1552  switch (SrcVT.SimpleTy) {
1553  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1554  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1555  case MVT::i32: MovInst = X86::MOV32rr; break;
1556  default: llvm_unreachable("Unexpected zext to i64 source type");
1557  }
1558 
1559  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1560  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1561  .addReg(ResultReg);
1562 
1563  ResultReg = createResultReg(&X86::GR64RegClass);
1564  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1565  ResultReg)
1566  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1567  } else if (DstVT == MVT::i16) {
1568  // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1569  // extend to 32-bits and then extract down to 16-bits.
1570  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1571  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1572  Result32).addReg(ResultReg);
1573 
1574  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1575  X86::sub_16bit);
1576  } else if (DstVT != MVT::i8) {
1577  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1578  ResultReg, /*Kill=*/true);
1579  if (ResultReg == 0)
1580  return false;
1581  }
1582 
1583  updateValueMap(I, ResultReg);
1584  return true;
1585 }
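// For example (sketch): "zext i8 %x to i64" is emitted as MOVZX32rr8 into a
// 32-bit register followed by SUBREG_TO_REG into the 64-bit register, relying
// on x86-64 implicitly zeroing the upper 32 bits of 32-bit results.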
1586 
1587 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1588  EVT DstVT = TLI.getValueType(DL, I->getType());
1589  if (!TLI.isTypeLegal(DstVT))
1590  return false;
1591 
1592  unsigned ResultReg = getRegForValue(I->getOperand(0));
1593  if (ResultReg == 0)
1594  return false;
1595 
1596  // Handle sign-extension from i1 to i8.
1597  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1598  if (SrcVT == MVT::i1) {
1599  // Set the high bits to zero.
1600  unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
1601  /*TODO: Kill=*/false);
1602  if (ZExtReg == 0)
1603  return false;
1604 
1605  // Negate the result to make an 8-bit sign extended value.
1606  ResultReg = createResultReg(&X86::GR8RegClass);
1607  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1608  ResultReg).addReg(ZExtReg);
1609 
1610  SrcVT = MVT::i8;
1611  }
1612 
1613  if (DstVT == MVT::i16) {
1614  // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1615  // extend to 32-bits and then extract down to 16-bits.
1616  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1617  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1618  Result32).addReg(ResultReg);
1619 
1620  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1621  X86::sub_16bit);
1622  } else if (DstVT != MVT::i8) {
1623  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1624  ResultReg, /*Kill=*/true);
1625  if (ResultReg == 0)
1626  return false;
1627  }
1628 
1629  updateValueMap(I, ResultReg);
1630  return true;
1631 }
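// For example (sketch): "sext i1 %b to i32" first zero-extends %b to an i8
// holding 0 or 1, then negates it with NEG8r to get 0 or 0xFF, which the
// normal i8->i32 sign extension widens to 0 or -1.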
1632 
1633 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1634  // Unconditional branches are selected by tablegen-generated code.
1635  // Handle a conditional branch.
1636  const BranchInst *BI = cast<BranchInst>(I);
1637  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1638  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1639 
1640  // Fold the common case of a conditional branch with a comparison
 1641  // in the same block (values defined in other blocks may not have
1642  // initialized registers).
1643  X86::CondCode CC;
1644  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1645  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1646  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1647 
1648  // Try to optimize or fold the cmp.
1649  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1650  switch (Predicate) {
1651  default: break;
1652  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1653  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1654  }
1655 
1656  const Value *CmpLHS = CI->getOperand(0);
1657  const Value *CmpRHS = CI->getOperand(1);
1658 
1659  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1660  // 0.0.
1661  // We don't have to materialize a zero constant for this case and can just
1662  // use %x again on the RHS.
1663  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1664  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1665  if (CmpRHSC && CmpRHSC->isNullValue())
1666  CmpRHS = CmpLHS;
1667  }
1668 
1669  // Try to take advantage of fallthrough opportunities.
1670  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1671  std::swap(TrueMBB, FalseMBB);
1672  Predicate = CmpInst::getInversePredicate(Predicate);
1673  }
1674 
1675  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1676  // code check. Instead two branch instructions are required to check all
1677  // the flags. First we change the predicate to a supported condition code,
 1678  // which will be the first branch. Later on we will emit the second
1679  // branch.
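  // Illustrative sketch (schematic, not the exact MI stream): for an original
  // 'fcmp oeq' branch the successors are swapped, the predicate becomes
  // FCMP_ONE, and the code below emits roughly
  //   ucomiss/ucomisd lhs, rhs    ; X86FastEmitCompare sets EFLAGS
  //   jne <original false block>
  //   jp  <original false block>  ; extra branch for the unordered case
  // with the original true block reached by fall-through or an explicit jump.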
1680  bool NeedExtraBranch = false;
1681  switch (Predicate) {
1682  default: break;
1683  case CmpInst::FCMP_OEQ:
1684  std::swap(TrueMBB, FalseMBB);
 1685  LLVM_FALLTHROUGH;
1686  case CmpInst::FCMP_UNE:
1687  NeedExtraBranch = true;
1688  Predicate = CmpInst::FCMP_ONE;
1689  break;
1690  }
1691 
1692  bool SwapArgs;
1693  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1694  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1695 
1696  if (SwapArgs)
1697  std::swap(CmpLHS, CmpRHS);
1698 
1699  // Emit a compare of the LHS and RHS, setting the flags.
1700  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1701  return false;
1702 
1703  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1704  .addMBB(TrueMBB).addImm(CC);
1705 
1706  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1707  // to UNE above).
1708  if (NeedExtraBranch) {
1709  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1710  .addMBB(TrueMBB).addImm(X86::COND_P);
1711  }
1712 
1713  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1714  return true;
1715  }
1716  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1717  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1718  // typically happen for _Bool and C++ bools.
1719  MVT SourceVT;
1720  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1721  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1722  unsigned TestOpc = 0;
1723  switch (SourceVT.SimpleTy) {
1724  default: break;
1725  case MVT::i8: TestOpc = X86::TEST8ri; break;
1726  case MVT::i16: TestOpc = X86::TEST16ri; break;
1727  case MVT::i32: TestOpc = X86::TEST32ri; break;
1728  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1729  }
1730  if (TestOpc) {
1731  unsigned OpReg = getRegForValue(TI->getOperand(0));
1732  if (OpReg == 0) return false;
1733 
1734  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1735  .addReg(OpReg).addImm(1);
1736 
1737  unsigned JmpCond = X86::COND_NE;
1738  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1739  std::swap(TrueMBB, FalseMBB);
1740  JmpCond = X86::COND_E;
1741  }
1742 
1743  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1744  .addMBB(TrueMBB).addImm(JmpCond);
1745 
1746  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1747  return true;
1748  }
1749  }
1750  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1751  // Fake request the condition, otherwise the intrinsic might be completely
1752  // optimized away.
1753  unsigned TmpReg = getRegForValue(BI->getCondition());
1754  if (TmpReg == 0)
1755  return false;
1756 
1757  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1758  .addMBB(TrueMBB).addImm(CC);
1759  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1760  return true;
1761  }
1762 
1763  // Otherwise do a clumsy setcc and re-test it.
1764  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1765  // in an explicit cast, so make sure to handle that correctly.
1766  unsigned OpReg = getRegForValue(BI->getCondition());
1767  if (OpReg == 0) return false;
1768 
1769  // In case OpReg is a K register, COPY to a GPR
1770  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1771  unsigned KOpReg = OpReg;
1772  OpReg = createResultReg(&X86::GR32RegClass);
1773  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1774  TII.get(TargetOpcode::COPY), OpReg)
1775  .addReg(KOpReg);
1776  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
1777  X86::sub_8bit);
1778  }
1779  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1780  .addReg(OpReg)
1781  .addImm(1);
1782  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1783  .addMBB(TrueMBB).addImm(X86::COND_NE);
1784  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1785  return true;
1786 }
1787 
1788 bool X86FastISel::X86SelectShift(const Instruction *I) {
1789  unsigned CReg = 0, OpReg = 0;
1790  const TargetRegisterClass *RC = nullptr;
1791  if (I->getType()->isIntegerTy(8)) {
1792  CReg = X86::CL;
1793  RC = &X86::GR8RegClass;
1794  switch (I->getOpcode()) {
1795  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1796  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1797  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1798  default: return false;
1799  }
1800  } else if (I->getType()->isIntegerTy(16)) {
1801  CReg = X86::CX;
1802  RC = &X86::GR16RegClass;
1803  switch (I->getOpcode()) {
1804  default: llvm_unreachable("Unexpected shift opcode");
1805  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1806  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1807  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1808  }
1809  } else if (I->getType()->isIntegerTy(32)) {
1810  CReg = X86::ECX;
1811  RC = &X86::GR32RegClass;
1812  switch (I->getOpcode()) {
1813  default: llvm_unreachable("Unexpected shift opcode");
1814  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1815  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1816  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1817  }
1818  } else if (I->getType()->isIntegerTy(64)) {
1819  CReg = X86::RCX;
1820  RC = &X86::GR64RegClass;
1821  switch (I->getOpcode()) {
1822  default: llvm_unreachable("Unexpected shift opcode");
1823  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1824  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1825  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1826  }
1827  } else {
1828  return false;
1829  }
1830 
1831  MVT VT;
1832  if (!isTypeLegal(I->getType(), VT))
1833  return false;
1834 
1835  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1836  if (Op0Reg == 0) return false;
1837 
1838  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1839  if (Op1Reg == 0) return false;
1840  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1841  CReg).addReg(Op1Reg);
1842 
1843  // The shift instruction uses X86::CL. If we defined a super-register
1844  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1845  if (CReg != X86::CL)
1846  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1847  TII.get(TargetOpcode::KILL), X86::CL)
1848  .addReg(CReg, RegState::Kill);
1849 
1850  unsigned ResultReg = createResultReg(RC);
1851  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1852  .addReg(Op0Reg);
1853  updateValueMap(I, ResultReg);
1854  return true;
1855 }
1856 
1857 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1858  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1859  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1860  const static bool S = true; // IsSigned
1861  const static bool U = false; // !IsSigned
1862  const static unsigned Copy = TargetOpcode::COPY;
1863  // For the X86 DIV/IDIV instruction, in most cases the dividend
1864  // (numerator) must be in a specific register pair highreg:lowreg,
1865  // producing the quotient in lowreg and the remainder in highreg.
1866  // For most data types, to set up the instruction, the dividend is
1867  // copied into lowreg, and lowreg is sign-extended or zero-extended
1868  // into highreg. The exception is i8, where the dividend is defined
1869  // as a single register rather than a register pair, and we
1870  // therefore directly sign-extend or zero-extend the dividend into
1871  // lowreg, instead of copying, and ignore the highreg.
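  // Illustrative sketch (schematic only): for a 32-bit 'sdiv' the table below
  // yields roughly
  //   COPY    eax, <dividend>   ; OpCopy
  //   CDQ                       ; OpSignExtend: sign-extend EAX into EDX
  //   IDIV32r <divisor>         ; OpDivRem
  //   COPY    <result>, eax     ; DivRemResultReg (EDX instead for 'srem')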
1872  const static struct DivRemEntry {
1873  // The following portion depends only on the data type.
1874  const TargetRegisterClass *RC;
1875  unsigned LowInReg; // low part of the register pair
1876  unsigned HighInReg; // high part of the register pair
1877  // The following portion depends on both the data type and the operation.
1878  struct DivRemResult {
1879  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1880  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1881  // highreg, or copying a zero into highreg.
1882  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1883  // zero/sign-extending into lowreg for i8.
1884  unsigned DivRemResultReg; // Register containing the desired result.
1885  bool IsOpSigned; // Whether to use signed or unsigned form.
1886  } ResultTable[NumOps];
1887  } OpTable[NumTypes] = {
1888  { &X86::GR8RegClass, X86::AX, 0, {
1889  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1890  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1891  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1892  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1893  }
1894  }, // i8
1895  { &X86::GR16RegClass, X86::AX, X86::DX, {
1896  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1897  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1898  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1899  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1900  }
1901  }, // i16
1902  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1903  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1904  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1905  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1906  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1907  }
1908  }, // i32
1909  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1910  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1911  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1912  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1913  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1914  }
1915  }, // i64
1916  };
1917 
1918  MVT VT;
1919  if (!isTypeLegal(I->getType(), VT))
1920  return false;
1921 
1922  unsigned TypeIndex, OpIndex;
1923  switch (VT.SimpleTy) {
1924  default: return false;
1925  case MVT::i8: TypeIndex = 0; break;
1926  case MVT::i16: TypeIndex = 1; break;
1927  case MVT::i32: TypeIndex = 2; break;
1928  case MVT::i64: TypeIndex = 3;
1929  if (!Subtarget->is64Bit())
1930  return false;
1931  break;
1932  }
1933 
1934  switch (I->getOpcode()) {
1935  default: llvm_unreachable("Unexpected div/rem opcode");
1936  case Instruction::SDiv: OpIndex = 0; break;
1937  case Instruction::SRem: OpIndex = 1; break;
1938  case Instruction::UDiv: OpIndex = 2; break;
1939  case Instruction::URem: OpIndex = 3; break;
1940  }
1941 
1942  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1943  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1944  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1945  if (Op0Reg == 0)
1946  return false;
1947  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1948  if (Op1Reg == 0)
1949  return false;
1950 
1951  // Move op0 into low-order input register.
1952  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1953  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1954  // Zero-extend or sign-extend into high-order input register.
1955  if (OpEntry.OpSignExtend) {
1956  if (OpEntry.IsOpSigned)
1957  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1958  TII.get(OpEntry.OpSignExtend));
1959  else {
1960  unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1961  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1962  TII.get(X86::MOV32r0), Zero32);
1963 
1964  // Copy the zero into the appropriate sub/super/identical physical
1965  // register. Unfortunately the operations needed are not uniform enough
1966  // to fit neatly into the table above.
1967  if (VT == MVT::i16) {
1968  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1969  TII.get(Copy), TypeEntry.HighInReg)
1970  .addReg(Zero32, 0, X86::sub_16bit);
1971  } else if (VT == MVT::i32) {
1972  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1973  TII.get(Copy), TypeEntry.HighInReg)
1974  .addReg(Zero32);
1975  } else if (VT == MVT::i64) {
1976  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1977  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1978  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1979  }
1980  }
1981  }
1982  // Generate the DIV/IDIV instruction.
1983  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1984  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1985  // For i8 remainder, we can't reference ah directly, as we'll end
1986  // up with bogus copies like %r9b = COPY %ah. Reference ax
1987  // instead to prevent ah references in a rex instruction.
1988  //
1989  // The current assumption of the fast register allocator is that isel
1990  // won't generate explicit references to the GR8_NOREX registers. If
1991  // the allocator and/or the backend get enhanced to be more robust in
1992  // that regard, this can be, and should be, removed.
1993  unsigned ResultReg = 0;
1994  if ((I->getOpcode() == Instruction::SRem ||
1995  I->getOpcode() == Instruction::URem) &&
1996  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
1997  unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
1998  unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
1999  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2000  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
2001 
2002  // Shift AX right by 8 bits instead of using AH.
2003  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
2004  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
2005 
2006  // Now reference the 8-bit subreg of the result.
2007  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2008  /*Kill=*/true, X86::sub_8bit);
2009  }
2010  // Copy the result out of the physreg if we haven't already.
2011  if (!ResultReg) {
2012  ResultReg = createResultReg(TypeEntry.RC);
2013  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
2014  .addReg(OpEntry.DivRemResultReg);
2015  }
2016  updateValueMap(I, ResultReg);
2017 
2018  return true;
2019 }
2020 
 2021 /// Emit a conditional move instruction (if they are supported) to lower
2022 /// the select.
2023 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2024  // Check if the subtarget supports these instructions.
2025  if (!Subtarget->hasCMov())
2026  return false;
2027 
2028  // FIXME: Add support for i8.
2029  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2030  return false;
2031 
2032  const Value *Cond = I->getOperand(0);
2033  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2034  bool NeedTest = true;
 2035  X86::CondCode CC = X86::COND_NE;
2036 
2037  // Optimize conditions coming from a compare if both instructions are in the
2038  // same basic block (values defined in other basic blocks may not have
2039  // initialized registers).
2040  const auto *CI = dyn_cast<CmpInst>(Cond);
2041  if (CI && (CI->getParent() == I->getParent())) {
2042  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2043 
2044  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2045  static const uint16_t SETFOpcTable[2][3] = {
2046  { X86::COND_NP, X86::COND_E, X86::TEST8rr },
2047  { X86::COND_P, X86::COND_NE, X86::OR8rr }
2048  };
2049  const uint16_t *SETFOpc = nullptr;
2050  switch (Predicate) {
2051  default: break;
2052  case CmpInst::FCMP_OEQ:
2053  SETFOpc = &SETFOpcTable[0][0];
2054  Predicate = CmpInst::ICMP_NE;
2055  break;
2056  case CmpInst::FCMP_UNE:
2057  SETFOpc = &SETFOpcTable[1][0];
2058  Predicate = CmpInst::ICMP_NE;
2059  break;
2060  }
2061 
2062  bool NeedSwap;
2063  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2064  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2065 
2066  const Value *CmpLHS = CI->getOperand(0);
2067  const Value *CmpRHS = CI->getOperand(1);
2068  if (NeedSwap)
2069  std::swap(CmpLHS, CmpRHS);
2070 
2071  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2072  // Emit a compare of the LHS and RHS, setting the flags.
2073  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2074  return false;
2075 
2076  if (SETFOpc) {
2077  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
2078  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
2079  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2080  FlagReg1).addImm(SETFOpc[0]);
2081  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2082  FlagReg2).addImm(SETFOpc[1]);
2083  auto const &II = TII.get(SETFOpc[2]);
2084  if (II.getNumDefs()) {
2085  unsigned TmpReg = createResultReg(&X86::GR8RegClass);
2086  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2087  .addReg(FlagReg2).addReg(FlagReg1);
2088  } else {
2089  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2090  .addReg(FlagReg2).addReg(FlagReg1);
2091  }
2092  }
2093  NeedTest = false;
2094  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2095  // Fake request the condition, otherwise the intrinsic might be completely
2096  // optimized away.
2097  unsigned TmpReg = getRegForValue(Cond);
2098  if (TmpReg == 0)
2099  return false;
2100 
2101  NeedTest = false;
2102  }
2103 
2104  if (NeedTest) {
 2105  // Selects operate on i1, however, CondReg is 8 bits wide and may contain
 2106  // garbage. Indeed, only the least significant bit is supposed to be
2107  // accurate. If we read more than the lsb, we may see non-zero values
2108  // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for
2109  // the select. This is achieved by performing TEST against 1.
2110  unsigned CondReg = getRegForValue(Cond);
2111  if (CondReg == 0)
2112  return false;
2113  bool CondIsKill = hasTrivialKill(Cond);
2114 
 2115  // In case CondReg is a K register, COPY to a GPR
2116  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2117  unsigned KCondReg = CondReg;
2118  CondReg = createResultReg(&X86::GR32RegClass);
2119  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2120  TII.get(TargetOpcode::COPY), CondReg)
2121  .addReg(KCondReg, getKillRegState(CondIsKill));
2122  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2123  X86::sub_8bit);
2124  }
2125  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2126  .addReg(CondReg, getKillRegState(CondIsKill))
2127  .addImm(1);
2128  }
2129 
2130  const Value *LHS = I->getOperand(1);
2131  const Value *RHS = I->getOperand(2);
2132 
2133  unsigned RHSReg = getRegForValue(RHS);
2134  bool RHSIsKill = hasTrivialKill(RHS);
2135 
2136  unsigned LHSReg = getRegForValue(LHS);
2137  bool LHSIsKill = hasTrivialKill(LHS);
2138 
2139  if (!LHSReg || !RHSReg)
2140  return false;
2141 
2142  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2143  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8);
2144  unsigned ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill,
2145  LHSReg, LHSIsKill, CC);
2146  updateValueMap(I, ResultReg);
2147  return true;
2148 }
2149 
2150 /// Emit SSE or AVX instructions to lower the select.
2151 ///
2152 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2153 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2154 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
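 /// Conceptually (an illustrative identity, not extra emitted code), the
 /// SSE-only path computes a bitwise blend:
 ///   mask   = cmp(cmpLHS, cmpRHS, cc)        ; all-ones or all-zeros
 ///   result = (mask & trueVal) | (~mask & falseVal)
 /// which is what the CMP/AND/ANDN/OR sequence below implements.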
2155 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2156  // Optimize conditions coming from a compare if both instructions are in the
2157  // same basic block (values defined in other basic blocks may not have
2158  // initialized registers).
2159  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2160  if (!CI || (CI->getParent() != I->getParent()))
2161  return false;
2162 
2163  if (I->getType() != CI->getOperand(0)->getType() ||
2164  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2165  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2166  return false;
2167 
2168  const Value *CmpLHS = CI->getOperand(0);
2169  const Value *CmpRHS = CI->getOperand(1);
2170  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2171 
2172  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2173  // We don't have to materialize a zero constant for this case and can just use
2174  // %x again on the RHS.
2175  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2176  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2177  if (CmpRHSC && CmpRHSC->isNullValue())
2178  CmpRHS = CmpLHS;
2179  }
2180 
2181  unsigned CC;
2182  bool NeedSwap;
2183  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2184  if (CC > 7 && !Subtarget->hasAVX())
2185  return false;
2186 
2187  if (NeedSwap)
2188  std::swap(CmpLHS, CmpRHS);
2189 
2190  // Choose the SSE instruction sequence based on data type (float or double).
2191  static const uint16_t OpcTable[2][4] = {
2192  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2193  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2194  };
2195 
2196  const uint16_t *Opc = nullptr;
2197  switch (RetVT.SimpleTy) {
2198  default: return false;
2199  case MVT::f32: Opc = &OpcTable[0][0]; break;
2200  case MVT::f64: Opc = &OpcTable[1][0]; break;
2201  }
2202 
2203  const Value *LHS = I->getOperand(1);
2204  const Value *RHS = I->getOperand(2);
2205 
2206  unsigned LHSReg = getRegForValue(LHS);
2207  bool LHSIsKill = hasTrivialKill(LHS);
2208 
2209  unsigned RHSReg = getRegForValue(RHS);
2210  bool RHSIsKill = hasTrivialKill(RHS);
2211 
2212  unsigned CmpLHSReg = getRegForValue(CmpLHS);
2213  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
2214 
2215  unsigned CmpRHSReg = getRegForValue(CmpRHS);
2216  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
2217 
 2218  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2219  return false;
2220 
2221  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2222  unsigned ResultReg;
2223 
2224  if (Subtarget->hasAVX512()) {
2225  // If we have AVX512 we can use a mask compare and masked movss/sd.
2226  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2227  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2228 
2229  unsigned CmpOpcode =
2230  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2231  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
2232  CmpRHSReg, CmpRHSIsKill, CC);
2233 
2234  // Need an IMPLICIT_DEF for the input that is used to generate the upper
 2235  // bits of the result register since it's not based on any of the inputs.
2236  unsigned ImplicitDefReg = createResultReg(VR128X);
2237  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2238  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2239 
 2240  // Place RHSReg in the passthru of the masked movss/sd operation and put
2241  // LHS in the input. The mask input comes from the compare.
2242  unsigned MovOpcode =
2243  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2244  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
2245  CmpReg, true, ImplicitDefReg, true,
2246  LHSReg, LHSIsKill);
2247 
2248  ResultReg = createResultReg(RC);
2249  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2250  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2251 
2252  } else if (Subtarget->hasAVX()) {
2253  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2254 
2255  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2256  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2257  // uses XMM0 as the selection register. That may need just as many
2258  // instructions as the AND/ANDN/OR sequence due to register moves, so
2259  // don't bother.
2260  unsigned CmpOpcode =
2261  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2262  unsigned BlendOpcode =
2263  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2264 
2265  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
2266  CmpRHSReg, CmpRHSIsKill, CC);
2267  unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
2268  LHSReg, LHSIsKill, CmpReg, true);
2269  ResultReg = createResultReg(RC);
2270  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2271  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2272  } else {
2273  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2274  unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
2275  CmpRHSReg, CmpRHSIsKill, CC);
2276  unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
2277  LHSReg, LHSIsKill);
2278  unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
2279  RHSReg, RHSIsKill);
2280  unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
2281  AndReg, /*IsKill=*/true);
2282  ResultReg = createResultReg(RC);
2283  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2284  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2285  }
2286  updateValueMap(I, ResultReg);
2287  return true;
2288 }
2289 
2290 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2291  // These are pseudo CMOV instructions and will be later expanded into control-
2292  // flow.
2293  unsigned Opc;
2294  switch (RetVT.SimpleTy) {
2295  default: return false;
2296  case MVT::i8: Opc = X86::CMOV_GR8; break;
2297  case MVT::i16: Opc = X86::CMOV_GR16; break;
2298  case MVT::i32: Opc = X86::CMOV_GR32; break;
2299  case MVT::f32: Opc = X86::CMOV_FR32; break;
2300  case MVT::f64: Opc = X86::CMOV_FR64; break;
2301  }
2302 
2303  const Value *Cond = I->getOperand(0);
 2304  X86::CondCode CC = X86::COND_NE;
2305 
2306  // Optimize conditions coming from a compare if both instructions are in the
2307  // same basic block (values defined in other basic blocks may not have
2308  // initialized registers).
2309  const auto *CI = dyn_cast<CmpInst>(Cond);
2310  if (CI && (CI->getParent() == I->getParent())) {
2311  bool NeedSwap;
2312  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2313  if (CC > X86::LAST_VALID_COND)
2314  return false;
2315 
2316  const Value *CmpLHS = CI->getOperand(0);
2317  const Value *CmpRHS = CI->getOperand(1);
2318 
2319  if (NeedSwap)
2320  std::swap(CmpLHS, CmpRHS);
2321 
2322  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2323  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2324  return false;
2325  } else {
2326  unsigned CondReg = getRegForValue(Cond);
2327  if (CondReg == 0)
2328  return false;
2329  bool CondIsKill = hasTrivialKill(Cond);
2330 
 2331  // In case CondReg is a K register, COPY to a GPR
2332  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2333  unsigned KCondReg = CondReg;
2334  CondReg = createResultReg(&X86::GR32RegClass);
2335  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2336  TII.get(TargetOpcode::COPY), CondReg)
2337  .addReg(KCondReg, getKillRegState(CondIsKill));
2338  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2339  X86::sub_8bit);
2340  }
2341  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2342  .addReg(CondReg, getKillRegState(CondIsKill))
2343  .addImm(1);
2344  }
2345 
2346  const Value *LHS = I->getOperand(1);
2347  const Value *RHS = I->getOperand(2);
2348 
2349  unsigned LHSReg = getRegForValue(LHS);
2350  bool LHSIsKill = hasTrivialKill(LHS);
2351 
2352  unsigned RHSReg = getRegForValue(RHS);
2353  bool RHSIsKill = hasTrivialKill(RHS);
2354 
2355  if (!LHSReg || !RHSReg)
2356  return false;
2357 
2358  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2359 
2360  unsigned ResultReg =
2361  fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
2362  updateValueMap(I, ResultReg);
2363  return true;
2364 }
2365 
2366 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2367  MVT RetVT;
2368  if (!isTypeLegal(I->getType(), RetVT))
2369  return false;
2370 
2371  // Check if we can fold the select.
2372  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2373  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2374  const Value *Opnd = nullptr;
2375  switch (Predicate) {
2376  default: break;
2377  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2378  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2379  }
2380  // No need for a select anymore - this is an unconditional move.
2381  if (Opnd) {
2382  unsigned OpReg = getRegForValue(Opnd);
2383  if (OpReg == 0)
2384  return false;
2385  bool OpIsKill = hasTrivialKill(Opnd);
2386  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2387  unsigned ResultReg = createResultReg(RC);
2388  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2389  TII.get(TargetOpcode::COPY), ResultReg)
2390  .addReg(OpReg, getKillRegState(OpIsKill));
2391  updateValueMap(I, ResultReg);
2392  return true;
2393  }
2394  }
2395 
2396  // First try to use real conditional move instructions.
2397  if (X86FastEmitCMoveSelect(RetVT, I))
2398  return true;
2399 
2400  // Try to use a sequence of SSE instructions to simulate a conditional move.
2401  if (X86FastEmitSSESelect(RetVT, I))
2402  return true;
2403 
2404  // Fall-back to pseudo conditional move instructions, which will be later
2405  // converted to control-flow.
2406  if (X86FastEmitPseudoSelect(RetVT, I))
2407  return true;
2408 
2409  return false;
2410 }
2411 
2412 // Common code for X86SelectSIToFP and X86SelectUIToFP.
2413 bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2414  // The target-independent selection algorithm in FastISel already knows how
2415  // to select a SINT_TO_FP if the target is SSE but not AVX.
2416  // Early exit if the subtarget doesn't have AVX.
2417  // Unsigned conversion requires avx512.
2418  bool HasAVX512 = Subtarget->hasAVX512();
2419  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2420  return false;
2421 
2422  // TODO: We could sign extend narrower types.
2423  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2424  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
2425  return false;
2426 
2427  // Select integer to float/double conversion.
2428  unsigned OpReg = getRegForValue(I->getOperand(0));
2429  if (OpReg == 0)
2430  return false;
2431 
2432  unsigned Opcode;
2433 
2434  static const uint16_t SCvtOpc[2][2][2] = {
2435  { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2436  { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2437  { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2438  { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2439  };
2440  static const uint16_t UCvtOpc[2][2] = {
2441  { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2442  { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2443  };
2444  bool Is64Bit = SrcVT == MVT::i64;
2445 
2446  if (I->getType()->isDoubleTy()) {
2447  // s/uitofp int -> double
2448  Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2449  } else if (I->getType()->isFloatTy()) {
2450  // s/uitofp int -> float
2451  Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2452  } else
2453  return false;
2454 
2455  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
2456  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
2457  unsigned ImplicitDefReg = createResultReg(RC);
2458  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2459  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2460  unsigned ResultReg =
2461  fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
2462  updateValueMap(I, ResultReg);
2463  return true;
2464 }
2465 
2466 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2467  return X86SelectIntToFP(I, /*IsSigned*/true);
2468 }
2469 
2470 bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2471  return X86SelectIntToFP(I, /*IsSigned*/false);
2472 }
2473 
2474 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2475 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2476  unsigned TargetOpc,
2477  const TargetRegisterClass *RC) {
2478  assert((I->getOpcode() == Instruction::FPExt ||
2479  I->getOpcode() == Instruction::FPTrunc) &&
2480  "Instruction must be an FPExt or FPTrunc!");
2481 
2482  unsigned OpReg = getRegForValue(I->getOperand(0));
2483  if (OpReg == 0)
2484  return false;
2485 
2486  unsigned ImplicitDefReg;
2487  if (Subtarget->hasAVX()) {
2488  ImplicitDefReg = createResultReg(RC);
2489  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2490  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2491 
2492  }
2493 
2494  unsigned ResultReg = createResultReg(RC);
2495  MachineInstrBuilder MIB;
2496  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2497  ResultReg);
2498 
2499  if (Subtarget->hasAVX())
2500  MIB.addReg(ImplicitDefReg);
2501 
2502  MIB.addReg(OpReg);
2503  updateValueMap(I, ResultReg);
2504  return true;
2505 }
2506 
2507 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2508  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2509  I->getOperand(0)->getType()->isFloatTy()) {
2510  bool HasAVX512 = Subtarget->hasAVX512();
2511  // fpext from float to double.
2512  unsigned Opc =
2513  HasAVX512 ? X86::VCVTSS2SDZrr
2514  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2515  return X86SelectFPExtOrFPTrunc(
2516  I, Opc, HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass);
2517  }
2518 
2519  return false;
2520 }
2521 
2522 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2523  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2524  I->getOperand(0)->getType()->isDoubleTy()) {
2525  bool HasAVX512 = Subtarget->hasAVX512();
2526  // fptrunc from double to float.
2527  unsigned Opc =
2528  HasAVX512 ? X86::VCVTSD2SSZrr
2529  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2530  return X86SelectFPExtOrFPTrunc(
2531  I, Opc, HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass);
2532  }
2533 
2534  return false;
2535 }
2536 
2537 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2538  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2539  EVT DstVT = TLI.getValueType(DL, I->getType());
2540 
2541  // This code only handles truncation to byte.
2542  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2543  return false;
2544  if (!TLI.isTypeLegal(SrcVT))
2545  return false;
2546 
2547  unsigned InputReg = getRegForValue(I->getOperand(0));
2548  if (!InputReg)
2549  // Unhandled operand. Halt "fast" selection and bail.
2550  return false;
2551 
2552  if (SrcVT == MVT::i8) {
2553  // Truncate from i8 to i1; no code needed.
2554  updateValueMap(I, InputReg);
2555  return true;
2556  }
2557 
2558  // Issue an extract_subreg.
2559  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2560  InputReg, false,
2561  X86::sub_8bit);
2562  if (!ResultReg)
2563  return false;
2564 
2565  updateValueMap(I, ResultReg);
2566  return true;
2567 }
2568 
2569 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2570  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2571 }
2572 
2573 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2574  X86AddressMode SrcAM, uint64_t Len) {
2575 
2576  // Make sure we don't bloat code by inlining very large memcpy's.
2577  if (!IsMemcpySmall(Len))
2578  return false;
2579 
2580  bool i64Legal = Subtarget->is64Bit();
2581 
2582  // We don't care about alignment here since we just emit integer accesses.
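  // For illustration (not emitted verbatim): on x86-64 a constant 13-byte
  // copy becomes one i64, one i32, and one i8 load/store pair, with the
  // DestAM and SrcAM displacements advanced by 8, 4, and finally 1 byte.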
2583  while (Len) {
2584  MVT VT;
2585  if (Len >= 8 && i64Legal)
2586  VT = MVT::i64;
2587  else if (Len >= 4)
2588  VT = MVT::i32;
2589  else if (Len >= 2)
2590  VT = MVT::i16;
2591  else
2592  VT = MVT::i8;
2593 
2594  unsigned Reg;
2595  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2596  RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2597  assert(RV && "Failed to emit load or store??");
2598 
2599  unsigned Size = VT.getSizeInBits()/8;
2600  Len -= Size;
2601  DestAM.Disp += Size;
2602  SrcAM.Disp += Size;
2603  }
2604 
2605  return true;
2606 }
2607 
2608 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2609  // FIXME: Handle more intrinsics.
2610  switch (II->getIntrinsicID()) {
2611  default: return false;
2612  case Intrinsic::convert_from_fp16:
2613  case Intrinsic::convert_to_fp16: {
2614  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2615  return false;
2616 
2617  const Value *Op = II->getArgOperand(0);
2618  unsigned InputReg = getRegForValue(Op);
2619  if (InputReg == 0)
2620  return false;
2621 
2622  // F16C only allows converting from float to half and from half to float.
2623  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2624  if (IsFloatToHalf) {
2625  if (!Op->getType()->isFloatTy())
2626  return false;
2627  } else {
2628  if (!II->getType()->isFloatTy())
2629  return false;
2630  }
2631 
2632  unsigned ResultReg = 0;
2633  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2634  if (IsFloatToHalf) {
2635  // 'InputReg' is implicitly promoted from register class FR32 to
2636  // register class VR128 by method 'constrainOperandRegClass' which is
2637  // directly called by 'fastEmitInst_ri'.
2638  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2639  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2640  // It's consistent with the other FP instructions, which are usually
2641  // controlled by MXCSR.
2642  InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
2643 
 2644  // Move the lower 32-bits of InputReg to another register of class GR32.
2645  ResultReg = createResultReg(&X86::GR32RegClass);
2646  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2647  TII.get(X86::VMOVPDI2DIrr), ResultReg)
2648  .addReg(InputReg, RegState::Kill);
2649 
2650  // The result value is in the lower 16-bits of ResultReg.
2651  unsigned RegIdx = X86::sub_16bit;
2652  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
2653  } else {
2654  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2655  // Explicitly sign-extend the input to 32-bit.
2656  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
2657  /*Kill=*/false);
2658 
2659  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2660  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2661  InputReg, /*Kill=*/true);
2662 
2663  InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
2664 
2665  // The result value is in the lower 32-bits of ResultReg.
2666  // Emit an explicit copy from register class VR128 to register class FR32.
2667  ResultReg = createResultReg(&X86::FR32RegClass);
2668  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2669  TII.get(TargetOpcode::COPY), ResultReg)
2670  .addReg(InputReg, RegState::Kill);
2671  }
2672 
2673  updateValueMap(II, ResultReg);
2674  return true;
2675  }
2676  case Intrinsic::frameaddress: {
2677  MachineFunction *MF = FuncInfo.MF;
2678  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2679  return false;
2680 
2681  Type *RetTy = II->getCalledFunction()->getReturnType();
2682 
2683  MVT VT;
2684  if (!isTypeLegal(RetTy, VT))
2685  return false;
2686 
2687  unsigned Opc;
2688  const TargetRegisterClass *RC = nullptr;
2689 
2690  switch (VT.SimpleTy) {
2691  default: llvm_unreachable("Invalid result type for frameaddress.");
2692  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2693  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2694  }
2695 
2696  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2697  // we get the wrong frame register.
2698  MachineFrameInfo &MFI = MF->getFrameInfo();
2699  MFI.setFrameAddressIsTaken(true);
2700 
2701  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2702  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2703  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2704  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2705  "Invalid Frame Register!");
2706 
2707  // Always make a copy of the frame register to a vreg first, so that we
2708  // never directly reference the frame register (the TwoAddressInstruction-
2709  // Pass doesn't like that).
2710  unsigned SrcReg = createResultReg(RC);
2711  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2712  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2713 
2714  // Now recursively load from the frame address.
2715  // movq (%rbp), %rax
2716  // movq (%rax), %rax
2717  // movq (%rax), %rax
2718  // ...
2719  unsigned DestReg;
2720  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2721  while (Depth--) {
2722  DestReg = createResultReg(RC);
2723  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2724  TII.get(Opc), DestReg), SrcReg);
2725  SrcReg = DestReg;
2726  }
2727 
2728  updateValueMap(II, SrcReg);
2729  return true;
2730  }
2731  case Intrinsic::memcpy: {
2732  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2733  // Don't handle volatile or variable length memcpys.
2734  if (MCI->isVolatile())
2735  return false;
2736 
2737  if (isa<ConstantInt>(MCI->getLength())) {
2738  // Small memcpy's are common enough that we want to do them
2739  // without a call if possible.
2740  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2741  if (IsMemcpySmall(Len)) {
2742  X86AddressMode DestAM, SrcAM;
2743  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2744  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2745  return false;
2746  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2747  return true;
2748  }
2749  }
2750 
2751  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2752  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2753  return false;
2754 
2755  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2756  return false;
2757 
2758  return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
2759  }
2760  case Intrinsic::memset: {
2761  const MemSetInst *MSI = cast<MemSetInst>(II);
2762 
2763  if (MSI->isVolatile())
2764  return false;
2765 
2766  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2767  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2768  return false;
2769 
2770  if (MSI->getDestAddressSpace() > 255)
2771  return false;
2772 
2773  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
2774  }
2775  case Intrinsic::stackprotector: {
2776  // Emit code to store the stack guard onto the stack.
2777  EVT PtrTy = TLI.getPointerTy(DL);
2778 
2779  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2780  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2781 
2782  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2783 
2784  // Grab the frame index.
2785  X86AddressMode AM;
2786  if (!X86SelectAddress(Slot, AM)) return false;
2787  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2788  return true;
2789  }
2790  case Intrinsic::dbg_declare: {
2791  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2792  X86AddressMode AM;
2793  assert(DI->getAddress() && "Null address should be checked earlier!");
2794  if (!X86SelectAddress(DI->getAddress(), AM))
2795  return false;
2796  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2797  // FIXME may need to add RegState::Debug to any registers produced,
2798  // although ESP/EBP should be the only ones at the moment.
 2799  assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
2800  "Expected inlined-at fields to agree");
2801  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2802  .addImm(0)
2803  .addMetadata(DI->getVariable())
2804  .addMetadata(DI->getExpression());
2805  return true;
2806  }
2807  case Intrinsic::trap: {
2808  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2809  return true;
2810  }
2811  case Intrinsic::sqrt: {
2812  if (!Subtarget->hasSSE1())
2813  return false;
2814 
2815  Type *RetTy = II->getCalledFunction()->getReturnType();
2816 
2817  MVT VT;
2818  if (!isTypeLegal(RetTy, VT))
2819  return false;
2820 
2821  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2822  // is not generated by FastISel yet.
2823  // FIXME: Update this code once tablegen can handle it.
2824  static const uint16_t SqrtOpc[3][2] = {
2825  { X86::SQRTSSr, X86::SQRTSDr },
2826  { X86::VSQRTSSr, X86::VSQRTSDr },
2827  { X86::VSQRTSSZr, X86::VSQRTSDZr },
2828  };
2829  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2830  Subtarget->hasAVX() ? 1 :
2831  0;
2832  unsigned Opc;
2833  switch (VT.SimpleTy) {
2834  default: return false;
2835  case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2836  case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2837  }
2838 
2839  const Value *SrcVal = II->getArgOperand(0);
2840  unsigned SrcReg = getRegForValue(SrcVal);
2841 
2842  if (SrcReg == 0)
2843  return false;
2844 
2845  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2846  unsigned ImplicitDefReg = 0;
2847  if (AVXLevel > 0) {
2848  ImplicitDefReg = createResultReg(RC);
2849  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2850  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2851  }
2852 
2853  unsigned ResultReg = createResultReg(RC);
2854  MachineInstrBuilder MIB;
2855  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2856  ResultReg);
2857 
2858  if (ImplicitDefReg)
2859  MIB.addReg(ImplicitDefReg);
2860 
2861  MIB.addReg(SrcReg);
2862 
2863  updateValueMap(II, ResultReg);
2864  return true;
2865  }
2866  case Intrinsic::sadd_with_overflow:
2867  case Intrinsic::uadd_with_overflow:
2868  case Intrinsic::ssub_with_overflow:
2869  case Intrinsic::usub_with_overflow:
2870  case Intrinsic::smul_with_overflow:
2871  case Intrinsic::umul_with_overflow: {
2872  // This implements the basic lowering of the xalu with overflow intrinsics
2873  // into add/sub/mul followed by either seto or setb.
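  // Illustrative sketch: 'llvm.uadd.with.overflow.i32' becomes roughly an
  // ADD32rr (or ADD32ri when the RHS is a constant) defining the first
  // result, followed by SETCCr with X86::COND_B (setb) for the overflow bit;
  // the signed variants use X86::COND_O (seto) instead.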
2874  const Function *Callee = II->getCalledFunction();
2875  auto *Ty = cast<StructType>(Callee->getReturnType());
2876  Type *RetTy = Ty->getTypeAtIndex(0U);
2877  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2878  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2879  "Overflow value expected to be an i1");
2880 
2881  MVT VT;
2882  if (!isTypeLegal(RetTy, VT))
2883  return false;
2884 
2885  if (VT < MVT::i8 || VT > MVT::i64)
2886  return false;
2887 
2888  const Value *LHS = II->getArgOperand(0);
2889  const Value *RHS = II->getArgOperand(1);
2890 
2891  // Canonicalize immediate to the RHS.
2892  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2893  isCommutativeIntrinsic(II))
2894  std::swap(LHS, RHS);
2895 
2896  unsigned BaseOpc, CondCode;
2897  switch (II->getIntrinsicID()) {
2898  default: llvm_unreachable("Unexpected intrinsic!");
2899  case Intrinsic::sadd_with_overflow:
2900  BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
2901  case Intrinsic::uadd_with_overflow:
2902  BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
2903  case Intrinsic::ssub_with_overflow:
2904  BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
2905  case Intrinsic::usub_with_overflow:
2906  BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
2907  case Intrinsic::smul_with_overflow:
2908  BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
2909  case Intrinsic::umul_with_overflow:
2910  BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
2911  }
2912 
2913  unsigned LHSReg = getRegForValue(LHS);
2914  if (LHSReg == 0)
2915  return false;
2916  bool LHSIsKill = hasTrivialKill(LHS);
2917 
2918  unsigned ResultReg = 0;
2919  // Check if we have an immediate version.
2920  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2921  static const uint16_t Opc[2][4] = {
2922  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2923  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2924  };
2925 
2926  if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
2927  CondCode == X86::COND_O) {
2928  // We can use INC/DEC.
2929  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2930  bool IsDec = BaseOpc == ISD::SUB;
2931  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2932  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2933  .addReg(LHSReg, getKillRegState(LHSIsKill));
2934  } else
2935  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2936  CI->getZExtValue());
2937  }
2938 
2939  unsigned RHSReg;
2940  bool RHSIsKill;
2941  if (!ResultReg) {
2942  RHSReg = getRegForValue(RHS);
2943  if (RHSReg == 0)
2944  return false;
2945  RHSIsKill = hasTrivialKill(RHS);
2946  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2947  RHSIsKill);
2948  }
2949 
2950  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2951  // it manually.
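  // Illustrative sketch for the unsigned i32 case: the LHS is copied into
  // EAX (the implicit input), MUL32r multiplies it by the RHS register, the
  // low half of the product is left in EAX, and OF/CF record the overflow
  // that the SETCCr further below turns into the second result.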
2952  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2953  static const uint16_t MULOpc[] =
2954  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2955  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
 2956  // First copy the first operand into AL/AX/EAX/RAX (depending on the type),
 2957  // which is an implicit input to the X86::MUL*r instruction.
2958  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2959  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2960  .addReg(LHSReg, getKillRegState(LHSIsKill));
2961  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2962  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2963  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2964  static const uint16_t MULOpc[] =
2965  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2966  if (VT == MVT::i8) {
2967  // Copy the first operand into AL, which is an implicit input to the
2968  // X86::IMUL8r instruction.
2969  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2970  TII.get(TargetOpcode::COPY), X86::AL)
2971  .addReg(LHSReg, getKillRegState(LHSIsKill));
2972  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2973  RHSIsKill);
2974  } else
2975  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2976  TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2977  RHSReg, RHSIsKill);
2978  }
2979 
2980  if (!ResultReg)
2981  return false;
2982 
2983  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2984  unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
2985  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2986  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2987  ResultReg2).addImm(CondCode);
2988 
2989  updateValueMap(II, ResultReg, 2);
2990  return true;
2991  }
2992  case Intrinsic::x86_sse_cvttss2si:
2993  case Intrinsic::x86_sse_cvttss2si64:
2994  case Intrinsic::x86_sse2_cvttsd2si:
2995  case Intrinsic::x86_sse2_cvttsd2si64: {
2996  bool IsInputDouble;
2997  switch (II->getIntrinsicID()) {
2998  default: llvm_unreachable("Unexpected intrinsic.");
2999  case Intrinsic::x86_sse_cvttss2si:
3000  case Intrinsic::x86_sse_cvttss2si64:
3001  if (!Subtarget->hasSSE1())
3002  return false;
3003  IsInputDouble = false;
3004  break;
3005  case Intrinsic::x86_sse2_cvttsd2si:
3006  case Intrinsic::x86_sse2_cvttsd2si64:
3007  if (!Subtarget->hasSSE2())
3008  return false;
3009  IsInputDouble = true;
3010  break;
3011  }
3012 
3013  Type *RetTy = II->getCalledFunction()->getReturnType();
3014  MVT VT;
3015  if (!isTypeLegal(RetTy, VT))
3016  return false;
3017 
3018  static const uint16_t CvtOpc[3][2][2] = {
3019  { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
3020  { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
3021  { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
3022  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
3023  { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
3024  { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
3025  };
3026  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
3027  Subtarget->hasAVX() ? 1 :
3028  0;
3029  unsigned Opc;
3030  switch (VT.SimpleTy) {
3031  default: llvm_unreachable("Unexpected result type.");
3032  case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3033  case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3034  }
3035 
3036  // Check if we can fold insertelement instructions into the convert.
3037  const Value *Op = II->getArgOperand(0);
3038  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3039  const Value *Index = IE->getOperand(2);
3040  if (!isa<ConstantInt>(Index))
3041  break;
3042  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3043 
3044  if (Idx == 0) {
3045  Op = IE->getOperand(1);
3046  break;
3047  }
3048  Op = IE->getOperand(0);
3049  }
3050 
3051  unsigned Reg = getRegForValue(Op);
3052  if (Reg == 0)
3053  return false;
3054 
3055  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3056  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3057  .addReg(Reg);
3058 
3059  updateValueMap(II, ResultReg);
3060  return true;
3061  }
3062  }
3063 }
3064 
3065 bool X86FastISel::fastLowerArguments() {
3066  if (!FuncInfo.CanLowerReturn)
3067  return false;
3068 
3069  const Function *F = FuncInfo.Fn;
3070  if (F->isVarArg())
3071  return false;
3072 
3073  CallingConv::ID CC = F->getCallingConv();
3074  if (CC != CallingConv::C)
3075  return false;
3076 
3077  if (Subtarget->isCallingConvWin64(CC))
3078  return false;
3079 
3080  if (!Subtarget->is64Bit())
3081  return false;
3082 
3083  if (Subtarget->useSoftFloat())
3084  return false;
3085 
 3086  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments in GPRs and 8 f32/f64 arguments in XMM registers.
3087  unsigned GPRCnt = 0;
3088  unsigned FPRCnt = 0;
3089  for (auto const &Arg : F->args()) {
3090  if (Arg.hasAttribute(Attribute::ByVal) ||
3091  Arg.hasAttribute(Attribute::InReg) ||
3092  Arg.hasAttribute(Attribute::StructRet) ||
3093  Arg.hasAttribute(Attribute::SwiftSelf) ||
3094  Arg.hasAttribute(Attribute::SwiftError) ||
3095  Arg.hasAttribute(Attribute::Nest))
3096  return false;
3097 
3098  Type *ArgTy = Arg.getType();
3099  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3100  return false;
3101 
3102  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3103  if (!ArgVT.isSimple()) return false;
3104  switch (ArgVT.getSimpleVT().SimpleTy) {
3105  default: return false;
3106  case MVT::i32:
3107  case MVT::i64:
3108  ++GPRCnt;
3109  break;
3110  case MVT::f32:
3111  case MVT::f64:
3112  if (!Subtarget->hasSSE1())
3113  return false;
3114  ++FPRCnt;
3115  break;
3116  }
3117 
3118  if (GPRCnt > 6)
3119  return false;
3120 
3121  if (FPRCnt > 8)
3122  return false;
3123  }
3124 
3125  static const MCPhysReg GPR32ArgRegs[] = {
3126  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3127  };
3128  static const MCPhysReg GPR64ArgRegs[] = {
3129  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3130  };
3131  static const MCPhysReg XMMArgRegs[] = {
3132  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3133  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3134  };
3135 
3136  unsigned GPRIdx = 0;
3137  unsigned FPRIdx = 0;
3138  for (auto const &Arg : F->args()) {
3139  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3140  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3141  unsigned SrcReg;
3142  switch (VT.SimpleTy) {
3143  default: llvm_unreachable("Unexpected value type.");
3144  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3145  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3146  case MVT::f32: LLVM_FALLTHROUGH;
3147  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3148  }
3149  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3150  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3151  // Without this, EmitLiveInCopies may eliminate the livein if its only
3152  // use is a bitcast (which isn't turned into an instruction).
3153  unsigned ResultReg = createResultReg(RC);
3154  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3155  TII.get(TargetOpcode::COPY), ResultReg)
3156  .addReg(DstReg, getKillRegState(true));
3157  updateValueMap(&Arg, ResultReg);
3158  }
3159  return true;
3160 }
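// A rough illustration of the fast path above (hypothetical IR, not taken from
// this file): for
//   define i64 @f(i64 %a, double %b, i32 %c)
// the shared GPR index assigns %a the first 64-bit slot and %c the second
// 32-bit slot, while the FP index places %b independently, so the arguments
// land in RDI, XMM0 and ESI respectively, and each live-in physreg is then
// copied into a fresh virtual register before use.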
3161 
3162 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3163  CallingConv::ID CC,
3164  ImmutableCallSite *CS) {
3165  if (Subtarget->is64Bit())
3166  return 0;
3167  if (Subtarget->getTargetTriple().isOSMSVCRT())
3168  return 0;
3169  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3170  CC == CallingConv::HiPE)
3171  return 0;
3172 
3173  if (CS)
3174  if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
3175  CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3176  return 0;
3177 
3178  return 4;
3179 }
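// An illustrative case (hypothetical C code, assuming a 32-bit Linux-style
// target where none of the early returns above apply): for
//   struct S { int a, b, c; };  struct S f(void);
// the caller passes a hidden sret pointer on the stack and the callee pops
// those 4 bytes itself (returning with "ret $4"), so this helper reports 4 and
// the caller's stack adjustment after the call accounts for it.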
3180 
3181 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3182  auto &OutVals = CLI.OutVals;
3183  auto &OutFlags = CLI.OutFlags;
3184  auto &OutRegs = CLI.OutRegs;
3185  auto &Ins = CLI.Ins;
3186  auto &InRegs = CLI.InRegs;
3187  CallingConv::ID CC = CLI.CallConv;
3188  bool &IsTailCall = CLI.IsTailCall;
3189  bool IsVarArg = CLI.IsVarArg;
3190  const Value *Callee = CLI.Callee;
3191  MCSymbol *Symbol = CLI.Symbol;
3192 
3193  bool Is64Bit = Subtarget->is64Bit();
3194  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3195 
3196  const CallInst *CI =
3197  CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
3198  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
3199 
3200  // Call / invoke instructions with NoCfCheck attribute require special
3201  // handling.
3202  const auto *II =
3203  CLI.CS ? dyn_cast<InvokeInst>(CLI.CS->getInstruction()) : nullptr;
3204  if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()))
3205  return false;
3206 
3207  // Functions with no_caller_saved_registers need special handling.
3208  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3209  (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
3210  return false;
3211 
3212  // Functions using retpoline for indirect calls need to use SDISel.
3213  if (Subtarget->useRetpolineIndirectCalls())
3214  return false;
3215 
3216  // Handle only C, fastcc, and webkit_js calling conventions for now.
3217  switch (CC) {
3218  default: return false;
3219  case CallingConv::C:
3220  case CallingConv::Fast:
3221  case CallingConv::WebKit_JS:
3222  case CallingConv::Swift:
3223  case CallingConv::X86_FastCall:
3224  case CallingConv::X86_StdCall:
3225  case CallingConv::X86_ThisCall:
3226  case CallingConv::Win64:
3227  case CallingConv::X86_64_SysV:
3228  break;
3229  }
3230 
3231  // Allow SelectionDAG isel to handle tail calls.
3232  if (IsTailCall)
3233  return false;
3234 
3235  // fastcc with -tailcallopt is intended to provide a guaranteed
3236  // tail call optimization. Fastisel doesn't know how to do that.
3237  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
3238  return false;
3239 
3240  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3241  // x86-32. Special handling for x86-64 is implemented.
3242  if (IsVarArg && IsWin64)
3243  return false;
3244 
3245  // Don't know about inalloca yet.
3246  if (CLI.CS && CLI.CS->hasInAllocaArgument())
3247  return false;
3248 
3249  for (auto Flag : CLI.OutFlags)
3250  if (Flag.isSwiftError())
3251  return false;
3252 
3253  SmallVector<MVT, 16> OutVTs;
3254  SmallVector<unsigned, 16> ArgRegs;
3255 
3256  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3257  // instruction. This is safe because it is common to all FastISel supported
3258  // calling conventions on x86.
3259  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3260  Value *&Val = OutVals[i];
3261  ISD::ArgFlagsTy Flags = OutFlags[i];
3262  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3263  if (CI->getBitWidth() < 32) {
3264  if (Flags.isSExt())
3265  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
3266  else
3267  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
3268  }
3269  }
3270 
3271  // Passing bools around ends up doing a trunc to i1 and passing it.
3272  // Codegen this as an argument + "and 1".
3273  MVT VT;
3274  auto *TI = dyn_cast<TruncInst>(Val);
3275  unsigned ResultReg;
3276  if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
3277  (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
3278  TI->hasOneUse()) {
3279  Value *PrevVal = TI->getOperand(0);
3280  ResultReg = getRegForValue(PrevVal);
3281 
3282  if (!ResultReg)
3283  return false;
3284 
3285  if (!isTypeLegal(PrevVal->getType(), VT))
3286  return false;
3287 
3288  ResultReg =
3289  fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
3290  } else {
3291  if (!isTypeLegal(Val->getType(), VT))
3292  return false;
3293  ResultReg = getRegForValue(Val);
3294  }
3295 
3296  if (!ResultReg)
3297  return false;
3298 
3299  ArgRegs.push_back(ResultReg);
3300  OutVTs.push_back(VT);
3301  }
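// A sketch of the i1 special case above (hypothetical IR, not from this file):
//   %b = trunc i32 %x to i1
//   call void @g(i1 zeroext %b)
// Instead of materializing the truncation, the register already holding %x is
// masked with an "and 1" via fastEmit_ri, and that masked register is what is
// recorded in ArgRegs for %b.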
3302 
3303  // Analyze operands of the call, assigning locations to each operand.
3304  SmallVector<CCValAssign, 16> ArgLocs;
3305  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3306 
3307  // Allocate shadow area for Win64
3308  if (IsWin64)
3309  CCInfo.AllocateStack(32, 8);
3310 
3311  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3312 
3313  // Get a count of how many bytes are to be pushed on the stack.
3314  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3315 
3316  // Issue CALLSEQ_START
3317  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3318  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3319  .addImm(NumBytes).addImm(0).addImm(0);
3320 
3321  // Walk the register/memloc assignments, inserting copies/loads.
3322  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3323  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3324  CCValAssign const &VA = ArgLocs[i];
3325  const Value *ArgVal = OutVals[VA.getValNo()];
3326  MVT ArgVT = OutVTs[VA.getValNo()];
3327 
3328  if (ArgVT == MVT::x86mmx)
3329  return false;
3330 
3331  unsigned ArgReg = ArgRegs[VA.getValNo()];
3332 
3333  // Promote the value if needed.
3334  switch (VA.getLocInfo()) {
3335  case CCValAssign::Full: break;
3336  case CCValAssign::SExt: {
3337  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3338  "Unexpected extend");
3339 
3340  if (ArgVT == MVT::i1)
3341  return false;
3342 
3343  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3344  ArgVT, ArgReg);
3345  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3346  ArgVT = VA.getLocVT();
3347  break;
3348  }
3349  case CCValAssign::ZExt: {
3350  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3351  "Unexpected extend");
3352 
3353  // Handle zero-extension from i1 to i8, which is common.
3354  if (ArgVT == MVT::i1) {
3355  // Set the high bits to zero.
3356  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
3357  ArgVT = MVT::i8;
3358 
3359  if (ArgReg == 0)
3360  return false;
3361  }
3362 
3363  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3364  ArgVT, ArgReg);
3365  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3366  ArgVT = VA.getLocVT();
3367  break;
3368  }
3369  case CCValAssign::AExt: {
3370  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3371  "Unexpected extend");
3372  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3373  ArgVT, ArgReg);
3374  if (!Emitted)
3375  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3376  ArgVT, ArgReg);
3377  if (!Emitted)
3378  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3379  ArgVT, ArgReg);
3380 
3381  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3382  ArgVT = VA.getLocVT();
3383  break;
3384  }
3385  case CCValAssign::BCvt: {
3386  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
3387  /*TODO: Kill=*/false);
3388  assert(ArgReg && "Failed to emit a bitcast!");
3389  ArgVT = VA.getLocVT();
3390  break;
3391  }
3392  case CCValAssign::VExt:
3393  // VExt has not been implemented, so this should be impossible to reach
3394  // for now. However, fall back to SelectionDAG isel once it is implemented.
3395  return false;
3396  case CCValAssign::AExtUpper:
3397  case CCValAssign::SExtUpper:
3398  case CCValAssign::ZExtUpper:
3399  case CCValAssign::FPExt:
3400  llvm_unreachable("Unexpected loc info!");
3401  case CCValAssign::Indirect:
3402  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3403  // support this.
3404  return false;
3405  }
3406 
3407  if (VA.isRegLoc()) {
3408  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3409  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3410  OutRegs.push_back(VA.getLocReg());
3411  } else {
3412  assert(VA.isMemLoc());
3413 
3414  // Don't emit stores for undef values.
3415  if (isa<UndefValue>(ArgVal))
3416  continue;
3417 
3418  unsigned LocMemOffset = VA.getLocMemOffset();
3419  X86AddressMode AM;
3420  AM.Base.Reg = RegInfo->getStackRegister();
3421  AM.Disp = LocMemOffset;
3422  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3423  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3424  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3425  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3426  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3427  if (Flags.isByVal()) {
3428  X86AddressMode SrcAM;
3429  SrcAM.Base.Reg = ArgReg;
3430  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3431  return false;
3432  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3433  // If this is a really simple value, emit this with the Value* version
3434  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3435  // as it can cause us to reevaluate the argument.
3436  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3437  return false;
3438  } else {
3439  bool ValIsKill = hasTrivialKill(ArgVal);
3440  if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
3441  return false;
3442  }
3443  }
3444  }
3445 
3446  // ELF / PIC requires the GOT pointer to be loaded into EBX before any
3447  // function call made through the PLT.
3448  if (Subtarget->isPICStyleGOT()) {
3449  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3450  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3451  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3452  }
3453 
3454  if (Is64Bit && IsVarArg && !IsWin64) {
3455  // From AMD64 ABI document:
3456  // For calls that may call functions that use varargs or stdargs
3457  // (prototype-less calls or calls to functions containing ellipsis (...) in
3458  // the declaration) %al is used as hidden argument to specify the number
3459  // of SSE registers used. The contents of %al do not need to match exactly
3460  // the number of registers, but must be an upper bound on the number of SSE
3461  // registers used and is in the range 0 - 8 inclusive.
3462 
3463  // Count the number of XMM registers allocated.
3464  static const MCPhysReg XMMArgRegs[] = {
3465  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3466  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3467  };
3468  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3469  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3470  && "SSE registers cannot be used when SSE is disabled");
3471  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3472  X86::AL).addImm(NumXMMRegs);
3473  }
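// A concrete example of the %al convention (hypothetical call, following the
// ABI rule quoted above): for
//   printf("%f\n", x)
// the double travels in XMM0, CCInfo reports one allocated XMM register, and
// the code above emits "movb $1, %al" immediately before the call.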
3474 
3475  // Materialize callee address in a register. FIXME: GV address can be
3476  // handled with a CALLpcrel32 instead.
3477  X86AddressMode CalleeAM;
3478  if (!X86SelectCallAddress(Callee, CalleeAM))
3479  return false;
3480 
3481  unsigned CalleeOp = 0;
3482  const GlobalValue *GV = nullptr;
3483  if (CalleeAM.GV != nullptr) {
3484  GV = CalleeAM.GV;
3485  } else if (CalleeAM.Base.Reg != 0) {
3486  CalleeOp = CalleeAM.Base.Reg;
3487  } else
3488  return false;
3489 
3490  // Issue the call.
3491  MachineInstrBuilder MIB;
3492  if (CalleeOp) {
3493  // Register-indirect call.
3494  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3495  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3496  .addReg(CalleeOp);
3497  } else {
3498  // Direct call.
3499  assert(GV && "Not a direct call");
3500  // See if we need any target-specific flags on the GV operand.
3501  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3502 
3503  // This will be a direct call, or an indirect call through memory for
3504  // NonLazyBind calls or dllimport calls.
3505  bool NeedLoad =
3506  OpFlags == X86II::MO_DLLIMPORT || OpFlags == X86II::MO_GOTPCREL;
3507  unsigned CallOpc = NeedLoad
3508  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3509  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3510 
3511  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3512  if (NeedLoad)
3513  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3514  if (Symbol)
3515  MIB.addSym(Symbol, OpFlags);
3516  else
3517  MIB.addGlobalAddress(GV, 0, OpFlags);
3518  if (NeedLoad)
3519  MIB.addReg(0);
3520  }
3521 
3522  // Add a register mask operand representing the call-preserved registers.
3523  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3524  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3525 
3526  // Add an implicit use of the GOT pointer in EBX.
3527  if (Subtarget->isPICStyleGOT())
3528  MIB.addReg(X86::EBX, RegState::Implicit);
3529
3530  if (Is64Bit && IsVarArg && !IsWin64)
3531  MIB.addReg(X86::AL, RegState::Implicit);
3532
3533  // Add implicit physical register uses to the call.
3534  for (auto Reg : OutRegs)
3535  MIB.addReg(Reg, RegState::Implicit);
3536
3537  // Issue CALLSEQ_END
3538  unsigned NumBytesForCalleeToPop =
3539  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3540  TM.Options.GuaranteedTailCallOpt)
3541  ? NumBytes // Callee pops everything.
3542  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
3543  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3544  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3545  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3546 
3547  // Now handle call return values.
3548  SmallVector<CCValAssign, 16> RVLocs;
3549  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3550  CLI.RetTy->getContext());
3551  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3552 
3553  // Copy all of the result registers out of their specified physreg.
3554  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3555  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3556  CCValAssign &VA = RVLocs[i];
3557  EVT CopyVT = VA.getValVT();
3558  unsigned CopyReg = ResultReg + i;
3559  unsigned SrcReg = VA.getLocReg();
3560 
3561  // If this is x86-64, and we disabled SSE, we can't return FP values
3562  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3563  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3564  report_fatal_error("SSE register return with SSE disabled");
3565  }
3566 
3567  // If we prefer to use the value in xmm registers, copy it out as f80 and
3568  // use a truncate to move it from fp stack reg to xmm reg.
3569  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3570  isScalarFPTypeInSSEReg(VA.getValVT())) {
3571  CopyVT = MVT::f80;
3572  CopyReg = createResultReg(&X86::RFP80RegClass);
3573  }
3574 
3575  // Copy out the result.
3576  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3577  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3578  InRegs.push_back(VA.getLocReg());
3579 
3580  // Round the f80 to the right size, which also moves it to the appropriate
3581  // xmm register. This is accomplished by storing the f80 value in memory
3582  // and then loading it back.
3583  if (CopyVT != VA.getValVT()) {
3584  EVT ResVT = VA.getValVT();
3585  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3586  unsigned MemSize = ResVT.getSizeInBits()/8;
3587  int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3588  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3589  TII.get(Opc)), FI)
3590  .addReg(CopyReg);
3591  Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
3592  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3593  TII.get(Opc), ResultReg + i), FI);
3594  }
3595  }
3596 
3597  CLI.ResultReg = ResultReg;
3598  CLI.NumResultRegs = RVLocs.size();
3599  CLI.Call = MIB;
3600 
3601  return true;
3602 }
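// A note on the FP0/FP1 return path in the result-copy loop above: on x86-32
// the C calling convention returns float/double on the x87 stack, while
// FastISel prefers such values in XMM registers when SSE is enabled. The
// sequence sketched by that loop copies ST(0) (FP0) into an RFP80 virtual
// register, spills it with ST_Fp80m32/ST_Fp80m64 to a stack slot of the final
// type's size, and reloads the slot with MOVSSrm/MOVSDrm, which is how the
// value migrates from the fp stack into an SSE register.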
3603 
3604 bool
3605 X86FastISel::fastSelectInstruction(const Instruction *I) {
3606  switch (I->getOpcode()) {
3607  default: break;
3608  case Instruction::Load:
3609  return X86SelectLoad(I);
3610  case Instruction::Store:
3611  return X86SelectStore(I);
3612  case Instruction::Ret:
3613  return X86SelectRet(I);
3614  case Instruction::ICmp:
3615  case Instruction::FCmp:
3616  return X86SelectCmp(I);
3617  case Instruction::ZExt:
3618  return X86SelectZExt(I);
3619  case Instruction::SExt:
3620  return X86SelectSExt(I);
3621  case Instruction::Br:
3622  return X86SelectBranch(I);
3623  case Instruction::LShr:
3624  case Instruction::AShr:
3625  case Instruction::Shl:
3626  return X86SelectShift(I);
3627  case Instruction::SDiv:
3628  case Instruction::UDiv:
3629  case Instruction::SRem:
3630  case Instruction::URem:
3631  return X86SelectDivRem(I);
3632  case Instruction::Select:
3633  return X86SelectSelect(I);
3634  case Instruction::Trunc:
3635  return X86SelectTrunc(I);
3636  case Instruction::FPExt:
3637  return X86SelectFPExt(I);
3638  case Instruction::FPTrunc:
3639  return X86SelectFPTrunc(I);
3640  case Instruction::SIToFP:
3641  return X86SelectSIToFP(I);
3642  case Instruction::UIToFP:
3643  return X86SelectUIToFP(I);
3644  case Instruction::IntToPtr: // Deliberate fall-through.
3645  case Instruction::PtrToInt: {
3646  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3647  EVT DstVT = TLI.getValueType(DL, I->getType());
3648  if (DstVT.bitsGT(SrcVT))
3649  return X86SelectZExt(I);
3650  if (DstVT.bitsLT(SrcVT))
3651  return X86SelectTrunc(I);
3652  unsigned Reg = getRegForValue(I->getOperand(0));
3653  if (Reg == 0) return false;
3654  updateValueMap(I, Reg);
3655  return true;
3656  }
3657  case Instruction::BitCast: {
3658  // Select SSE2/AVX/AVX-512 bitcasts between 128/256/512-bit vector types.
3659  if (!Subtarget->hasSSE2())
3660  return false;
3661 
3662  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3663  EVT DstVT = TLI.getValueType(DL, I->getType());
3664 
3665  if (!SrcVT.isSimple() || !DstVT.isSimple())
3666  return false;
3667 
3668  MVT SVT = SrcVT.getSimpleVT();
3669  MVT DVT = DstVT.getSimpleVT();
3670 
3671  if (!SVT.is128BitVector() &&
3672  !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
3673  !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
3674  (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
3675  DVT.getScalarSizeInBits() >= 32))))
3676  return false;
3677 
3678  unsigned Reg = getRegForValue(I->getOperand(0));
3679  if (Reg == 0)
3680  return false;
3681 
3682  // No instruction is needed for conversion. Reuse the register used by
3683  // the first operand.
3684  updateValueMap(I, Reg);
3685  return true;
3686  }
3687  }
3688 
3689  return false;
3690 }
3691 
3692 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3693  if (VT > MVT::i64)
3694  return 0;
3695 
3696  uint64_t Imm = CI->getZExtValue();
3697  if (Imm == 0) {
3698  unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3699  switch (VT.SimpleTy) {
3700  default: llvm_unreachable("Unexpected value type");
3701  case MVT::i1:
3702  case MVT::i8:
3703  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
3704  X86::sub_8bit);
3705  case MVT::i16:
3706  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
3707  X86::sub_16bit);
3708  case MVT::i32:
3709  return SrcReg;
3710  case MVT::i64: {
3711  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3712  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3713  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3714  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3715  return ResultReg;
3716  }
3717  }
3718  }
3719 
3720  unsigned Opc = 0;
3721  switch (VT.SimpleTy) {
3722  default: llvm_unreachable("Unexpected value type");
3723  case MVT::i1:
3724  VT = MVT::i8;
3725  LLVM_FALLTHROUGH;
3726  case MVT::i8: Opc = X86::MOV8ri; break;
3727  case MVT::i16: Opc = X86::MOV16ri; break;
3728  case MVT::i32: Opc = X86::MOV32ri; break;
3729  case MVT::i64: {
3730  if (isUInt<32>(Imm))
3731  Opc = X86::MOV32ri64;
3732  else if (isInt<32>(Imm))
3733  Opc = X86::MOV64ri32;
3734  else
3735  Opc = X86::MOV64ri;
3736  break;
3737  }
3738  }
3739  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3740 }
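// How the i64 opcode choice above plays out for a few illustrative immediates:
//   0x00000000deadbeef      -> isUInt<32>, so MOV32ri64 (a 32-bit move that is
//                              implicitly zero-extended to 64 bits)
//   -1 (0xffffffffffffffff) -> not isUInt<32> but isInt<32>, so MOV64ri32 (a
//                              sign-extended 32-bit immediate)
//   0x123456789abcdef0      -> neither fits, so the full 10-byte MOV64ri is
//                              required.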
3741 
3742 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3743  if (CFP->isNullValue())
3744  return fastMaterializeFloatZero(CFP);
3745 
3746  // Can't handle alternate code models yet.
3747  CodeModel::Model CM = TM.getCodeModel();
3748  if (CM != CodeModel::Small && CM != CodeModel::Large)
3749  return 0;
3750 
3751  // Get opcode and regclass of the output for the given load instruction.
3752  unsigned Opc = 0;
3753  const TargetRegisterClass *RC = nullptr;
3754  switch (VT.SimpleTy) {
3755  default: return 0;
3756  case MVT::f32:
3757  if (X86ScalarSSEf32) {
3758  Opc = Subtarget->hasAVX512()
3759  ? X86::VMOVSSZrm
3760  : Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
3761  RC = Subtarget->hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3762  } else {
3763  Opc = X86::LD_Fp32m;
3764  RC = &X86::RFP32RegClass;
3765  }
3766  break;
3767  case MVT::f64:
3768  if (X86ScalarSSEf64) {
3769  Opc = Subtarget->hasAVX512()
3770  ? X86::VMOVSDZrm
3771  : Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
3772  RC = Subtarget->hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3773  } else {
3774  Opc = X86::LD_Fp64m;
3775  RC = &X86::RFP64RegClass;
3776  }
3777  break;
3778  case MVT::f80:
3779  // No f80 support yet.
3780  return 0;
3781  }
3782 
3783  // MachineConstantPool wants an explicit alignment.
3784  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
3785  if (Align == 0) {
3786  // Alignment of vector types. FIXME!
3787  Align = DL.getTypeAllocSize(CFP->getType());
3788  }
3789 
3790  // x86-32 PIC requires a PIC base register for constant pools.
3791  unsigned PICBase = 0;
3792  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3793  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3794  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3795  else if (OpFlag == X86II::MO_GOTOFF)
3796  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3797  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3798  PICBase = X86::RIP;
3799 
3800  // Create the load from the constant pool.
3801  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
3802  unsigned ResultReg = createResultReg(RC);
3803 
3804  if (CM == CodeModel::Large) {
3805  unsigned AddrReg = createResultReg(&X86::GR64RegClass);
3806  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3807  AddrReg)
3808  .addConstantPoolIndex(CPI, 0, OpFlag);
3809  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3810  TII.get(Opc), ResultReg);
3811  addDirectMem(MIB, AddrReg);
3812  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3813  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3814  MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
3815  MIB->addMemOperand(*FuncInfo.MF, MMO);
3816  return ResultReg;
3817  }
3818 
3819  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3820  TII.get(Opc), ResultReg),
3821  CPI, PICBase, OpFlag);
3822  return ResultReg;
3823 }
3824 
3825 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3826  // Can't handle alternate code models yet.
3827  if (TM.getCodeModel() != CodeModel::Small)
3828  return 0;
3829 
3830  // Materialize addresses with LEA/MOV instructions.
3831  X86AddressMode AM;
3832  if (X86SelectAddress(GV, AM)) {
3833  // If the expression is just a basereg, then we're done, otherwise we need
3834  // to emit an LEA.
3835  if (AM.BaseType == X86AddressMode::RegBase &&
3836  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3837  return AM.Base.Reg;
3838 
3839  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3840  if (TM.getRelocationModel() == Reloc::Static &&
3841  TLI.getPointerTy(DL) == MVT::i64) {
3842  // The displacement could be more than 32 bits away, so we need to use
3843  // an instruction with a 64-bit immediate.
3844  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3845  ResultReg)
3846  .addGlobalAddress(GV);
3847  } else {
3848  unsigned Opc =
3849  TLI.getPointerTy(DL) == MVT::i32
3850  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3851  : X86::LEA64r;
3852  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3853  TII.get(Opc), ResultReg), AM);
3854  }
3855  return ResultReg;
3856  }
3857  return 0;
3858 }
3859 
3860 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3861  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3862 
3863  // Only handle simple types.
3864  if (!CEVT.isSimple())
3865  return 0;
3866  MVT VT = CEVT.getSimpleVT();
3867 
3868  if (const auto *CI = dyn_cast<ConstantInt>(C))
3869  return X86MaterializeInt(CI, VT);
3870  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
3871  return X86MaterializeFP(CFP, VT);
3872  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
3873  return X86MaterializeGV(GV, VT);
3874 
3875  return 0;
3876 }
3877 
3878 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3879  // Fail on dynamic allocas. At this point, getRegForValue has already
3880  // checked its CSE maps, so if we're here trying to handle a dynamic
3881  // alloca, we're not going to succeed. X86SelectAddress has a
3882  // check for dynamic allocas, because it's called directly from
3883  // various places, but targetMaterializeAlloca also needs a check
3884  // in order to avoid recursion between getRegForValue,
3885  // X86SelectAddress, and targetMaterializeAlloca.
3886  if (!FuncInfo.StaticAllocaMap.count(C))
3887  return 0;
3888  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3889 
3890  X86AddressMode AM;
3891  if (!X86SelectAddress(C, AM))
3892  return 0;
3893  unsigned Opc =
3894  TLI.getPointerTy(DL) == MVT::i32
3895  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3896  : X86::LEA64r;
3897  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3898  unsigned ResultReg = createResultReg(RC);
3899  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3900  TII.get(Opc), ResultReg), AM);
3901  return ResultReg;
3902 }
3903 
3904 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3905  MVT VT;
3906  if (!isTypeLegal(CF->getType(), VT))
3907  return 0;
3908 
3909  // Get opcode and regclass for the given zero.
3910  bool HasAVX512 = Subtarget->hasAVX512();
3911  unsigned Opc = 0;
3912  const TargetRegisterClass *RC = nullptr;
3913  switch (VT.SimpleTy) {
3914  default: return 0;
3915  case MVT::f32:
3916  if (X86ScalarSSEf32) {
3917  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
3918  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
3919  } else {
3920  Opc = X86::LD_Fp032;
3921  RC = &X86::RFP32RegClass;
3922  }
3923  break;
3924  case MVT::f64:
3925  if (X86ScalarSSEf64) {
3926  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
3927  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
3928  } else {
3929  Opc = X86::LD_Fp064;
3930  RC = &X86::RFP64RegClass;
3931  }
3932  break;
3933  case MVT::f80:
3934  // No f80 support yet.
3935  return 0;
3936  }
3937 
3938  unsigned ResultReg = createResultReg(RC);
3939  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3940  return ResultReg;
3941 }
3942 
3943 
3944 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3945  const LoadInst *LI) {
3946  const Value *Ptr = LI->getPointerOperand();
3947  X86AddressMode AM;
3948  if (!X86SelectAddress(Ptr, AM))
3949  return false;
3950 
3951  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3952 
3953  unsigned Size = DL.getTypeAllocSize(LI->getType());
3954  unsigned Alignment = LI->getAlignment();
3955 
3956  if (Alignment == 0) // Ensure that codegen never sees alignment 0
3957  Alignment = DL.getABITypeAlignment(LI->getType());
3958 
3959  SmallVector<MachineOperand, 8> AddrOps;
3960  AM.getFullAddress(AddrOps);
3961 
3962  MachineInstr *Result = XII.foldMemoryOperandImpl(
3963  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
3964  /*AllowCommute=*/true);
3965  if (!Result)
3966  return false;
3967 
3968  // The index register could be in the wrong register class. Unfortunately,
3969  // foldMemoryOperandImpl could have commuted the instruction, so it's not enough
3970  // to just look at OpNo + the offset to the index reg. We actually need to
3971  // scan the instruction to find the index reg and see if it's the correct reg
3972  // class.
3973  unsigned OperandNo = 0;
3974  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3975  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3976  MachineOperand &MO = *I;
3977  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3978  continue;
3979  // Found the index reg, now try to rewrite it.
3980  unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
3981  MO.getReg(), OperandNo);
3982  if (IndexReg == MO.getReg())
3983  continue;
3984  MO.setReg(IndexReg);
3985  }
3986 
3987  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3988  MachineBasicBlock::iterator I(MI);
3989  removeDeadCode(I, std::next(I));
3990  return true;
3991 }
3992 
3993 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3994  const TargetRegisterClass *RC,
3995  unsigned Op0, bool Op0IsKill,
3996  unsigned Op1, bool Op1IsKill,
3997  unsigned Op2, bool Op2IsKill,
3998  unsigned Op3, bool Op3IsKill) {
3999  const MCInstrDesc &II = TII.get(MachineInstOpcode);
4000 
4001  unsigned ResultReg = createResultReg(RC);
4002  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
4003  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
4004  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
4005  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
4006 
4007  if (II.getNumDefs() >= 1)
4008  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
4009  .addReg(Op0, getKillRegState(Op0IsKill))
4010  .addReg(Op1, getKillRegState(Op1IsKill))
4011  .addReg(Op2, getKillRegState(Op2IsKill))
4012  .addReg(Op3, getKillRegState(Op3IsKill));
4013  else {
4014  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
4015  .addReg(Op0, getKillRegState(Op0IsKill))
4016  .addReg(Op1, getKillRegState(Op1IsKill))
4017  .addReg(Op2, getKillRegState(Op2IsKill))
4018  .addReg(Op3, getKillRegState(Op3IsKill));
4019  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4020  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
4021  }
4022  return ResultReg;
4023 }
4024 
4025 
4026 namespace llvm {
4027  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
4028  const TargetLibraryInfo *libInfo) {
4029  return new X86FastISel(funcInfo, libInfo);
4030  }
4031 }