LLVM  8.0.0svn
X86FastISel.cpp
Go to the documentation of this file.
1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the X86-specific support for the FastISel class. Much
11 // of the target-specific code is generated by tablegen in the file
12 // X86GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "X86.h"
17 #include "X86CallingConv.h"
18 #include "X86InstrBuilder.h"
19 #include "X86InstrInfo.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86RegisterInfo.h"
22 #include "X86Subtarget.h"
23 #include "X86TargetMachine.h"
25 #include "llvm/CodeGen/FastISel.h"
30 #include "llvm/IR/CallSite.h"
31 #include "llvm/IR/CallingConv.h"
32 #include "llvm/IR/DebugInfo.h"
33 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalAlias.h"
36 #include "llvm/IR/GlobalVariable.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/IntrinsicInst.h"
39 #include "llvm/IR/Operator.h"
40 #include "llvm/MC/MCAsmInfo.h"
41 #include "llvm/MC/MCSymbol.h"
44 using namespace llvm;
45 
46 namespace {
47 
/// X86 implementation of the FastISel interface. It overrides the FastISel
/// hooks declared below (instruction selection, load folding, argument/call
/// lowering, constant and alloca materialization) and declares the private
/// X86Select* / X86FastEmit* helpers those hooks are built from.
48 class X86FastISel final : public FastISel {
49  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
50  /// make the right decision when generating code for different targets.
51  const X86Subtarget *Subtarget;
52
53  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
54  /// floating point ops.
55  /// When SSE is available, use it for f32 operations.
56  /// When SSE2 is available, use it for f64 operations.
57  bool X86ScalarSSEf64;
58  bool X86ScalarSSEf32;
59
60 public:
 /// Caches the function's X86Subtarget and derives the two scalar-FP flags
 /// from its SSE1/SSE2 feature queries.
61  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
62  const TargetLibraryInfo *libInfo)
63  : FastISel(funcInfo, libInfo) {
64  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
65  X86ScalarSSEf64 = Subtarget->hasSSE2();
66  X86ScalarSSEf32 = Subtarget->hasSSE1();
67  }
68
69  bool fastSelectInstruction(const Instruction *I) override;
70
71  /// The specified machine instr operand is a vreg, and that
72  /// vreg is being provided by the specified load instruction. If possible,
73  /// try to fold the load as an operand to the instruction, returning true if
74  /// possible.
75  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
76  const LoadInst *LI) override;
77
78  bool fastLowerArguments() override;
79  bool fastLowerCall(CallLoweringInfo &CLI) override;
80  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
81
 // Tablegen-generated part of the selector, pulled into the class body.
82 #include "X86GenFastISel.inc"
83
84 private:
 // Low-level emission helpers for compares, loads, stores and extensions.
85  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
86  const DebugLoc &DL);
87
88  bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
89  unsigned &ResultReg, unsigned Alignment = 1);
90
 // Two store forms: one takes the IR value to store, the other an already
 // materialized register plus its kill flag.
91  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
92  MachineMemOperand *MMO = nullptr, bool Aligned = false);
93  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
94  X86AddressMode &AM,
95  MachineMemOperand *MMO = nullptr, bool Aligned = false);
96
97  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
98  unsigned &ResultReg);
99
 // Address-mode matching for memory operands and for call targets.
100  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
101  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
102
 // Per-instruction selection helpers, one per kind of IR instruction.
103  bool X86SelectLoad(const Instruction *I);
104
105  bool X86SelectStore(const Instruction *I);
106
107  bool X86SelectRet(const Instruction *I);
108
109  bool X86SelectCmp(const Instruction *I);
110
111  bool X86SelectZExt(const Instruction *I);
112
113  bool X86SelectSExt(const Instruction *I);
114
115  bool X86SelectBranch(const Instruction *I);
116
117  bool X86SelectShift(const Instruction *I);
118
119  bool X86SelectDivRem(const Instruction *I);
120
121  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
122
123  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
124
125  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
126
127  bool X86SelectSelect(const Instruction *I);
128
129  bool X86SelectTrunc(const Instruction *I);
130
131  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
132  const TargetRegisterClass *RC);
133
134  bool X86SelectFPExt(const Instruction *I);
135  bool X86SelectFPTrunc(const Instruction *I);
136  bool X86SelectSIToFP(const Instruction *I);
137  bool X86SelectUIToFP(const Instruction *I);
138  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
139
 /// Convenience accessor: the instruction info owned by the cached subtarget.
140  const X86InstrInfo *getInstrInfo() const {
141  return Subtarget->getInstrInfo();
142  }
 /// Convenience accessor: the TM member downcast to the X86 target machine.
143  const X86TargetMachine *getTargetMachine() const {
144  return static_cast<const X86TargetMachine *>(&TM);
145  }
146
147  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
148
 // Constant, alloca and FP-zero materialization.
149  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
150  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
151  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
152  unsigned fastMaterializeConstant(const Constant *C) override;
153
154  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
155
156  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
157
158  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
159  /// computed in an SSE register, not on the X87 floating point stack.
160  bool isScalarFPTypeInSSEReg(EVT VT) const {
161  return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
162  (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
163  }
164
165  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
166
167  bool IsMemcpySmall(uint64_t Len);
168
169  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
170  X86AddressMode SrcAM, uint64_t Len);
171
172  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
173  const Value *Cond);
174
 // NOTE(review): the opening line of the next declaration is missing here;
 // only its trailing parameter line is present. Judging by the
 // addFullAddress(MIB, AM) calls in the member functions it presumably
 // declares addFullAddress taking a MachineInstrBuilder reference first --
 // confirm against upstream and restore.
176  X86AddressMode &AM);
177
 // NOTE(review): presumably the four-register-operand counterpart of the
 // FastISel fastEmitInst_* helpers (each operand comes with its kill flag) --
 // confirm against the definition.
178  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
179  const TargetRegisterClass *RC, unsigned Op0,
180  bool Op0IsKill, unsigned Op1, bool Op1IsKill,
181  unsigned Op2, bool Op2IsKill, unsigned Op3,
182  bool Op3IsKill);
183 };
184 
185 } // end anonymous namespace.
186 
/// Translate a floating-point compare predicate into the numeric condition
/// code listed in the table below. The result pairs that code with a flag
/// that is set for the predicates handled by falling through to their
/// mirrored counterpart (OGT->LT, OGE->LE, ULE->NLT, ULT->NLE), i.e. the
/// caller has to swap the two compare operands for those.
// NOTE(review): the declarator line (the function name and its `Predicate`
// parameter) is missing between the return type and the body in this
// listing -- restore it from upstream.
187 static std::pair<unsigned, bool>
189  unsigned CC;
190  bool NeedSwap = false;
191
192  // SSE Condition code mapping:
193  // 0 - EQ
194  // 1 - LT
195  // 2 - LE
196  // 3 - UNORD
197  // 4 - NEQ
198  // 5 - NLT
199  // 6 - NLE
200  // 7 - ORD
201  switch (Predicate) {
202  default: llvm_unreachable("Unexpected predicate");
203  case CmpInst::FCMP_OEQ: CC = 0; break;
204  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
205  case CmpInst::FCMP_OLT: CC = 1; break;
206  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
207  case CmpInst::FCMP_OLE: CC = 2; break;
208  case CmpInst::FCMP_UNO: CC = 3; break;
209  case CmpInst::FCMP_UNE: CC = 4; break;
210  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
211  case CmpInst::FCMP_UGE: CC = 5; break;
212  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
213  case CmpInst::FCMP_UGT: CC = 6; break;
214  case CmpInst::FCMP_ORD: CC = 7; break;
 // NOTE(review): 8 and 12 lie outside the 0-7 table above; presumably these
 // are extended encodings that callers must treat specially -- confirm at the
 // call sites.
215  case CmpInst::FCMP_UEQ: CC = 8; break;
216  case CmpInst::FCMP_ONE: CC = 12; break;
217  }
218
219  return std::make_pair(CC, NeedSwap);
220 }
221 
222 /// Adds a complex addressing mode to the given machine instr builder.
223 /// Note, this will constrain the index register. If it's not possible to
224 /// constrain the given index register, then a new one will be created. The
225 /// IndexReg field of the addressing mode will be updated to match in this case.
// NOTE(review): several lines of this definition are missing from this
// listing (the qualified function name with its first parameter, and the
// beginning and end of the statement that constrains the index register).
// Restore them from upstream before building.
226 const MachineInstrBuilder &
228  X86AddressMode &AM) {
229  // First constrain the index register. It needs to be a GR64_NOSP.
231  MIB->getNumOperands() +
 // Then delegate to the global-namespace addFullAddress helper to append the
 // address operands.
233  return ::addFullAddress(MIB, AM);
234 }
235 
236 /// Check if it is possible to fold the condition from the XALU intrinsic
237 /// into the user. The condition code will only be updated on success.
///
/// \param CC   Receives COND_O (signed add/sub and both multiplies) or COND_B
///             (unsigned add/sub) when the fold is possible.
/// \param I    The instruction being selected (the user of the condition).
/// \param Cond The condition value; it must be an extractvalue of a
///             *.with.overflow intrinsic call for the fold to apply.
/// \returns true if the condition can be folded, false otherwise.
238 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
239  const Value *Cond) {
240  if (!isa<ExtractValueInst>(Cond))
241  return false;
242
243  const auto *EV = cast<ExtractValueInst>(Cond);
244  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
245  return false;
246
247  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
248  MVT RetVT;
249  const Function *Callee = II->getCalledFunction();
 // The arithmetic result is element 0 of the intrinsic's struct return type.
250  Type *RetTy =
251  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
252  if (!isTypeLegal(RetTy, RetVT))
253  return false;
254
 // Only 32- and 64-bit results are handled.
255  if (RetVT != MVT::i32 && RetVT != MVT::i64)
256  return false;
257
 // Stage the condition code in a temporary so CC stays untouched on failure.
258  X86::CondCode TmpCC;
259  switch (II->getIntrinsicID()) {
260  default: return false;
261  case Intrinsic::sadd_with_overflow:
262  case Intrinsic::ssub_with_overflow:
263  case Intrinsic::smul_with_overflow:
264  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
265  case Intrinsic::uadd_with_overflow:
266  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
267  }
268
269  // Check if both instructions are in the same basic block.
270  if (II->getParent() != I->getParent())
271  return false;
272
273  // Make sure nothing is in the way
 // NOTE(review): the declarations of `Start` and `End` are missing from this
 // listing. Presumably they are iterators positioned at I and II so that the
 // loop below walks backwards over the instructions between them -- restore
 // from upstream and confirm.
276  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
277  // We only expect extractvalue instructions between the intrinsic and the
278  // instruction to be selected.
279  if (!isa<ExtractValueInst>(Itr))
280  return false;
281
282  // Check that the extractvalue operand comes from the intrinsic.
283  const auto *EVI = cast<ExtractValueInst>(Itr);
284  if (EVI->getAggregateOperand() != II)
285  return false;
286  }
287
288  CC = TmpCC;
289  return true;
290 }
291 
292 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
293  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
294  if (evt == MVT::Other || !evt.isSimple())
295  // Unhandled type. Halt "fast" selection and bail.
296  return false;
297 
298  VT = evt.getSimpleVT();
299  // For now, require SSE/SSE2 for performing floating-point operations,
300  // since x87 requires additional work.
301  if (VT == MVT::f64 && !X86ScalarSSEf64)
302  return false;
303  if (VT == MVT::f32 && !X86ScalarSSEf32)
304  return false;
305  // Similarly, no f80 support yet.
306  if (VT == MVT::f80)
307  return false;
308  // We only handle legal types. For example, on x86-32 the instruction
309  // selector contains all of the 64-bit instructions from x86-64,
310  // under the assumption that i64 won't be used if the target doesn't
311  // support it.
312  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
313 }
314 
315 #include "X86GenCallingConv.inc"
316 
317 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
318 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
319 /// Return true and the result register by reference if it is possible.
320 bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
321  MachineMemOperand *MMO, unsigned &ResultReg,
322  unsigned Alignment) {
323  bool HasSSE41 = Subtarget->hasSSE41();
324  bool HasAVX = Subtarget->hasAVX();
325  bool HasAVX2 = Subtarget->hasAVX2();
326  bool HasAVX512 = Subtarget->hasAVX512();
327  bool HasVLX = Subtarget->hasVLX();
328  bool IsNonTemporal = MMO && MMO->isNonTemporal();
329 
330  // Get opcode and regclass of the output for the given load instruction.
331  unsigned Opc = 0;
332  const TargetRegisterClass *RC = nullptr;
333  switch (VT.getSimpleVT().SimpleTy) {
334  default: return false;
335  case MVT::i1:
336  case MVT::i8:
337  Opc = X86::MOV8rm;
338  RC = &X86::GR8RegClass;
339  break;
340  case MVT::i16:
341  Opc = X86::MOV16rm;
342  RC = &X86::GR16RegClass;
343  break;
344  case MVT::i32:
345  Opc = X86::MOV32rm;
346  RC = &X86::GR32RegClass;
347  break;
348  case MVT::i64:
349  // Must be in x86-64 mode.
350  Opc = X86::MOV64rm;
351  RC = &X86::GR64RegClass;
352  break;
353  case MVT::f32:
354  if (X86ScalarSSEf32) {
355  Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
356  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
357  } else {
358  Opc = X86::LD_Fp32m;
359  RC = &X86::RFP32RegClass;
360  }
361  break;
362  case MVT::f64:
363  if (X86ScalarSSEf64) {
364  Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
365  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
366  } else {
367  Opc = X86::LD_Fp64m;
368  RC = &X86::RFP64RegClass;
369  }
370  break;
371  case MVT::f80:
372  // No f80 support yet.
373  return false;
374  case MVT::v4f32:
375  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
376  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
377  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
378  else if (Alignment >= 16)
379  Opc = HasVLX ? X86::VMOVAPSZ128rm :
380  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
381  else
382  Opc = HasVLX ? X86::VMOVUPSZ128rm :
383  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
384  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
385  break;
386  case MVT::v2f64:
387  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
388  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
389  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
390  else if (Alignment >= 16)
391  Opc = HasVLX ? X86::VMOVAPDZ128rm :
392  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
393  else
394  Opc = HasVLX ? X86::VMOVUPDZ128rm :
395  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
396  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
397  break;
398  case MVT::v4i32:
399  case MVT::v2i64:
400  case MVT::v8i16:
401  case MVT::v16i8:
402  if (IsNonTemporal && Alignment >= 16)
403  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
404  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
405  else if (Alignment >= 16)
406  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
407  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
408  else
409  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
410  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
411  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
412  break;
413  case MVT::v8f32:
414  assert(HasAVX);
415  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
416  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
417  else if (IsNonTemporal && Alignment >= 16)
418  return false; // Force split for X86::VMOVNTDQArm
419  else if (Alignment >= 32)
420  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
421  else
422  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
423  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
424  break;
425  case MVT::v4f64:
426  assert(HasAVX);
427  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
428  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
429  else if (IsNonTemporal && Alignment >= 16)
430  return false; // Force split for X86::VMOVNTDQArm
431  else if (Alignment >= 32)
432  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
433  else
434  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
435  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
436  break;
437  case MVT::v8i32:
438  case MVT::v4i64:
439  case MVT::v16i16:
440  case MVT::v32i8:
441  assert(HasAVX);
442  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
443  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
444  else if (IsNonTemporal && Alignment >= 16)
445  return false; // Force split for X86::VMOVNTDQArm
446  else if (Alignment >= 32)
447  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
448  else
449  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
450  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
451  break;
452  case MVT::v16f32:
453  assert(HasAVX512);
454  if (IsNonTemporal && Alignment >= 64)
455  Opc = X86::VMOVNTDQAZrm;
456  else
457  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
458  RC = &X86::VR512RegClass;
459  break;
460  case MVT::v8f64:
461  assert(HasAVX512);
462  if (IsNonTemporal && Alignment >= 64)
463  Opc = X86::VMOVNTDQAZrm;
464  else
465  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
466  RC = &X86::VR512RegClass;
467  break;
468  case MVT::v8i64:
469  case MVT::v16i32:
470  case MVT::v32i16:
471  case MVT::v64i8:
472  assert(HasAVX512);
473  // Note: There are a lot more choices based on type with AVX-512, but
474  // there's really no advantage when the load isn't masked.
475  if (IsNonTemporal && Alignment >= 64)
476  Opc = X86::VMOVNTDQAZrm;
477  else
478  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
479  RC = &X86::VR512RegClass;
480  break;
481  }
482 
483  ResultReg = createResultReg(RC);
484  MachineInstrBuilder MIB =
485  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
486  addFullAddress(MIB, AM);
487  if (MMO)
488  MIB->addMemOperand(*FuncInfo.MF, MMO);
489  return true;
490 }
491 
492 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
493 /// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
494 /// and a displacement offset, or a GlobalAddress,
495 /// i.e. V. Return true if it is possible.
496 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
497  X86AddressMode &AM,
498  MachineMemOperand *MMO, bool Aligned) {
499  bool HasSSE1 = Subtarget->hasSSE1();
500  bool HasSSE2 = Subtarget->hasSSE2();
501  bool HasSSE4A = Subtarget->hasSSE4A();
502  bool HasAVX = Subtarget->hasAVX();
503  bool HasAVX512 = Subtarget->hasAVX512();
504  bool HasVLX = Subtarget->hasVLX();
505  bool IsNonTemporal = MMO && MMO->isNonTemporal();
506 
507  // Get opcode and regclass of the output for the given store instruction.
508  unsigned Opc = 0;
509  switch (VT.getSimpleVT().SimpleTy) {
510  case MVT::f80: // No f80 support yet.
511  default: return false;
512  case MVT::i1: {
513  // Mask out all but lowest bit.
514  unsigned AndResult = createResultReg(&X86::GR8RegClass);
515  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
516  TII.get(X86::AND8ri), AndResult)
517  .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
518  ValReg = AndResult;
519  LLVM_FALLTHROUGH; // handle i1 as i8.
520  }
521  case MVT::i8: Opc = X86::MOV8mr; break;
522  case MVT::i16: Opc = X86::MOV16mr; break;
523  case MVT::i32:
524  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
525  break;
526  case MVT::i64:
527  // Must be in x86-64 mode.
528  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
529  break;
530  case MVT::f32:
531  if (X86ScalarSSEf32) {
532  if (IsNonTemporal && HasSSE4A)
533  Opc = X86::MOVNTSS;
534  else
535  Opc = HasAVX512 ? X86::VMOVSSZmr :
536  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
537  } else
538  Opc = X86::ST_Fp32m;
539  break;
540  case MVT::f64:
541  if (X86ScalarSSEf32) {
542  if (IsNonTemporal && HasSSE4A)
543  Opc = X86::MOVNTSD;
544  else
545  Opc = HasAVX512 ? X86::VMOVSDZmr :
546  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
547  } else
548  Opc = X86::ST_Fp64m;
549  break;
550  case MVT::x86mmx:
551  Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
552  break;
553  case MVT::v4f32:
554  if (Aligned) {
555  if (IsNonTemporal)
556  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
557  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
558  else
559  Opc = HasVLX ? X86::VMOVAPSZ128mr :
560  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
561  } else
562  Opc = HasVLX ? X86::VMOVUPSZ128mr :
563  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
564  break;
565  case MVT::v2f64:
566  if (Aligned) {
567  if (IsNonTemporal)
568  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
569  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
570  else
571  Opc = HasVLX ? X86::VMOVAPDZ128mr :
572  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
573  } else
574  Opc = HasVLX ? X86::VMOVUPDZ128mr :
575  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
576  break;
577  case MVT::v4i32:
578  case MVT::v2i64:
579  case MVT::v8i16:
580  case MVT::v16i8:
581  if (Aligned) {
582  if (IsNonTemporal)
583  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
584  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
585  else
586  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
587  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
588  } else
589  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
590  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
591  break;
592  case MVT::v8f32:
593  assert(HasAVX);
594  if (Aligned) {
595  if (IsNonTemporal)
596  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
597  else
598  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
599  } else
600  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
601  break;
602  case MVT::v4f64:
603  assert(HasAVX);
604  if (Aligned) {
605  if (IsNonTemporal)
606  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
607  else
608  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
609  } else
610  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
611  break;
612  case MVT::v8i32:
613  case MVT::v4i64:
614  case MVT::v16i16:
615  case MVT::v32i8:
616  assert(HasAVX);
617  if (Aligned) {
618  if (IsNonTemporal)
619  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
620  else
621  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
622  } else
623  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
624  break;
625  case MVT::v16f32:
626  assert(HasAVX512);
627  if (Aligned)
628  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
629  else
630  Opc = X86::VMOVUPSZmr;
631  break;
632  case MVT::v8f64:
633  assert(HasAVX512);
634  if (Aligned) {
635  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
636  } else
637  Opc = X86::VMOVUPDZmr;
638  break;
639  case MVT::v8i64:
640  case MVT::v16i32:
641  case MVT::v32i16:
642  case MVT::v64i8:
643  assert(HasAVX512);
644  // Note: There are a lot more choices based on type with AVX-512, but
645  // there's really no advantage when the store isn't masked.
646  if (Aligned)
647  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
648  else
649  Opc = X86::VMOVDQU64Zmr;
650  break;
651  }
652 
653  const MCInstrDesc &Desc = TII.get(Opc);
654  // Some of the instructions in the previous switch use FR128 instead
655  // of FR32 for ValReg. Make sure the register we feed the instruction
656  // matches its register class constraints.
657  // Note: This is fine to do a copy from FR32 to FR128, this is the
658  // same registers behind the scene and actually why it did not trigger
659  // any bugs before.
660  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
661  MachineInstrBuilder MIB =
662  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
663  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
664  if (MMO)
665  MIB->addMemOperand(*FuncInfo.MF, MMO);
666 
667  return true;
668 }
669 
670 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
671  X86AddressMode &AM,
672  MachineMemOperand *MMO, bool Aligned) {
673  // Handle 'null' like i32/i64 0.
674  if (isa<ConstantPointerNull>(Val))
675  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
676 
677  // If this is a store of a simple constant, fold the constant into the store.
678  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
679  unsigned Opc = 0;
680  bool Signed = true;
681  switch (VT.getSimpleVT().SimpleTy) {
682  default: break;
683  case MVT::i1:
684  Signed = false;
685  LLVM_FALLTHROUGH; // Handle as i8.
686  case MVT::i8: Opc = X86::MOV8mi; break;
687  case MVT::i16: Opc = X86::MOV16mi; break;
688  case MVT::i32: Opc = X86::MOV32mi; break;
689  case MVT::i64:
690  // Must be a 32-bit sign extended value.
691  if (isInt<32>(CI->getSExtValue()))
692  Opc = X86::MOV64mi32;
693  break;
694  }
695 
696  if (Opc) {
697  MachineInstrBuilder MIB =
698  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
699  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
700  : CI->getZExtValue());
701  if (MMO)
702  MIB->addMemOperand(*FuncInfo.MF, MMO);
703  return true;
704  }
705  }
706 
707  unsigned ValReg = getRegForValue(Val);
708  if (ValReg == 0)
709  return false;
710 
711  bool ValKill = hasTrivialKill(Val);
712  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
713 }
714 
715 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
716 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
717 /// ISD::SIGN_EXTEND).
718 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
719  unsigned Src, EVT SrcVT,
720  unsigned &ResultReg) {
721  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
722  Src, /*TODO: Kill=*/false);
723  if (RR == 0)
724  return false;
725 
726  ResultReg = RR;
727  return true;
728 }
729 
730 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
731  // Handle constant address.
732  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
733  // Can't handle alternate code models yet.
734  if (TM.getCodeModel() != CodeModel::Small)
735  return false;
736 
737  // Can't handle TLS yet.
738  if (GV->isThreadLocal())
739  return false;
740 
741  // Can't handle !absolute_symbol references yet.
742  if (GV->isAbsoluteSymbolRef())
743  return false;
744 
745  // RIP-relative addresses can't have additional register operands, so if
746  // we've already folded stuff into the addressing mode, just force the
747  // global value into its own register, which we can use as the basereg.
748  if (!Subtarget->isPICStyleRIPRel() ||
749  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
750  // Okay, we've committed to selecting this global. Set up the address.
751  AM.GV = GV;
752 
753  // Allow the subtarget to classify the global.
754  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
755 
756  // If this reference is relative to the pic base, set it now.
757  if (isGlobalRelativeToPICBase(GVFlags)) {
758  // FIXME: How do we know Base.Reg is free??
759  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
760  }
761 
762  // Unless the ABI requires an extra load, return a direct reference to
763  // the global.
764  if (!isGlobalStubReference(GVFlags)) {
765  if (Subtarget->isPICStyleRIPRel()) {
766  // Use rip-relative addressing if we can. Above we verified that the
767  // base and index registers are unused.
768  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
769  AM.Base.Reg = X86::RIP;
770  }
771  AM.GVOpFlags = GVFlags;
772  return true;
773  }
774 
775  // Ok, we need to do a load from a stub. If we've already loaded from
776  // this stub, reuse the loaded pointer, otherwise emit the load now.
777  DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
778  unsigned LoadReg;
779  if (I != LocalValueMap.end() && I->second != 0) {
780  LoadReg = I->second;
781  } else {
782  // Issue load from stub.
783  unsigned Opc = 0;
784  const TargetRegisterClass *RC = nullptr;
785  X86AddressMode StubAM;
786  StubAM.Base.Reg = AM.Base.Reg;
787  StubAM.GV = GV;
788  StubAM.GVOpFlags = GVFlags;
789 
790  // Prepare for inserting code in the local-value area.
791  SavePoint SaveInsertPt = enterLocalValueArea();
792 
793  if (TLI.getPointerTy(DL) == MVT::i64) {
794  Opc = X86::MOV64rm;
795  RC = &X86::GR64RegClass;
796 
797  if (Subtarget->isPICStyleRIPRel())
798  StubAM.Base.Reg = X86::RIP;
799  } else {
800  Opc = X86::MOV32rm;
801  RC = &X86::GR32RegClass;
802  }
803 
804  LoadReg = createResultReg(RC);
805  MachineInstrBuilder LoadMI =
806  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
807  addFullAddress(LoadMI, StubAM);
808 
809  // Ok, back to normal mode.
810  leaveLocalValueArea(SaveInsertPt);
811 
812  // Prevent loading GV stub multiple times in same MBB.
813  LocalValueMap[V] = LoadReg;
814  }
815 
816  // Now construct the final address. Note that the Disp, Scale,
817  // and Index values may already be set here.
818  AM.Base.Reg = LoadReg;
819  AM.GV = nullptr;
820  return true;
821  }
822  }
823 
824  // If all else fails, try to materialize the value in a register.
825  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
826  if (AM.Base.Reg == 0) {
827  AM.Base.Reg = getRegForValue(V);
828  return AM.Base.Reg != 0;
829  }
830  if (AM.IndexReg == 0) {
831  assert(AM.Scale == 1 && "Scale with no index!");
832  AM.IndexReg = getRegForValue(V);
833  return AM.IndexReg != 0;
834  }
835  }
836 
837  return false;
838 }
839 
840 /// X86SelectAddress - Attempt to fill in an address from the given value.
841 ///
/// Looks through bitcasts, no-op inttoptr/ptrtoint, static allocas, adds of
/// constants and GEPs with constant and/or one scaled dynamic index, folding
/// what it can into AM; whatever remains is handed to
/// handleConstantAddresses. Returns true if AM describes the address.
// NOTE(review): the function signature and the declaration of `GEPs` are
// missing from this listing, as are a few interior lines (flagged below).
// Restore them from upstream before building.
844 redo_gep:
845  const User *U = nullptr;
846  unsigned Opcode = Instruction::UserOp1;
847  if (const Instruction *I = dyn_cast<Instruction>(V)) {
848  // Don't walk into other basic blocks; it's possible we haven't
849  // visited them yet, so the instructions may not yet be assigned
850  // virtual registers.
851  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
852  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
853  Opcode = I->getOpcode();
854  U = I;
855  }
856  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
857  Opcode = C->getOpcode();
858  U = C;
859  }
860
861  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
862  if (Ty->getAddressSpace() > 255)
863  // Fast instruction selection doesn't support the special
864  // address spaces.
865  return false;
866
867  switch (Opcode) {
868  default: break;
869  case Instruction::BitCast:
870  // Look past bitcasts.
871  return X86SelectAddress(U->getOperand(0), AM);
872
873  case Instruction::IntToPtr:
874  // Look past no-op inttoptrs.
875  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
876  TLI.getPointerTy(DL))
877  return X86SelectAddress(U->getOperand(0), AM);
878  break;
879
880  case Instruction::PtrToInt:
881  // Look past no-op ptrtoints.
882  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
883  return X86SelectAddress(U->getOperand(0), AM);
884  break;
885
886  case Instruction::Alloca: {
887  // Do static allocas.
888  const AllocaInst *A = cast<AllocaInst>(V);
 // NOTE(review): the first line of the `SI` iterator declaration is missing
 // here.
890  FuncInfo.StaticAllocaMap.find(A);
891  if (SI != FuncInfo.StaticAllocaMap.end()) {
 // NOTE(review): a line is missing here; presumably it switches AM to
 // frame-index addressing before the index is stored -- confirm.
893  AM.Base.FrameIndex = SI->second;
894  return true;
895  }
896  break;
897  }
898
899  case Instruction::Add: {
900  // Adds of constants are common and easy enough.
901  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
902  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
903  // They have to fit in the 32-bit signed displacement field though.
904  if (isInt<32>(Disp)) {
905  AM.Disp = (uint32_t)Disp;
906  return X86SelectAddress(U->getOperand(0), AM);
907  }
908  }
909  break;
910  }
911
912  case Instruction::GetElementPtr: {
 // Keep a copy so AM can be restored if the GEP base cannot be folded.
913  X86AddressMode SavedAM = AM;
914
915  // Pattern-match simple GEPs.
 // Work on local copies; AM is only updated once all indices are folded.
916  uint64_t Disp = (int32_t)AM.Disp;
917  unsigned IndexReg = AM.IndexReg;
918  unsigned Scale = AM.Scale;
 // NOTE(review): the declaration of the type iterator `GTI` is missing here.
920  // Iterate through the indices, folding what we can. Constants can be
921  // folded, and one dynamic index can be handled, if the scale is supported.
922  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
923  i != e; ++i, ++GTI) {
924  const Value *Op = *i;
 // Struct field index: add the field's byte offset from the struct layout.
925  if (StructType *STy = GTI.getStructTypeOrNull()) {
926  const StructLayout *SL = DL.getStructLayout(STy);
927  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
928  continue;
929  }
930
931  // An array/variable index is always of the form i*S where S is the
932  // constant scale size. See if we can push the scale into immediates.
933  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
934  for (;;) {
935  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
936  // Constant-offset addressing.
937  Disp += CI->getSExtValue() * S;
938  break;
939  }
940  if (canFoldAddIntoGEP(U, Op)) {
941  // A compatible add with a constant operand. Fold the constant.
942  ConstantInt *CI =
943  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
944  Disp += CI->getSExtValue() * S;
945  // Iterate on the other operand.
946  Op = cast<AddOperator>(Op)->getOperand(0);
947  continue;
948  }
 // Only one index register is available, the scale must be 1, 2, 4 or 8,
 // and no index register is used together with a global under RIP-relative
 // PIC.
949  if (IndexReg == 0 &&
950  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
951  (S == 1 || S == 2 || S == 4 || S == 8)) {
952  // Scaled-index addressing.
953  Scale = S;
954  IndexReg = getRegForGEPIndex(Op).first;
955  if (IndexReg == 0)
956  return false;
957  break;
958  }
959  // Unsupported.
960  goto unsupported_gep;
961  }
962  }
963
964  // Check for displacement overflow.
965  if (!isInt<32>(Disp))
966  break;
967
968  AM.IndexReg = IndexReg;
969  AM.Scale = Scale;
970  AM.Disp = (uint32_t)Disp;
 // Remember this GEP so it can be retried as a plain value below.
971  GEPs.push_back(V);
972
973  if (const GetElementPtrInst *GEP =
974  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
975  // Ok, the GEP indices were covered by constant-offset and scaled-index
976  // addressing. Update the address state and move on to examining the base.
977  V = GEP;
978  goto redo_gep;
979  } else if (X86SelectAddress(U->getOperand(0), AM)) {
980  return true;
981  }
982
983  // If we couldn't merge the gep value into this addr mode, revert back to
984  // our address and just match the value instead of completely failing.
985  AM = SavedAM;
986
 // Try the recorded GEPs from the most recently recorded one backwards.
987  for (const Value *I : reverse(GEPs))
988  if (handleConstantAddresses(I, AM))
989  return true;
990
991  return false;
992  unsupported_gep:
993  // Ok, the GEP indices weren't all covered.
994  break;
995  }
996  }
997
998  return handleConstantAddresses(V, AM);
999 }
1000 
1001 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
1002 ///
1003 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
1004  const User *U = nullptr;
1005  unsigned Opcode = Instruction::UserOp1;
1006  const Instruction *I = dyn_cast<Instruction>(V);
1007  // Record if the value is defined in the same basic block.
1008  //
1009  // This information is crucial to know whether or not folding an
1010  // operand is valid.
1011  // Indeed, FastISel generates or reuses a virtual register for all
1012  // operands of all instructions it selects. Obviously, the definition and
1013  // its uses must use the same virtual register otherwise the produced
1014  // code is incorrect.
1015  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
1016  // registers for values that are alive across basic blocks. This ensures
1017  // that the values are consistently set between across basic block, even
1018  // if different instruction selection mechanisms are used (e.g., a mix of
1019  // SDISel and FastISel).
1020  // For values local to a basic block, the instruction selection process
1021  // generates these virtual registers with whatever method is appropriate
1022  // for its needs. In particular, FastISel and SDISel do not share the way
1023  // local virtual registers are set.
1024  // Therefore, this is impossible (or at least unsafe) to share values
1025  // between basic blocks unless they use the same instruction selection
1026  // method, which is not guarantee for X86.
1027  // Moreover, things like hasOneUse could not be used accurately, if we
1028  // allow to reference values across basic blocks whereas they are not
1029  // alive across basic blocks initially.
1030  bool InMBB = true;
1031  if (I) {
1032  Opcode = I->getOpcode();
1033  U = I;
1034  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1035  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1036  Opcode = C->getOpcode();
1037  U = C;
1038  }
1039 
1040  switch (Opcode) {
1041  default: break;
1042  case Instruction::BitCast:
1043  // Look past bitcasts if its operand is in the same BB.
1044  if (InMBB)
1045  return X86SelectCallAddress(U->getOperand(0), AM);
1046  break;
1047 
1048  case Instruction::IntToPtr:
1049  // Look past no-op inttoptrs if its operand is in the same BB.
1050  if (InMBB &&
1051  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1052  TLI.getPointerTy(DL))
1053  return X86SelectCallAddress(U->getOperand(0), AM);
1054  break;
1055 
1056  case Instruction::PtrToInt:
1057  // Look past no-op ptrtoints if its operand is in the same BB.
1058  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1059  return X86SelectCallAddress(U->getOperand(0), AM);
1060  break;
1061  }
1062 
1063  // Handle constant address.
1064  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1065  // Can't handle alternate code models yet.
1066  if (TM.getCodeModel() != CodeModel::Small)
1067  return false;
1068 
1069  // RIP-relative addresses can't have additional register operands.
1070  if (Subtarget->isPICStyleRIPRel() &&
1071  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1072  return false;
1073 
1074  // Can't handle TLS.
1075  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1076  if (GVar->isThreadLocal())
1077  return false;
1078 
1079  // Okay, we've committed to selecting this global. Set up the basic address.
1080  AM.GV = GV;
1081 
1082  // Return a direct reference to the global. Fastisel can handle calls to
1083  // functions that require loads, such as dllimport and nonlazybind
1084  // functions.
1085  if (Subtarget->isPICStyleRIPRel()) {
1086  // Use rip-relative addressing if we can. Above we verified that the
1087  // base and index registers are unused.
1088  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1089  AM.Base.Reg = X86::RIP;
1090  } else {
1091  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1092  }
1093 
1094  return true;
1095  }
1096 
1097  // If all else fails, try to materialize the value in a register.
1098  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1099  if (AM.Base.Reg == 0) {
1100  AM.Base.Reg = getRegForValue(V);
1101  return AM.Base.Reg != 0;
1102  }
1103  if (AM.IndexReg == 0) {
1104  assert(AM.Scale == 1 && "Scale with no index!");
1105  AM.IndexReg = getRegForValue(V);
1106  return AM.IndexReg != 0;
1107  }
1108  }
1109 
1110  return false;
1111 }
1112 
1113 
1114 /// X86SelectStore - Select and emit code to implement store instructions.
1115 bool X86FastISel::X86SelectStore(const Instruction *I) {
1116  // Atomic stores need special handling.
1117  const StoreInst *S = cast<StoreInst>(I);
1118 
1119  if (S->isAtomic())
1120  return false;
1121 
1122  const Value *PtrV = I->getOperand(1);
1123  if (TLI.supportSwiftError()) {
1124  // Swifterror values can come from either a function parameter with
1125  // swifterror attribute or an alloca with swifterror attribute.
1126  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1127  if (Arg->hasSwiftErrorAttr())
1128  return false;
1129  }
1130 
1131  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1132  if (Alloca->isSwiftError())
1133  return false;
1134  }
1135  }
1136 
1137  const Value *Val = S->getValueOperand();
1138  const Value *Ptr = S->getPointerOperand();
1139 
1140  MVT VT;
1141  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1142  return false;
1143 
1144  unsigned Alignment = S->getAlignment();
1145  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
1146  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1147  Alignment = ABIAlignment;
1148  bool Aligned = Alignment >= ABIAlignment;
1149 
1150  X86AddressMode AM;
1151  if (!X86SelectAddress(Ptr, AM))
1152  return false;
1153 
1154  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1155 }
1156 
1157 /// X86SelectRet - Select and emit code to implement ret instructions.
1158 bool X86FastISel::X86SelectRet(const Instruction *I) {
1159  const ReturnInst *Ret = cast<ReturnInst>(I);
1160  const Function &F = *I->getParent()->getParent();
1161  const X86MachineFunctionInfo *X86MFInfo =
1162  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1163 
1164  if (!FuncInfo.CanLowerReturn)
1165  return false;
1166 
1167  if (TLI.supportSwiftError() &&
1168  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1169  return false;
1170 
1171  if (TLI.supportSplitCSR(FuncInfo.MF))
1172  return false;
1173 
1174  CallingConv::ID CC = F.getCallingConv();
1175  if (CC != CallingConv::C &&
1176  CC != CallingConv::Fast &&
1177  CC != CallingConv::X86_FastCall &&
1178  CC != CallingConv::X86_StdCall &&
1179  CC != CallingConv::X86_ThisCall &&
1180  CC != CallingConv::X86_64_SysV &&
1181  CC != CallingConv::Win64)
1182  return false;
1183 
1184  // Don't handle popping bytes if they don't fit the ret's immediate.
1185  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1186  return false;
1187 
1188  // fastcc with -tailcallopt is intended to provide a guaranteed
1189  // tail call optimization. Fastisel doesn't know how to do that.
1190  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
1191  return false;
1192 
1193  // Let SDISel handle vararg functions.
1194  if (F.isVarArg())
1195  return false;
1196 
1197  // Build a list of return value registers.
1198  SmallVector<unsigned, 4> RetRegs;
1199 
1200  if (Ret->getNumOperands() > 0) {
1202  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1203 
1204  // Analyze operands of the call, assigning locations to each operand.
1206  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1207  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1208 
1209  const Value *RV = Ret->getOperand(0);
1210  unsigned Reg = getRegForValue(RV);
1211  if (Reg == 0)
1212  return false;
1213 
1214  // Only handle a single return value for now.
1215  if (ValLocs.size() != 1)
1216  return false;
1217 
1218  CCValAssign &VA = ValLocs[0];
1219 
1220  // Don't bother handling odd stuff for now.
1221  if (VA.getLocInfo() != CCValAssign::Full)
1222  return false;
1223  // Only handle register returns for now.
1224  if (!VA.isRegLoc())
1225  return false;
1226 
1227  // The calling-convention tables for x87 returns don't tell
1228  // the whole story.
1229  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1230  return false;
1231 
1232  unsigned SrcReg = Reg + VA.getValNo();
1233  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1234  EVT DstVT = VA.getValVT();
1235  // Special handling for extended integers.
1236  if (SrcVT != DstVT) {
1237  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1238  return false;
1239 
1240  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1241  return false;
1242 
1243  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1244 
1245  if (SrcVT == MVT::i1) {
1246  if (Outs[0].Flags.isSExt())
1247  return false;
1248  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
1249  SrcVT = MVT::i8;
1250  }
1251  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1253  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
1254  SrcReg, /*TODO: Kill=*/false);
1255  }
1256 
1257  // Make the copy.
1258  unsigned DstReg = VA.getLocReg();
1259  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1260  // Avoid a cross-class copy. This is very unlikely.
1261  if (!SrcRC->contains(DstReg))
1262  return false;
1263  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1264  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1265 
1266  // Add register to return instruction.
1267  RetRegs.push_back(VA.getLocReg());
1268  }
1269 
1270  // Swift calling convention does not require we copy the sret argument
1271  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1272 
1273  // All x86 ABIs require that for returning structs by value we copy
1274  // the sret argument into %rax/%eax (depending on ABI) for the return.
1275  // We saved the argument into a virtual register in the entry block,
1276  // so now we copy the value out and into %rax/%eax.
1277  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
1278  unsigned Reg = X86MFInfo->getSRetReturnReg();
1279  assert(Reg &&
1280  "SRetReturnReg should have been set in LowerFormalArguments()!");
1281  unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1282  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1283  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1284  RetRegs.push_back(RetReg);
1285  }
1286 
1287  // Now emit the RET.
1288  MachineInstrBuilder MIB;
1289  if (X86MFInfo->getBytesToPopOnReturn()) {
1290  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1291  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
1292  .addImm(X86MFInfo->getBytesToPopOnReturn());
1293  } else {
1294  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1295  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
1296  }
1297  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1298  MIB.addReg(RetRegs[i], RegState::Implicit);
1299  return true;
1300 }
1301 
1302 /// X86SelectLoad - Select and emit code to implement load instructions.
1303 ///
1304 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1305  const LoadInst *LI = cast<LoadInst>(I);
1306 
1307  // Atomic loads need special handling.
1308  if (LI->isAtomic())
1309  return false;
1310 
1311  const Value *SV = I->getOperand(0);
1312  if (TLI.supportSwiftError()) {
1313  // Swifterror values can come from either a function parameter with
1314  // swifterror attribute or an alloca with swifterror attribute.
1315  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1316  if (Arg->hasSwiftErrorAttr())
1317  return false;
1318  }
1319 
1320  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1321  if (Alloca->isSwiftError())
1322  return false;
1323  }
1324  }
1325 
1326  MVT VT;
1327  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1328  return false;
1329 
1330  const Value *Ptr = LI->getPointerOperand();
1331 
1332  X86AddressMode AM;
1333  if (!X86SelectAddress(Ptr, AM))
1334  return false;
1335 
1336  unsigned Alignment = LI->getAlignment();
1337  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
1338  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1339  Alignment = ABIAlignment;
1340 
1341  unsigned ResultReg = 0;
1342  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1343  Alignment))
1344  return false;
1345 
1346  updateValueMap(I, ResultReg);
1347  return true;
1348 }
1349 
1350 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1351  bool HasAVX512 = Subtarget->hasAVX512();
1352  bool HasAVX = Subtarget->hasAVX();
1353  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
1354  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
1355 
1356  switch (VT.getSimpleVT().SimpleTy) {
1357  default: return 0;
1358  case MVT::i8: return X86::CMP8rr;
1359  case MVT::i16: return X86::CMP16rr;
1360  case MVT::i32: return X86::CMP32rr;
1361  case MVT::i64: return X86::CMP64rr;
1362  case MVT::f32:
1363  return X86ScalarSSEf32
1364  ? (HasAVX512 ? X86::VUCOMISSZrr
1365  : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
1366  : 0;
1367  case MVT::f64:
1368  return X86ScalarSSEf64
1369  ? (HasAVX512 ? X86::VUCOMISDZrr
1370  : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
1371  : 0;
1372  }
1373 }
1374 
1375 /// If we have a comparison with RHS as the RHS of the comparison, return an
1376 /// opcode that works for the compare (e.g. CMP32ri) otherwise return 0.
1377 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1378  int64_t Val = RHSC->getSExtValue();
1379  switch (VT.getSimpleVT().SimpleTy) {
1380  // Otherwise, we can't fold the immediate into this comparison.
1381  default:
1382  return 0;
1383  case MVT::i8:
1384  return X86::CMP8ri;
1385  case MVT::i16:
1386  if (isInt<8>(Val))
1387  return X86::CMP16ri8;
1388  return X86::CMP16ri;
1389  case MVT::i32:
1390  if (isInt<8>(Val))
1391  return X86::CMP32ri8;
1392  return X86::CMP32ri;
1393  case MVT::i64:
1394  if (isInt<8>(Val))
1395  return X86::CMP64ri8;
1396  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1397  // field.
1398  if (isInt<32>(Val))
1399  return X86::CMP64ri32;
1400  return 0;
1401  }
1402 }
1403 
1404 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1405  const DebugLoc &CurDbgLoc) {
1406  unsigned Op0Reg = getRegForValue(Op0);
1407  if (Op0Reg == 0) return false;
1408 
1409  // Handle 'null' like i32/i64 0.
1410  if (isa<ConstantPointerNull>(Op1))
1411  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1412 
1413  // We have two options: compare with register or immediate. If the RHS of
1414  // the compare is an immediate that we can fold into this compare, use
1415  // CMPri, otherwise use CMPrr.
1416  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1417  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1418  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1419  .addReg(Op0Reg)
1420  .addImm(Op1C->getSExtValue());
1421  return true;
1422  }
1423  }
1424 
1425  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1426  if (CompareOpc == 0) return false;
1427 
1428  unsigned Op1Reg = getRegForValue(Op1);
1429  if (Op1Reg == 0) return false;
1430  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1431  .addReg(Op0Reg)
1432  .addReg(Op1Reg);
1433 
1434  return true;
1435 }
1436 
1437 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1438  const CmpInst *CI = cast<CmpInst>(I);
1439 
1440  MVT VT;
1441  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1442  return false;
1443 
1444  // Try to optimize or fold the cmp.
1445  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1446  unsigned ResultReg = 0;
1447  switch (Predicate) {
1448  default: break;
1449  case CmpInst::FCMP_FALSE: {
1450  ResultReg = createResultReg(&X86::GR32RegClass);
1451  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1452  ResultReg);
1453  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
1454  X86::sub_8bit);
1455  if (!ResultReg)
1456  return false;
1457  break;
1458  }
1459  case CmpInst::FCMP_TRUE: {
1460  ResultReg = createResultReg(&X86::GR8RegClass);
1461  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1462  ResultReg).addImm(1);
1463  break;
1464  }
1465  }
1466 
1467  if (ResultReg) {
1468  updateValueMap(I, ResultReg);
1469  return true;
1470  }
1471 
1472  const Value *LHS = CI->getOperand(0);
1473  const Value *RHS = CI->getOperand(1);
1474 
1475  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1476  // We don't have to materialize a zero constant for this case and can just use
1477  // %x again on the RHS.
1478  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1479  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1480  if (RHSC && RHSC->isNullValue())
1481  RHS = LHS;
1482  }
1483 
1484  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1485  static const uint16_t SETFOpcTable[2][3] = {
1486  { X86::SETEr, X86::SETNPr, X86::AND8rr },
1487  { X86::SETNEr, X86::SETPr, X86::OR8rr }
1488  };
1489  const uint16_t *SETFOpc = nullptr;
1490  switch (Predicate) {
1491  default: break;
1492  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1493  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1494  }
1495 
1496  ResultReg = createResultReg(&X86::GR8RegClass);
1497  if (SETFOpc) {
1498  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1499  return false;
1500 
1501  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1502  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1503  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
1504  FlagReg1);
1505  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
1506  FlagReg2);
1507  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1508  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1509  updateValueMap(I, ResultReg);
1510  return true;
1511  }
1512 
1513  X86::CondCode CC;
1514  bool SwapArgs;
1515  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1516  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1517  unsigned Opc = X86::getSETFromCond(CC);
1518 
1519  if (SwapArgs)
1520  std::swap(LHS, RHS);
1521 
1522  // Emit a compare of LHS/RHS.
1523  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1524  return false;
1525 
1526  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
1527  updateValueMap(I, ResultReg);
1528  return true;
1529 }
1530 
1531 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1532  EVT DstVT = TLI.getValueType(DL, I->getType());
1533  if (!TLI.isTypeLegal(DstVT))
1534  return false;
1535 
1536  unsigned ResultReg = getRegForValue(I->getOperand(0));
1537  if (ResultReg == 0)
1538  return false;
1539 
1540  // Handle zero-extension from i1 to i8, which is common.
1541  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1542  if (SrcVT == MVT::i1) {
1543  // Set the high bits to zero.
1544  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1545  SrcVT = MVT::i8;
1546 
1547  if (ResultReg == 0)
1548  return false;
1549  }
1550 
1551  if (DstVT == MVT::i64) {
1552  // Handle extension to 64-bits via sub-register shenanigans.
1553  unsigned MovInst;
1554 
1555  switch (SrcVT.SimpleTy) {
1556  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1557  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1558  case MVT::i32: MovInst = X86::MOV32rr; break;
1559  default: llvm_unreachable("Unexpected zext to i64 source type");
1560  }
1561 
1562  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1563  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1564  .addReg(ResultReg);
1565 
1566  ResultReg = createResultReg(&X86::GR64RegClass);
1567  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1568  ResultReg)
1569  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1570  } else if (DstVT == MVT::i16) {
1571  // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1572  // extend to 32-bits and then extract down to 16-bits.
1573  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1574  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1575  Result32).addReg(ResultReg);
1576 
1577  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1578  X86::sub_16bit);
1579  } else if (DstVT != MVT::i8) {
1580  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1581  ResultReg, /*Kill=*/true);
1582  if (ResultReg == 0)
1583  return false;
1584  }
1585 
1586  updateValueMap(I, ResultReg);
1587  return true;
1588 }
1589 
1590 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1591  EVT DstVT = TLI.getValueType(DL, I->getType());
1592  if (!TLI.isTypeLegal(DstVT))
1593  return false;
1594 
1595  unsigned ResultReg = getRegForValue(I->getOperand(0));
1596  if (ResultReg == 0)
1597  return false;
1598 
1599  // Handle sign-extension from i1 to i8.
1600  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1601  if (SrcVT == MVT::i1) {
1602  // Set the high bits to zero.
1603  unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
1604  /*TODO: Kill=*/false);
1605  if (ZExtReg == 0)
1606  return false;
1607 
1608  // Negate the result to make an 8-bit sign extended value.
1609  ResultReg = createResultReg(&X86::GR8RegClass);
1610  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1611  ResultReg).addReg(ZExtReg);
1612 
1613  SrcVT = MVT::i8;
1614  }
1615 
1616  if (DstVT == MVT::i16) {
1617  // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1618  // extend to 32-bits and then extract down to 16-bits.
1619  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1620  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1621  Result32).addReg(ResultReg);
1622 
1623  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1624  X86::sub_16bit);
1625  } else if (DstVT != MVT::i8) {
1626  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1627  ResultReg, /*Kill=*/true);
1628  if (ResultReg == 0)
1629  return false;
1630  }
1631 
1632  updateValueMap(I, ResultReg);
1633  return true;
1634 }
1635 
/// Select and emit code for a conditional branch.
///
/// Strategies, tried in this order:
///  1. The condition is a compare with one use in the same block: emit the
///     compare and branch directly on the resulting condition code.
///  2. The condition is a trunc with one use in the same block and a legal
///     source type: TEST the source against 1 and branch on the result.
///  3. foldX86XALUIntrinsic accepts the condition: branch on the condition
///     code it reports.
///  4. Otherwise: put the condition in a register, TEST it against 1 and
///     branch with JNE.
/// A compare or trunc that does not meet the conditions of (1)/(2) goes
/// straight to (4). Returns false when a required register or compare could
/// not be produced.
1636 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1637  // Unconditional branches are selected by tablegen-generated code.
1638  // Handle a conditional branch.
1639  const BranchInst *BI = cast<BranchInst>(I);
1640  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1641  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1642 
1643  // Fold the common case of a conditional branch with a comparison
1644  // in the same block (values defined on other blocks may not have
1645  // initialized registers).
1646  X86::CondCode CC;
1647  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1648  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1649  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1650 
1651  // Try to optimize or fold the cmp.
1652  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1653  switch (Predicate) {
1654  default: break;
  // A constant predicate becomes an unconditional branch to the known target.
1655  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1656  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1657  }
1658 
1659  const Value *CmpLHS = CI->getOperand(0);
1660  const Value *CmpRHS = CI->getOperand(1);
1661 
1662  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1663  // 0.0.
1664  // We don't have to materialize a zero constant for this case and can just
1665  // use %x again on the RHS.
1666  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1667  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1668  if (CmpRHSC && CmpRHSC->isNullValue())
1669  CmpRHS = CmpLHS;
1670  }
1671 
1672  // Try to take advantage of fallthrough opportunities.
  // When the true target is the layout successor, swap the targets and
  // invert the predicate so the emitted branch goes to the other block.
1673  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1674  std::swap(TrueMBB, FalseMBB);
1675  Predicate = CmpInst::getInversePredicate(Predicate);
1676  }
1677 
1678  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1679  // code check. Instead two branch instructions are required to check all
1680  // the flags. First we change the predicate to a supported condition code,
1681  // which will be the first branch. Later one we will emit the second
1682  // branch.
1683  bool NeedExtraBranch = false;
1684  switch (Predicate) {
1685  default: break;
1686  case CmpInst::FCMP_OEQ:
1687  std::swap(TrueMBB, FalseMBB);
  // Intentional fall through: with the targets swapped, OEQ is handled
  // exactly like UNE below.
1689  case CmpInst::FCMP_UNE:
1690  NeedExtraBranch = true;
1691  Predicate = CmpInst::FCMP_ONE;
1692  break;
1693  }
1694 
1695  bool SwapArgs;
1696  unsigned BranchOpc;
1697  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1698  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1699 
1700  BranchOpc = X86::GetCondBranchFromCond(CC);
1701  if (SwapArgs)
1702  std::swap(CmpLHS, CmpRHS);
1703 
1704  // Emit a compare of the LHS and RHS, setting the flags.
1705  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1706  return false;
1707 
1708  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1709  .addMBB(TrueMBB);
1710 
1711  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1712  // to UNE above).
1713  if (NeedExtraBranch) {
1714  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
1715  .addMBB(TrueMBB);
1716  }
1717 
1718  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1719  return true;
1720  }
1721  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1722  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1723  // typically happen for _Bool and C++ bools.
1724  MVT SourceVT;
1725  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1726  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
  // Pick the TEST-with-immediate opcode matching the trunc's source type;
  // 0 means the type is not handled here.
1727  unsigned TestOpc = 0;
1728  switch (SourceVT.SimpleTy) {
1729  default: break;
1730  case MVT::i8: TestOpc = X86::TEST8ri; break;
1731  case MVT::i16: TestOpc = X86::TEST16ri; break;
1732  case MVT::i32: TestOpc = X86::TEST32ri; break;
1733  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1734  }
1735  if (TestOpc) {
1736  unsigned OpReg = getRegForValue(TI->getOperand(0));
1737  if (OpReg == 0) return false;
1738 
1739  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1740  .addReg(OpReg).addImm(1);
1741 
  // Branch with JNE by default; if the true target is the layout successor,
  // swap the targets and branch with JE instead.
1742  unsigned JmpOpc = X86::JNE_1;
1743  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1744  std::swap(TrueMBB, FalseMBB);
1745  JmpOpc = X86::JE_1;
1746  }
1747 
1748  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
1749  .addMBB(TrueMBB);
1750 
1751  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1752  return true;
1753  }
1754  }
1755  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1756  // Fake request the condition, otherwise the intrinsic might be completely
1757  // optimized away.
1758  unsigned TmpReg = getRegForValue(BI->getCondition());
1759  if (TmpReg == 0)
1760  return false;
1761 
1762  unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
1763 
1764  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1765  .addMBB(TrueMBB);
1766  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1767  return true;
1768  }
1769 
1770  // Otherwise do a clumsy setcc and re-test it.
1771  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1772  // in an explicit cast, so make sure to handle that correctly.
1773  unsigned OpReg = getRegForValue(BI->getCondition());
1774  if (OpReg == 0) return false;
1775 
1776  // In case OpReg is a K register, COPY to a GPR
1777  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1778  unsigned KOpReg = OpReg;
1779  OpReg = createResultReg(&X86::GR32RegClass);
1780  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1781  TII.get(TargetOpcode::COPY), OpReg)
1782  .addReg(KOpReg);
1783  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
1784  X86::sub_8bit);
1785  }
  // Test the condition register against 1 and branch to the true target on
  // a non-zero result.
1786  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1787  .addReg(OpReg)
1788  .addImm(1);
1789  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
1790  .addMBB(TrueMBB);
1791  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1792  return true;
1793 }
1794 
1795 bool X86FastISel::X86SelectShift(const Instruction *I) {
1796  unsigned CReg = 0, OpReg = 0;
1797  const TargetRegisterClass *RC = nullptr;
1798  if (I->getType()->isIntegerTy(8)) {
1799  CReg = X86::CL;
1800  RC = &X86::GR8RegClass;
1801  switch (I->getOpcode()) {
1802  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1803  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1804  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1805  default: return false;
1806  }
1807  } else if (I->getType()->isIntegerTy(16)) {
1808  CReg = X86::CX;
1809  RC = &X86::GR16RegClass;
1810  switch (I->getOpcode()) {
1811  default: llvm_unreachable("Unexpected shift opcode");
1812  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1813  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1814  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1815  }
1816  } else if (I->getType()->isIntegerTy(32)) {
1817  CReg = X86::ECX;
1818  RC = &X86::GR32RegClass;
1819  switch (I->getOpcode()) {
1820  default: llvm_unreachable("Unexpected shift opcode");
1821  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1822  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1823  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1824  }
1825  } else if (I->getType()->isIntegerTy(64)) {
1826  CReg = X86::RCX;
1827  RC = &X86::GR64RegClass;
1828  switch (I->getOpcode()) {
1829  default: llvm_unreachable("Unexpected shift opcode");
1830  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1831  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1832  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1833  }
1834  } else {
1835  return false;
1836  }
1837 
1838  MVT VT;
1839  if (!isTypeLegal(I->getType(), VT))
1840  return false;
1841 
1842  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1843  if (Op0Reg == 0) return false;
1844 
1845  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1846  if (Op1Reg == 0) return false;
1847  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1848  CReg).addReg(Op1Reg);
1849 
1850  // The shift instruction uses X86::CL. If we defined a super-register
1851  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1852  if (CReg != X86::CL)
1853  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1854  TII.get(TargetOpcode::KILL), X86::CL)
1855  .addReg(CReg, RegState::Kill);
1856 
1857  unsigned ResultReg = createResultReg(RC);
1858  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1859  .addReg(Op0Reg);
1860  updateValueMap(I, ResultReg);
1861  return true;
1862 }
1863 
1864 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1865  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1866  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1867  const static bool S = true; // IsSigned
1868  const static bool U = false; // !IsSigned
1869  const static unsigned Copy = TargetOpcode::COPY;
1870  // For the X86 DIV/IDIV instruction, in most cases the dividend
1871  // (numerator) must be in a specific register pair highreg:lowreg,
1872  // producing the quotient in lowreg and the remainder in highreg.
1873  // For most data types, to set up the instruction, the dividend is
1874  // copied into lowreg, and lowreg is sign-extended or zero-extended
1875  // into highreg. The exception is i8, where the dividend is defined
1876  // as a single register rather than a register pair, and we
1877  // therefore directly sign-extend or zero-extend the dividend into
1878  // lowreg, instead of copying, and ignore the highreg.
1879  const static struct DivRemEntry {
1880  // The following portion depends only on the data type.
1881  const TargetRegisterClass *RC;
1882  unsigned LowInReg; // low part of the register pair
1883  unsigned HighInReg; // high part of the register pair
1884  // The following portion depends on both the data type and the operation.
1885  struct DivRemResult {
1886  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1887  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1888  // highreg, or copying a zero into highreg.
1889  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1890  // zero/sign-extending into lowreg for i8.
1891  unsigned DivRemResultReg; // Register containing the desired result.
1892  bool IsOpSigned; // Whether to use signed or unsigned form.
1893  } ResultTable[NumOps];
1894  } OpTable[NumTypes] = {
1895  { &X86::GR8RegClass, X86::AX, 0, {
1896  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1897  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1898  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1899  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1900  }
1901  }, // i8
1902  { &X86::GR16RegClass, X86::AX, X86::DX, {
1903  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1904  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1905  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1906  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1907  }
1908  }, // i16
1909  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1910  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1911  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1912  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1913  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1914  }
1915  }, // i32
1916  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1917  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1918  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1919  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1920  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1921  }
1922  }, // i64
1923  };
1924 
1925  MVT VT;
1926  if (!isTypeLegal(I->getType(), VT))
1927  return false;
1928 
1929  unsigned TypeIndex, OpIndex;
1930  switch (VT.SimpleTy) {
1931  default: return false;
1932  case MVT::i8: TypeIndex = 0; break;
1933  case MVT::i16: TypeIndex = 1; break;
1934  case MVT::i32: TypeIndex = 2; break;
1935  case MVT::i64: TypeIndex = 3;
1936  if (!Subtarget->is64Bit())
1937  return false;
1938  break;
1939  }
1940 
1941  switch (I->getOpcode()) {
1942  default: llvm_unreachable("Unexpected div/rem opcode");
1943  case Instruction::SDiv: OpIndex = 0; break;
1944  case Instruction::SRem: OpIndex = 1; break;
1945  case Instruction::UDiv: OpIndex = 2; break;
1946  case Instruction::URem: OpIndex = 3; break;
1947  }
1948 
1949  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1950  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1951  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1952  if (Op0Reg == 0)
1953  return false;
1954  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1955  if (Op1Reg == 0)
1956  return false;
1957 
1958  // Move op0 into low-order input register.
1959  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1960  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1961  // Zero-extend or sign-extend into high-order input register.
1962  if (OpEntry.OpSignExtend) {
1963  if (OpEntry.IsOpSigned)
1964  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1965  TII.get(OpEntry.OpSignExtend));
1966  else {
1967  unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1968  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1969  TII.get(X86::MOV32r0), Zero32);
1970 
1971  // Copy the zero into the appropriate sub/super/identical physical
1972  // register. Unfortunately the operations needed are not uniform enough
1973  // to fit neatly into the table above.
1974  if (VT == MVT::i16) {
1975  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1976  TII.get(Copy), TypeEntry.HighInReg)
1977  .addReg(Zero32, 0, X86::sub_16bit);
1978  } else if (VT == MVT::i32) {
1979  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1980  TII.get(Copy), TypeEntry.HighInReg)
1981  .addReg(Zero32);
1982  } else if (VT == MVT::i64) {
1983  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1984  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1985  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1986  }
1987  }
1988  }
1989  // Generate the DIV/IDIV instruction.
1990  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1991  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1992  // For i8 remainder, we can't reference ah directly, as we'll end
1993  // up with bogus copies like %r9b = COPY %ah. Reference ax
1994  // instead to prevent ah references in a rex instruction.
1995  //
1996  // The current assumption of the fast register allocator is that isel
1997  // won't generate explicit references to the GR8_NOREX registers. If
1998  // the allocator and/or the backend get enhanced to be more robust in
1999  // that regard, this can be, and should be, removed.
2000  unsigned ResultReg = 0;
2001  if ((I->getOpcode() == Instruction::SRem ||
2002  I->getOpcode() == Instruction::URem) &&
2003  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
2004  unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
2005  unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
2006  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2007  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
2008 
2009  // Shift AX right by 8 bits instead of using AH.
2010  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
2011  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
2012 
2013  // Now reference the 8-bit subreg of the result.
2014  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2015  /*Kill=*/true, X86::sub_8bit);
2016  }
2017  // Copy the result out of the physreg if we haven't already.
2018  if (!ResultReg) {
2019  ResultReg = createResultReg(TypeEntry.RC);
2020  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
2021  .addReg(OpEntry.DivRemResultReg);
2022  }
2023  updateValueMap(I, ResultReg);
2024 
2025  return true;
2026 }
2027 
2028 /// Emit a conditional move instruction (if the are supported) to lower
2029 /// the select.
2030 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2031  // Check if the subtarget supports these instructions.
2032  if (!Subtarget->hasCMov())
2033  return false;
2034 
2035  // FIXME: Add support for i8.
2036  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2037  return false;
2038 
2039  const Value *Cond = I->getOperand(0);
2040  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2041  bool NeedTest = true;
2043 
2044  // Optimize conditions coming from a compare if both instructions are in the
2045  // same basic block (values defined in other basic blocks may not have
2046  // initialized registers).
2047  const auto *CI = dyn_cast<CmpInst>(Cond);
2048  if (CI && (CI->getParent() == I->getParent())) {
2049  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2050 
2051  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2052  static const uint16_t SETFOpcTable[2][3] = {
2053  { X86::SETNPr, X86::SETEr , X86::TEST8rr },
2054  { X86::SETPr, X86::SETNEr, X86::OR8rr }
2055  };
2056  const uint16_t *SETFOpc = nullptr;
2057  switch (Predicate) {
2058  default: break;
2059  case CmpInst::FCMP_OEQ:
2060  SETFOpc = &SETFOpcTable[0][0];
2061  Predicate = CmpInst::ICMP_NE;
2062  break;
2063  case CmpInst::FCMP_UNE:
2064  SETFOpc = &SETFOpcTable[1][0];
2065  Predicate = CmpInst::ICMP_NE;
2066  break;
2067  }
2068 
2069  bool NeedSwap;
2070  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2071  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2072 
2073  const Value *CmpLHS = CI->getOperand(0);
2074  const Value *CmpRHS = CI->getOperand(1);
2075  if (NeedSwap)
2076  std::swap(CmpLHS, CmpRHS);
2077 
2078  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2079  // Emit a compare of the LHS and RHS, setting the flags.
2080  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2081  return false;
2082 
2083  if (SETFOpc) {
2084  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
2085  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
2086  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
2087  FlagReg1);
2088  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
2089  FlagReg2);
2090  auto const &II = TII.get(SETFOpc[2]);
2091  if (II.getNumDefs()) {
2092  unsigned TmpReg = createResultReg(&X86::GR8RegClass);
2093  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2094  .addReg(FlagReg2).addReg(FlagReg1);
2095  } else {
2096  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2097  .addReg(FlagReg2).addReg(FlagReg1);
2098  }
2099  }
2100  NeedTest = false;
2101  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2102  // Fake request the condition, otherwise the intrinsic might be completely
2103  // optimized away.
2104  unsigned TmpReg = getRegForValue(Cond);
2105  if (TmpReg == 0)
2106  return false;
2107 
2108  NeedTest = false;
2109  }
2110 
2111  if (NeedTest) {
2112  // Selects operate on i1, however, CondReg is 8 bits width and may contain
2113  // garbage. Indeed, only the less significant bit is supposed to be
2114  // accurate. If we read more than the lsb, we may see non-zero values
2115  // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for
2116  // the select. This is achieved by performing TEST against 1.
2117  unsigned CondReg = getRegForValue(Cond);
2118  if (CondReg == 0)
2119  return false;
2120  bool CondIsKill = hasTrivialKill(Cond);
2121 
2122  // In case OpReg is a K register, COPY to a GPR
2123  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2124  unsigned KCondReg = CondReg;
2125  CondReg = createResultReg(&X86::GR32RegClass);
2126  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2127  TII.get(TargetOpcode::COPY), CondReg)
2128  .addReg(KCondReg, getKillRegState(CondIsKill));
2129  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2130  X86::sub_8bit);
2131  }
2132  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2133  .addReg(CondReg, getKillRegState(CondIsKill))
2134  .addImm(1);
2135  }
2136 
2137  const Value *LHS = I->getOperand(1);
2138  const Value *RHS = I->getOperand(2);
2139 
2140  unsigned RHSReg = getRegForValue(RHS);
2141  bool RHSIsKill = hasTrivialKill(RHS);
2142 
2143  unsigned LHSReg = getRegForValue(LHS);
2144  bool LHSIsKill = hasTrivialKill(LHS);
2145 
2146  if (!LHSReg || !RHSReg)
2147  return false;
2148 
2149  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2150  unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
2151  unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
2152  LHSReg, LHSIsKill);
2153  updateValueMap(I, ResultReg);
2154  return true;
2155 }
2156 
2157 /// Emit SSE or AVX instructions to lower the select.
2158 ///
2159 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2160 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2161 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
2162 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2163  // Optimize conditions coming from a compare if both instructions are in the
2164  // same basic block (values defined in other basic blocks may not have
2165  // initialized registers).
2166  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2167  if (!CI || (CI->getParent() != I->getParent()))
2168  return false;
2169 
2170  if (I->getType() != CI->getOperand(0)->getType() ||
2171  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2172  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2173  return false;
2174 
2175  const Value *CmpLHS = CI->getOperand(0);
2176  const Value *CmpRHS = CI->getOperand(1);
2177  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2178 
2179  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2180  // We don't have to materialize a zero constant for this case and can just use
2181  // %x again on the RHS.
2182  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2183  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2184  if (CmpRHSC && CmpRHSC->isNullValue())
2185  CmpRHS = CmpLHS;
2186  }
2187 
2188  unsigned CC;
2189  bool NeedSwap;
2190  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2191  if (CC > 7 && !Subtarget->hasAVX())
2192  return false;
2193 
2194  if (NeedSwap)
2195  std::swap(CmpLHS, CmpRHS);
2196 
2197  // Choose the SSE instruction sequence based on data type (float or double).
2198  static const uint16_t OpcTable[2][4] = {
2199  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2200  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2201  };
2202 
2203  const uint16_t *Opc = nullptr;
2204  switch (RetVT.SimpleTy) {
2205  default: return false;
2206  case MVT::f32: Opc = &OpcTable[0][0]; break;
2207  case MVT::f64: Opc = &OpcTable[1][0]; break;
2208  }
2209 
2210  const Value *LHS = I->getOperand(1);
2211  const Value *RHS = I->getOperand(2);
2212 
2213  unsigned LHSReg = getRegForValue(LHS);
2214  bool LHSIsKill = hasTrivialKill(LHS);
2215 
2216  unsigned RHSReg = getRegForValue(RHS);
2217  bool RHSIsKill = hasTrivialKill(RHS);
2218 
2219  unsigned CmpLHSReg = getRegForValue(CmpLHS);
2220  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
2221 
2222  unsigned CmpRHSReg = getRegForValue(CmpRHS);
2223  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
2224 
2225  if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS)
2226  return false;
2227 
2228  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2229  unsigned ResultReg;
2230 
2231  if (Subtarget->hasAVX512()) {
2232  // If we have AVX512 we can use a mask compare and masked movss/sd.
2233  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2234  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2235 
2236  unsigned CmpOpcode =
2237  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2238  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
2239  CmpRHSReg, CmpRHSIsKill, CC);
2240 
2241  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2242  // bits of the result register since its not based on any of the inputs.
2243  unsigned ImplicitDefReg = createResultReg(VR128X);
2244  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2245  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2246 
2247  // Place RHSReg is the passthru of the masked movss/sd operation and put
2248  // LHS in the input. The mask input comes from the compare.
2249  unsigned MovOpcode =
2250  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2251  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
2252  CmpReg, true, ImplicitDefReg, true,
2253  LHSReg, LHSIsKill);
2254 
2255  ResultReg = createResultReg(RC);
2256  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2257  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2258 
2259  } else if (Subtarget->hasAVX()) {
2260  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2261 
2262  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2263  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2264  // uses XMM0 as the selection register. That may need just as many
2265  // instructions as the AND/ANDN/OR sequence due to register moves, so
2266  // don't bother.
2267  unsigned CmpOpcode =
2268  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2269  unsigned BlendOpcode =
2270  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2271 
2272  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
2273  CmpRHSReg, CmpRHSIsKill, CC);
2274  unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
2275  LHSReg, LHSIsKill, CmpReg, true);
2276  ResultReg = createResultReg(RC);
2277  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2278  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2279  } else {
2280  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2281  unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
2282  CmpRHSReg, CmpRHSIsKill, CC);
2283  unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
2284  LHSReg, LHSIsKill);
2285  unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
2286  RHSReg, RHSIsKill);
2287  unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
2288  AndReg, /*IsKill=*/true);
2289  ResultReg = createResultReg(RC);
2290  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2291  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2292  }
2293  updateValueMap(I, ResultReg);
2294  return true;
2295 }
2296 
2297 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2298  // These are pseudo CMOV instructions and will be later expanded into control-
2299  // flow.
2300  unsigned Opc;
2301  switch (RetVT.SimpleTy) {
2302  default: return false;
2303  case MVT::i8: Opc = X86::CMOV_GR8; break;
2304  case MVT::i16: Opc = X86::CMOV_GR16; break;
2305  case MVT::i32: Opc = X86::CMOV_GR32; break;
2306  case MVT::f32: Opc = X86::CMOV_FR32; break;
2307  case MVT::f64: Opc = X86::CMOV_FR64; break;
2308  }
2309 
2310  const Value *Cond = I->getOperand(0);
2312 
2313  // Optimize conditions coming from a compare if both instructions are in the
2314  // same basic block (values defined in other basic blocks may not have
2315  // initialized registers).
2316  const auto *CI = dyn_cast<CmpInst>(Cond);
2317  if (CI && (CI->getParent() == I->getParent())) {
2318  bool NeedSwap;
2319  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2320  if (CC > X86::LAST_VALID_COND)
2321  return false;
2322 
2323  const Value *CmpLHS = CI->getOperand(0);
2324  const Value *CmpRHS = CI->getOperand(1);
2325 
2326  if (NeedSwap)
2327  std::swap(CmpLHS, CmpRHS);
2328 
2329  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2330  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2331  return false;
2332  } else {
2333  unsigned CondReg = getRegForValue(Cond);
2334  if (CondReg == 0)
2335  return false;
2336  bool CondIsKill = hasTrivialKill(Cond);
2337 
2338  // In case OpReg is a K register, COPY to a GPR
2339  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2340  unsigned KCondReg = CondReg;
2341  CondReg = createResultReg(&X86::GR32RegClass);
2342  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2343  TII.get(TargetOpcode::COPY), CondReg)
2344  .addReg(KCondReg, getKillRegState(CondIsKill));
2345  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2346  X86::sub_8bit);
2347  }
2348  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2349  .addReg(CondReg, getKillRegState(CondIsKill))
2350  .addImm(1);
2351  }
2352 
2353  const Value *LHS = I->getOperand(1);
2354  const Value *RHS = I->getOperand(2);
2355 
2356  unsigned LHSReg = getRegForValue(LHS);
2357  bool LHSIsKill = hasTrivialKill(LHS);
2358 
2359  unsigned RHSReg = getRegForValue(RHS);
2360  bool RHSIsKill = hasTrivialKill(RHS);
2361 
2362  if (!LHSReg || !RHSReg)
2363  return false;
2364 
2365  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2366 
2367  unsigned ResultReg =
2368  fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
2369  updateValueMap(I, ResultReg);
2370  return true;
2371 }
2372 
2373 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2374  MVT RetVT;
2375  if (!isTypeLegal(I->getType(), RetVT))
2376  return false;
2377 
2378  // Check if we can fold the select.
2379  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2380  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2381  const Value *Opnd = nullptr;
2382  switch (Predicate) {
2383  default: break;
2384  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2385  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2386  }
2387  // No need for a select anymore - this is an unconditional move.
2388  if (Opnd) {
2389  unsigned OpReg = getRegForValue(Opnd);
2390  if (OpReg == 0)
2391  return false;
2392  bool OpIsKill = hasTrivialKill(Opnd);
2393  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2394  unsigned ResultReg = createResultReg(RC);
2395  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2396  TII.get(TargetOpcode::COPY), ResultReg)
2397  .addReg(OpReg, getKillRegState(OpIsKill));
2398  updateValueMap(I, ResultReg);
2399  return true;
2400  }
2401  }
2402 
2403  // First try to use real conditional move instructions.
2404  if (X86FastEmitCMoveSelect(RetVT, I))
2405  return true;
2406 
2407  // Try to use a sequence of SSE instructions to simulate a conditional move.
2408  if (X86FastEmitSSESelect(RetVT, I))
2409  return true;
2410 
2411  // Fall-back to pseudo conditional move instructions, which will be later
2412  // converted to control-flow.
2413  if (X86FastEmitPseudoSelect(RetVT, I))
2414  return true;
2415 
2416  return false;
2417 }
2418 
2419 // Common code for X86SelectSIToFP and X86SelectUIToFP.
2420 bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2421  // The target-independent selection algorithm in FastISel already knows how
2422  // to select a SINT_TO_FP if the target is SSE but not AVX.
2423  // Early exit if the subtarget doesn't have AVX.
2424  // Unsigned conversion requires avx512.
2425  bool HasAVX512 = Subtarget->hasAVX512();
2426  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2427  return false;
2428 
2429  // TODO: We could sign extend narrower types.
2430  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2431  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
2432  return false;
2433 
2434  // Select integer to float/double conversion.
2435  unsigned OpReg = getRegForValue(I->getOperand(0));
2436  if (OpReg == 0)
2437  return false;
2438 
2439  unsigned Opcode;
2440 
2441  static const uint16_t SCvtOpc[2][2][2] = {
2442  { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2443  { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2444  { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2445  { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2446  };
2447  static const uint16_t UCvtOpc[2][2] = {
2448  { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2449  { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2450  };
2451  bool Is64Bit = SrcVT == MVT::i64;
2452 
2453  if (I->getType()->isDoubleTy()) {
2454  // s/uitofp int -> double
2455  Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2456  } else if (I->getType()->isFloatTy()) {
2457  // s/uitofp int -> float
2458  Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2459  } else
2460  return false;
2461 
2462  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
2463  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
2464  unsigned ImplicitDefReg = createResultReg(RC);
2465  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2466  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2467  unsigned ResultReg =
2468  fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
2469  updateValueMap(I, ResultReg);
2470  return true;
2471 }
2472 
2473 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2474  return X86SelectIntToFP(I, /*IsSigned*/true);
2475 }
2476 
2477 bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2478  return X86SelectIntToFP(I, /*IsSigned*/false);
2479 }
2480 
2481 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2482 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2483  unsigned TargetOpc,
2484  const TargetRegisterClass *RC) {
2485  assert((I->getOpcode() == Instruction::FPExt ||
2486  I->getOpcode() == Instruction::FPTrunc) &&
2487  "Instruction must be an FPExt or FPTrunc!");
2488 
2489  unsigned OpReg = getRegForValue(I->getOperand(0));
2490  if (OpReg == 0)
2491  return false;
2492 
2493  unsigned ImplicitDefReg;
2494  if (Subtarget->hasAVX()) {
2495  ImplicitDefReg = createResultReg(RC);
2496  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2497  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2498 
2499  }
2500 
2501  unsigned ResultReg = createResultReg(RC);
2502  MachineInstrBuilder MIB;
2503  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2504  ResultReg);
2505 
2506  if (Subtarget->hasAVX())
2507  MIB.addReg(ImplicitDefReg);
2508 
2509  MIB.addReg(OpReg);
2510  updateValueMap(I, ResultReg);
2511  return true;
2512 }
2513 
2514 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2515  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2516  I->getOperand(0)->getType()->isFloatTy()) {
2517  bool HasAVX512 = Subtarget->hasAVX512();
2518  // fpext from float to double.
2519  unsigned Opc =
2520  HasAVX512 ? X86::VCVTSS2SDZrr
2521  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2522  return X86SelectFPExtOrFPTrunc(
2523  I, Opc, HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass);
2524  }
2525 
2526  return false;
2527 }
2528 
2529 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2530  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2531  I->getOperand(0)->getType()->isDoubleTy()) {
2532  bool HasAVX512 = Subtarget->hasAVX512();
2533  // fptrunc from double to float.
2534  unsigned Opc =
2535  HasAVX512 ? X86::VCVTSD2SSZrr
2536  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2537  return X86SelectFPExtOrFPTrunc(
2538  I, Opc, HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass);
2539  }
2540 
2541  return false;
2542 }
2543 
2544 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2545  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2546  EVT DstVT = TLI.getValueType(DL, I->getType());
2547 
2548  // This code only handles truncation to byte.
2549  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2550  return false;
2551  if (!TLI.isTypeLegal(SrcVT))
2552  return false;
2553 
2554  unsigned InputReg = getRegForValue(I->getOperand(0));
2555  if (!InputReg)
2556  // Unhandled operand. Halt "fast" selection and bail.
2557  return false;
2558 
2559  if (SrcVT == MVT::i8) {
2560  // Truncate from i8 to i1; no code needed.
2561  updateValueMap(I, InputReg);
2562  return true;
2563  }
2564 
2565  // Issue an extract_subreg.
2566  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2567  InputReg, false,
2568  X86::sub_8bit);
2569  if (!ResultReg)
2570  return false;
2571 
2572  updateValueMap(I, ResultReg);
2573  return true;
2574 }
2575 
2576 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2577  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2578 }
2579 
2580 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2581  X86AddressMode SrcAM, uint64_t Len) {
2582 
2583  // Make sure we don't bloat code by inlining very large memcpy's.
2584  if (!IsMemcpySmall(Len))
2585  return false;
2586 
2587  bool i64Legal = Subtarget->is64Bit();
2588 
2589  // We don't care about alignment here since we just emit integer accesses.
2590  while (Len) {
2591  MVT VT;
2592  if (Len >= 8 && i64Legal)
2593  VT = MVT::i64;
2594  else if (Len >= 4)
2595  VT = MVT::i32;
2596  else if (Len >= 2)
2597  VT = MVT::i16;
2598  else
2599  VT = MVT::i8;
2600 
2601  unsigned Reg;
2602  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2603  RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2604  assert(RV && "Failed to emit load or store??");
2605 
2606  unsigned Size = VT.getSizeInBits()/8;
2607  Len -= Size;
2608  DestAM.Disp += Size;
2609  SrcAM.Disp += Size;
2610  }
2611 
2612  return true;
2613 }
2614 
2615 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2616  // FIXME: Handle more intrinsics.
2617  switch (II->getIntrinsicID()) {
2618  default: return false;
2619  case Intrinsic::convert_from_fp16:
2620  case Intrinsic::convert_to_fp16: {
2621  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2622  return false;
2623 
2624  const Value *Op = II->getArgOperand(0);
2625  unsigned InputReg = getRegForValue(Op);
2626  if (InputReg == 0)
2627  return false;
2628 
2629  // F16C only allows converting from float to half and from half to float.
2630  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2631  if (IsFloatToHalf) {
2632  if (!Op->getType()->isFloatTy())
2633  return false;
2634  } else {
2635  if (!II->getType()->isFloatTy())
2636  return false;
2637  }
2638 
2639  unsigned ResultReg = 0;
2640  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2641  if (IsFloatToHalf) {
2642  // 'InputReg' is implicitly promoted from register class FR32 to
2643  // register class VR128 by method 'constrainOperandRegClass' which is
2644  // directly called by 'fastEmitInst_ri'.
2645  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2646  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2647  // It's consistent with the other FP instructions, which are usually
2648  // controlled by MXCSR.
2649  InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
2650 
2651  // Move the lower 32-bits of ResultReg to another register of class GR32.
2652  ResultReg = createResultReg(&X86::GR32RegClass);
2653  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2654  TII.get(X86::VMOVPDI2DIrr), ResultReg)
2655  .addReg(InputReg, RegState::Kill);
2656 
2657  // The result value is in the lower 16-bits of ResultReg.
2658  unsigned RegIdx = X86::sub_16bit;
2659  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
2660  } else {
2661  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2662  // Explicitly sign-extend the input to 32-bit.
2663  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
2664  /*Kill=*/false);
2665 
2666  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2667  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2668  InputReg, /*Kill=*/true);
2669 
2670  InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
2671 
2672  // The result value is in the lower 32-bits of ResultReg.
2673  // Emit an explicit copy from register class VR128 to register class FR32.
2674  ResultReg = createResultReg(&X86::FR32RegClass);
2675  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2676  TII.get(TargetOpcode::COPY), ResultReg)
2677  .addReg(InputReg, RegState::Kill);
2678  }
2679 
2680  updateValueMap(II, ResultReg);
2681  return true;
2682  }
2683  case Intrinsic::frameaddress: {
2684  MachineFunction *MF = FuncInfo.MF;
2685  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2686  return false;
2687 
2688  Type *RetTy = II->getCalledFunction()->getReturnType();
2689 
2690  MVT VT;
2691  if (!isTypeLegal(RetTy, VT))
2692  return false;
2693 
2694  unsigned Opc;
2695  const TargetRegisterClass *RC = nullptr;
2696 
2697  switch (VT.SimpleTy) {
2698  default: llvm_unreachable("Invalid result type for frameaddress.");
2699  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2700  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2701  }
2702 
2703  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2704  // we get the wrong frame register.
2705  MachineFrameInfo &MFI = MF->getFrameInfo();
2706  MFI.setFrameAddressIsTaken(true);
2707 
2708  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2709  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2710  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2711  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2712  "Invalid Frame Register!");
2713 
2714  // Always make a copy of the frame register to a vreg first, so that we
2715  // never directly reference the frame register (the TwoAddressInstruction-
2716  // Pass doesn't like that).
2717  unsigned SrcReg = createResultReg(RC);
2718  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2719  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2720 
2721  // Now recursively load from the frame address.
2722  // movq (%rbp), %rax
2723  // movq (%rax), %rax
2724  // movq (%rax), %rax
2725  // ...
2726  unsigned DestReg;
2727  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2728  while (Depth--) {
2729  DestReg = createResultReg(RC);
2730  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2731  TII.get(Opc), DestReg), SrcReg);
2732  SrcReg = DestReg;
2733  }
2734 
2735  updateValueMap(II, SrcReg);
2736  return true;
2737  }
2738  case Intrinsic::memcpy: {
2739  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2740  // Don't handle volatile or variable length memcpys.
2741  if (MCI->isVolatile())
2742  return false;
2743 
2744  if (isa<ConstantInt>(MCI->getLength())) {
2745  // Small memcpy's are common enough that we want to do them
2746  // without a call if possible.
2747  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2748  if (IsMemcpySmall(Len)) {
2749  X86AddressMode DestAM, SrcAM;
2750  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2751  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2752  return false;
2753  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2754  return true;
2755  }
2756  }
2757 
2758  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2759  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2760  return false;
2761 
2762  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2763  return false;
2764 
2765  return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
2766  }
2767  case Intrinsic::memset: {
2768  const MemSetInst *MSI = cast<MemSetInst>(II);
2769 
2770  if (MSI->isVolatile())
2771  return false;
2772 
2773  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2774  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2775  return false;
2776 
2777  if (MSI->getDestAddressSpace() > 255)
2778  return false;
2779 
2780  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
2781  }
2782  case Intrinsic::stackprotector: {
2783  // Emit code to store the stack guard onto the stack.
2784  EVT PtrTy = TLI.getPointerTy(DL);
2785 
2786  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2787  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2788 
2789  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2790 
2791  // Grab the frame index.
2792  X86AddressMode AM;
2793  if (!X86SelectAddress(Slot, AM)) return false;
2794  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2795  return true;
2796  }
2797  case Intrinsic::dbg_declare: {
2798  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2799  X86AddressMode AM;
2800  assert(DI->getAddress() && "Null address should be checked earlier!");
2801  if (!X86SelectAddress(DI->getAddress(), AM))
2802  return false;
2803  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2804  // FIXME may need to add RegState::Debug to any registers produced,
2805  // although ESP/EBP should be the only ones at the moment.
2807  "Expected inlined-at fields to agree");
2808  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2809  .addImm(0)
2810  .addMetadata(DI->getVariable())
2811  .addMetadata(DI->getExpression());
2812  return true;
2813  }
2814  case Intrinsic::trap: {
2815  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2816  return true;
2817  }
2818  case Intrinsic::sqrt: {
2819  if (!Subtarget->hasSSE1())
2820  return false;
2821 
2822  Type *RetTy = II->getCalledFunction()->getReturnType();
2823 
2824  MVT VT;
2825  if (!isTypeLegal(RetTy, VT))
2826  return false;
2827 
2828  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2829  // is not generated by FastISel yet.
2830  // FIXME: Update this code once tablegen can handle it.
2831  static const uint16_t SqrtOpc[3][2] = {
2832  { X86::SQRTSSr, X86::SQRTSDr },
2833  { X86::VSQRTSSr, X86::VSQRTSDr },
2834  { X86::VSQRTSSZr, X86::VSQRTSDZr },
2835  };
2836  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2837  Subtarget->hasAVX() ? 1 :
2838  0;
2839  unsigned Opc;
2840  switch (VT.SimpleTy) {
2841  default: return false;
2842  case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2843  case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2844  }
2845 
2846  const Value *SrcVal = II->getArgOperand(0);
2847  unsigned SrcReg = getRegForValue(SrcVal);
2848 
2849  if (SrcReg == 0)
2850  return false;
2851 
2852  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2853  unsigned ImplicitDefReg = 0;
2854  if (AVXLevel > 0) {
2855  ImplicitDefReg = createResultReg(RC);
2856  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2857  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2858  }
2859 
2860  unsigned ResultReg = createResultReg(RC);
2861  MachineInstrBuilder MIB;
2862  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2863  ResultReg);
2864 
2865  if (ImplicitDefReg)
2866  MIB.addReg(ImplicitDefReg);
2867 
2868  MIB.addReg(SrcReg);
2869 
2870  updateValueMap(II, ResultReg);
2871  return true;
2872  }
2873  case Intrinsic::sadd_with_overflow:
2874  case Intrinsic::uadd_with_overflow:
2875  case Intrinsic::ssub_with_overflow:
2876  case Intrinsic::usub_with_overflow:
2877  case Intrinsic::smul_with_overflow:
2878  case Intrinsic::umul_with_overflow: {
2879  // This implements the basic lowering of the xalu with overflow intrinsics
2880  // into add/sub/mul followed by either seto or setb.
2881  const Function *Callee = II->getCalledFunction();
2882  auto *Ty = cast<StructType>(Callee->getReturnType());
2883  Type *RetTy = Ty->getTypeAtIndex(0U);
2884  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2885  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2886  "Overflow value expected to be an i1");
2887 
2888  MVT VT;
2889  if (!isTypeLegal(RetTy, VT))
2890  return false;
2891 
2892  if (VT < MVT::i8 || VT > MVT::i64)
2893  return false;
2894 
2895  const Value *LHS = II->getArgOperand(0);
2896  const Value *RHS = II->getArgOperand(1);
2897 
2898  // Canonicalize immediate to the RHS.
2899  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2900  isCommutativeIntrinsic(II))
2901  std::swap(LHS, RHS);
2902 
2903  bool UseIncDec = false;
2904  if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
2905  UseIncDec = true;
2906 
2907  unsigned BaseOpc, CondOpc;
2908  switch (II->getIntrinsicID()) {
2909  default: llvm_unreachable("Unexpected intrinsic!");
2910  case Intrinsic::sadd_with_overflow:
2911  BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
2912  CondOpc = X86::SETOr;
2913  break;
2914  case Intrinsic::uadd_with_overflow:
2915  BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
2916  case Intrinsic::ssub_with_overflow:
2917  BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
2918  CondOpc = X86::SETOr;
2919  break;
2920  case Intrinsic::usub_with_overflow:
2921  BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
2922  case Intrinsic::smul_with_overflow:
2923  BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
2924  case Intrinsic::umul_with_overflow:
2925  BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
2926  }
2927 
2928  unsigned LHSReg = getRegForValue(LHS);
2929  if (LHSReg == 0)
2930  return false;
2931  bool LHSIsKill = hasTrivialKill(LHS);
2932 
2933  unsigned ResultReg = 0;
2934  // Check if we have an immediate version.
2935  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2936  static const uint16_t Opc[2][4] = {
2937  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2938  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2939  };
2940 
2941  if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
2942  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2943  bool IsDec = BaseOpc == X86ISD::DEC;
2944  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2945  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2946  .addReg(LHSReg, getKillRegState(LHSIsKill));
2947  } else
2948  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2949  CI->getZExtValue());
2950  }
2951 
2952  unsigned RHSReg;
2953  bool RHSIsKill;
2954  if (!ResultReg) {
2955  RHSReg = getRegForValue(RHS);
2956  if (RHSReg == 0)
2957  return false;
2958  RHSIsKill = hasTrivialKill(RHS);
2959  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2960  RHSIsKill);
2961  }
2962 
2963  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2964  // it manually.
2965  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2966  static const uint16_t MULOpc[] =
2967  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2968  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2969  // First copy the first operand into RAX, which is an implicit input to
2970  // the X86::MUL*r instruction.
2971  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2972  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2973  .addReg(LHSReg, getKillRegState(LHSIsKill));
2974  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2975  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2976  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2977  static const uint16_t MULOpc[] =
2978  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2979  if (VT == MVT::i8) {
2980  // Copy the first operand into AL, which is an implicit input to the
2981  // X86::IMUL8r instruction.
2982  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2983  TII.get(TargetOpcode::COPY), X86::AL)
2984  .addReg(LHSReg, getKillRegState(LHSIsKill));
2985  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2986  RHSIsKill);
2987  } else
2988  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2989  TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2990  RHSReg, RHSIsKill);
2991  }
2992 
2993  if (!ResultReg)
2994  return false;
2995 
2996  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2997  unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
2998  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2999  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
3000  ResultReg2);
3001 
3002  updateValueMap(II, ResultReg, 2);
3003  return true;
3004  }
3005  case Intrinsic::x86_sse_cvttss2si:
3006  case Intrinsic::x86_sse_cvttss2si64:
3007  case Intrinsic::x86_sse2_cvttsd2si:
3008  case Intrinsic::x86_sse2_cvttsd2si64: {
3009  bool IsInputDouble;
3010  switch (II->getIntrinsicID()) {
3011  default: llvm_unreachable("Unexpected intrinsic.");
3012  case Intrinsic::x86_sse_cvttss2si:
3013  case Intrinsic::x86_sse_cvttss2si64:
3014  if (!Subtarget->hasSSE1())
3015  return false;
3016  IsInputDouble = false;
3017  break;
3018  case Intrinsic::x86_sse2_cvttsd2si:
3019  case Intrinsic::x86_sse2_cvttsd2si64:
3020  if (!Subtarget->hasSSE2())
3021  return false;
3022  IsInputDouble = true;
3023  break;
3024  }
3025 
3026  Type *RetTy = II->getCalledFunction()->getReturnType();
3027  MVT VT;
3028  if (!isTypeLegal(RetTy, VT))
3029  return false;
3030 
3031  static const uint16_t CvtOpc[3][2][2] = {
3032  { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
3033  { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
3034  { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
3035  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
3036  { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
3037  { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
3038  };
3039  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
3040  Subtarget->hasAVX() ? 1 :
3041  0;
3042  unsigned Opc;
3043  switch (VT.SimpleTy) {
3044  default: llvm_unreachable("Unexpected result type.");
3045  case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3046  case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3047  }
3048 
3049  // Check if we can fold insertelement instructions into the convert.
3050  const Value *Op = II->getArgOperand(0);
3051  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3052  const Value *Index = IE->getOperand(2);
3053  if (!isa<ConstantInt>(Index))
3054  break;
3055  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3056 
3057  if (Idx == 0) {
3058  Op = IE->getOperand(1);
3059  break;
3060  }
3061  Op = IE->getOperand(0);
3062  }
3063 
3064  unsigned Reg = getRegForValue(Op);
3065  if (Reg == 0)
3066  return false;
3067 
3068  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3069  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3070  .addReg(Reg);
3071 
3072  updateValueMap(II, ResultReg);
3073  return true;
3074  }
3075  }
3076 }
3077 
3078 bool X86FastISel::fastLowerArguments() {
3079  if (!FuncInfo.CanLowerReturn)
3080  return false;
3081 
3082  const Function *F = FuncInfo.Fn;
3083  if (F->isVarArg())
3084  return false;
3085 
3086  CallingConv::ID CC = F->getCallingConv();
3087  if (CC != CallingConv::C)
3088  return false;
3089 
3090  if (Subtarget->isCallingConvWin64(CC))
3091  return false;
3092 
3093  if (!Subtarget->is64Bit())
3094  return false;
3095 
3096  if (Subtarget->useSoftFloat())
3097  return false;
3098 
3099  // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
3100  unsigned GPRCnt = 0;
3101  unsigned FPRCnt = 0;
3102  for (auto const &Arg : F->args()) {
3103  if (Arg.hasAttribute(Attribute::ByVal) ||
3104  Arg.hasAttribute(Attribute::InReg) ||
3105  Arg.hasAttribute(Attribute::StructRet) ||
3106  Arg.hasAttribute(Attribute::SwiftSelf) ||
3107  Arg.hasAttribute(Attribute::SwiftError) ||
3108  Arg.hasAttribute(Attribute::Nest))
3109  return false;
3110 
3111  Type *ArgTy = Arg.getType();
3112  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3113  return false;
3114 
3115  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3116  if (!ArgVT.isSimple()) return false;
3117  switch (ArgVT.getSimpleVT().SimpleTy) {
3118  default: return false;
3119  case MVT::i32:
3120  case MVT::i64:
3121  ++GPRCnt;
3122  break;
3123  case MVT::f32:
3124  case MVT::f64:
3125  if (!Subtarget->hasSSE1())
3126  return false;
3127  ++FPRCnt;
3128  break;
3129  }
3130 
3131  if (GPRCnt > 6)
3132  return false;
3133 
3134  if (FPRCnt > 8)
3135  return false;
3136  }
3137 
3138  static const MCPhysReg GPR32ArgRegs[] = {
3139  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3140  };
3141  static const MCPhysReg GPR64ArgRegs[] = {
3142  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3143  };
3144  static const MCPhysReg XMMArgRegs[] = {
3145  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3146  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3147  };
3148 
3149  unsigned GPRIdx = 0;
3150  unsigned FPRIdx = 0;
3151  for (auto const &Arg : F->args()) {
3152  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3153  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3154  unsigned SrcReg;
3155  switch (VT.SimpleTy) {
3156  default: llvm_unreachable("Unexpected value type.");
3157  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3158  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3159  case MVT::f32: LLVM_FALLTHROUGH;
3160  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3161  }
3162  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3163  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3164  // Without this, EmitLiveInCopies may eliminate the livein if its only
3165  // use is a bitcast (which isn't turned into an instruction).
3166  unsigned ResultReg = createResultReg(RC);
3167  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3168  TII.get(TargetOpcode::COPY), ResultReg)
3169  .addReg(DstReg, getKillRegState(true));
3170  updateValueMap(&Arg, ResultReg);
3171  }
3172  return true;
3173 }
3174 
3175 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3176  CallingConv::ID CC,
3177  ImmutableCallSite *CS) {
3178  if (Subtarget->is64Bit())
3179  return 0;
3180  if (Subtarget->getTargetTriple().isOSMSVCRT())
3181  return 0;
3182  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3183  CC == CallingConv::HiPE)
3184  return 0;
3185 
3186  if (CS)
3187  if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
3188  CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3189  return 0;
3190 
3191  return 4;
3192 }
3193 
3194 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3195  auto &OutVals = CLI.OutVals;
3196  auto &OutFlags = CLI.OutFlags;
3197  auto &OutRegs = CLI.OutRegs;
3198  auto &Ins = CLI.Ins;
3199  auto &InRegs = CLI.InRegs;
3200  CallingConv::ID CC = CLI.CallConv;
3201  bool &IsTailCall = CLI.IsTailCall;
3202  bool IsVarArg = CLI.IsVarArg;
3203  const Value *Callee = CLI.Callee;
3204  MCSymbol *Symbol = CLI.Symbol;
3205 
3206  bool Is64Bit = Subtarget->is64Bit();
3207  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3208 
3209  const CallInst *CI =
3210  CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
3211  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
3212 
3213  // Call / invoke instructions with NoCfCheck attribute require special
3214  // handling.
3215  const auto *II =
3216  CLI.CS ? dyn_cast<InvokeInst>(CLI.CS->getInstruction()) : nullptr;
3217  if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()))
3218  return false;
3219 
3220  // Functions with no_caller_saved_registers that need special handling.
3221  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3222  (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
3223  return false;
3224 
3225  // Functions using retpoline for indirect calls need to use SDISel.
3226  if (Subtarget->useRetpolineIndirectCalls())
3227  return false;
3228 
3229  // Handle only C, fastcc, and webkit_js calling conventions for now.
3230  switch (CC) {
3231  default: return false;
3232  case CallingConv::C:
3233  case CallingConv::Fast:
3235  case CallingConv::Swift:
3239  case CallingConv::Win64:
3241  break;
3242  }
3243 
3244  // Allow SelectionDAG isel to handle tail calls.
3245  if (IsTailCall)
3246  return false;
3247 
3248  // fastcc with -tailcallopt is intended to provide a guaranteed
3249  // tail call optimization. Fastisel doesn't know how to do that.
3250  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
3251  return false;
3252 
3253  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3254  // x86-32. Special handling for x86-64 is implemented.
3255  if (IsVarArg && IsWin64)
3256  return false;
3257 
3258  // Don't know about inalloca yet.
3259  if (CLI.CS && CLI.CS->hasInAllocaArgument())
3260  return false;
3261 
3262  for (auto Flag : CLI.OutFlags)
3263  if (Flag.isSwiftError())
3264  return false;
3265 
3266  SmallVector<MVT, 16> OutVTs;
3267  SmallVector<unsigned, 16> ArgRegs;
3268 
3269  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3270  // instruction. This is safe because it is common to all FastISel supported
3271  // calling conventions on x86.
3272  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3273  Value *&Val = OutVals[i];
3274  ISD::ArgFlagsTy Flags = OutFlags[i];
3275  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3276  if (CI->getBitWidth() < 32) {
3277  if (Flags.isSExt())
3279  else
3281  }
3282  }
3283 
3284  // Passing bools around ends up doing a trunc to i1 and passing it.
3285  // Codegen this as an argument + "and 1".
3286  MVT VT;
3287  auto *TI = dyn_cast<TruncInst>(Val);
3288  unsigned ResultReg;
3289  if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
3290  (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
3291  TI->hasOneUse()) {
3292  Value *PrevVal = TI->getOperand(0);
3293  ResultReg = getRegForValue(PrevVal);
3294 
3295  if (!ResultReg)
3296  return false;
3297 
3298  if (!isTypeLegal(PrevVal->getType(), VT))
3299  return false;
3300 
3301  ResultReg =
3302  fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
3303  } else {
3304  if (!isTypeLegal(Val->getType(), VT))
3305  return false;
3306  ResultReg = getRegForValue(Val);
3307  }
3308 
3309  if (!ResultReg)
3310  return false;
3311 
3312  ArgRegs.push_back(ResultReg);
3313  OutVTs.push_back(VT);
3314  }
3315 
3316  // Analyze operands of the call, assigning locations to each operand.
3318  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3319 
3320  // Allocate shadow area for Win64
3321  if (IsWin64)
3322  CCInfo.AllocateStack(32, 8);
3323 
3324  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3325 
3326  // Get a count of how many bytes are to be pushed on the stack.
3327  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3328 
3329  // Issue CALLSEQ_START
3330  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3331  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3332  .addImm(NumBytes).addImm(0).addImm(0);
3333 
3334  // Walk the register/memloc assignments, inserting copies/loads.
3335  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3336  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3337  CCValAssign const &VA = ArgLocs[i];
3338  const Value *ArgVal = OutVals[VA.getValNo()];
3339  MVT ArgVT = OutVTs[VA.getValNo()];
3340 
3341  if (ArgVT == MVT::x86mmx)
3342  return false;
3343 
3344  unsigned ArgReg = ArgRegs[VA.getValNo()];
3345 
3346  // Promote the value if needed.
3347  switch (VA.getLocInfo()) {
3348  case CCValAssign::Full: break;
3349  case CCValAssign::SExt: {
3350  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3351  "Unexpected extend");
3352 
3353  if (ArgVT == MVT::i1)
3354  return false;
3355 
3356  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3357  ArgVT, ArgReg);
3358  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3359  ArgVT = VA.getLocVT();
3360  break;
3361  }
3362  case CCValAssign::ZExt: {
3363  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3364  "Unexpected extend");
3365 
3366  // Handle zero-extension from i1 to i8, which is common.
3367  if (ArgVT == MVT::i1) {
3368  // Set the high bits to zero.
3369  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
3370  ArgVT = MVT::i8;
3371 
3372  if (ArgReg == 0)
3373  return false;
3374  }
3375 
3376  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3377  ArgVT, ArgReg);
3378  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3379  ArgVT = VA.getLocVT();
3380  break;
3381  }
3382  case CCValAssign::AExt: {
3383  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3384  "Unexpected extend");
3385  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3386  ArgVT, ArgReg);
3387  if (!Emitted)
3388  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3389  ArgVT, ArgReg);
3390  if (!Emitted)
3391  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3392  ArgVT, ArgReg);
3393 
3394  assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
3395  ArgVT = VA.getLocVT();
3396  break;
3397  }
3398  case CCValAssign::BCvt: {
3399  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
3400  /*TODO: Kill=*/false);
3401  assert(ArgReg && "Failed to emit a bitcast!");
3402  ArgVT = VA.getLocVT();
3403  break;
3404  }
3405  case CCValAssign::VExt:
3406  // VExt has not been implemented, so this should be impossible to reach
3407  // for now. However, fallback to Selection DAG isel once implemented.
3408  return false;
3412  case CCValAssign::FPExt:
3413  llvm_unreachable("Unexpected loc info!");
3414  case CCValAssign::Indirect:
3415  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3416  // support this.
3417  return false;
3418  }
3419 
3420  if (VA.isRegLoc()) {
3421  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3422  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3423  OutRegs.push_back(VA.getLocReg());
3424  } else {
3425  assert(VA.isMemLoc());
3426 
3427  // Don't emit stores for undef values.
3428  if (isa<UndefValue>(ArgVal))
3429  continue;
3430 
3431  unsigned LocMemOffset = VA.getLocMemOffset();
3432  X86AddressMode AM;
3433  AM.Base.Reg = RegInfo->getStackRegister();
3434  AM.Disp = LocMemOffset;
3435  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3436  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3437  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3438  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3439  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3440  if (Flags.isByVal()) {
3441  X86AddressMode SrcAM;
3442  SrcAM.Base.Reg = ArgReg;
3443  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3444  return false;
3445  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3446  // If this is a really simple value, emit this with the Value* version
3447  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3448  // as it can cause us to reevaluate the argument.
3449  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3450  return false;
3451  } else {
3452  bool ValIsKill = hasTrivialKill(ArgVal);
3453  if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
3454  return false;
3455  }
3456  }
3457  }
3458 
3459  // ELF / PIC requires GOT in the EBX register before function calls via PLT
3460  // GOT pointer.
3461  if (Subtarget->isPICStyleGOT()) {
3462  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3463  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3464  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3465  }
3466 
3467  if (Is64Bit && IsVarArg && !IsWin64) {
3468  // From AMD64 ABI document:
3469  // For calls that may call functions that use varargs or stdargs
3470  // (prototype-less calls or calls to functions containing ellipsis (...) in
3471  // the declaration) %al is used as hidden argument to specify the number
3472  // of SSE registers used. The contents of %al do not need to match exactly
3473  // the number of registers, but must be an ubound on the number of SSE
3474  // registers used and is in the range 0 - 8 inclusive.
3475 
3476  // Count the number of XMM registers allocated.
3477  static const MCPhysReg XMMArgRegs[] = {
3478  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3479  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3480  };
3481  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3482  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3483  && "SSE registers cannot be used when SSE is disabled");
3484  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3485  X86::AL).addImm(NumXMMRegs);
3486  }
3487 
3488  // Materialize callee address in a register. FIXME: GV address can be
3489  // handled with a CALLpcrel32 instead.
3490  X86AddressMode CalleeAM;
3491  if (!X86SelectCallAddress(Callee, CalleeAM))
3492  return false;
3493 
3494  unsigned CalleeOp = 0;
3495  const GlobalValue *GV = nullptr;
3496  if (CalleeAM.GV != nullptr) {
3497  GV = CalleeAM.GV;
3498  } else if (CalleeAM.Base.Reg != 0) {
3499  CalleeOp = CalleeAM.Base.Reg;
3500  } else
3501  return false;
3502 
3503  // Issue the call.
3504  MachineInstrBuilder MIB;
3505  if (CalleeOp) {
3506  // Register-indirect call.
3507  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3508  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3509  .addReg(CalleeOp);
3510  } else {
3511  // Direct call.
3512  assert(GV && "Not a direct call");
3513  // See if we need any target-specific flags on the GV operand.
3514  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3515 
3516  // This will be a direct call, or an indirect call through memory for
3517  // NonLazyBind calls or dllimport calls.
3518  bool NeedLoad =
3519  OpFlags == X86II::MO_DLLIMPORT || OpFlags == X86II::MO_GOTPCREL;
3520  unsigned CallOpc = NeedLoad
3521  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3522  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3523 
3524  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3525  if (NeedLoad)
3526  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3527  if (Symbol)
3528  MIB.addSym(Symbol, OpFlags);
3529  else
3530  MIB.addGlobalAddress(GV, 0, OpFlags);
3531  if (NeedLoad)
3532  MIB.addReg(0);
3533  }
3534 
3535  // Add a register mask operand representing the call-preserved registers.
3536  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3537  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3538 
3539  // Add an implicit use GOT pointer in EBX.
3540  if (Subtarget->isPICStyleGOT())
3542 
3543  if (Is64Bit && IsVarArg && !IsWin64)
3545 
3546  // Add implicit physical register uses to the call.
3547  for (auto Reg : OutRegs)
3549 
3550  // Issue CALLSEQ_END
3551  unsigned NumBytesForCalleeToPop =
3552  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3553  TM.Options.GuaranteedTailCallOpt)
3554  ? NumBytes // Callee pops everything.
3555  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
3556  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3557  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3558  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3559 
3560  // Now handle call return values.
3562  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3563  CLI.RetTy->getContext());
3564  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3565 
3566  // Copy all of the result registers out of their specified physreg.
3567  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3568  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3569  CCValAssign &VA = RVLocs[i];
3570  EVT CopyVT = VA.getValVT();
3571  unsigned CopyReg = ResultReg + i;
3572  unsigned SrcReg = VA.getLocReg();
3573 
3574  // If this is x86-64, and we disabled SSE, we can't return FP values
3575  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3576  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3577  report_fatal_error("SSE register return with SSE disabled");
3578  }
3579 
3580  // If we prefer to use the value in xmm registers, copy it out as f80 and
3581  // use a truncate to move it from fp stack reg to xmm reg.
3582  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3583  isScalarFPTypeInSSEReg(VA.getValVT())) {
3584  CopyVT = MVT::f80;
3585  CopyReg = createResultReg(&X86::RFP80RegClass);
3586  }
3587 
3588  // Copy out the result.
3589  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3590  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3591  InRegs.push_back(VA.getLocReg());
3592 
3593  // Round the f80 to the right size, which also moves it to the appropriate
3594  // xmm register. This is accomplished by storing the f80 value in memory
3595  // and then loading it back.
3596  if (CopyVT != VA.getValVT()) {
3597  EVT ResVT = VA.getValVT();
3598  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3599  unsigned MemSize = ResVT.getSizeInBits()/8;
3600  int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3601  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3602  TII.get(Opc)), FI)
3603  .addReg(CopyReg);
3604  Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
3605  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3606  TII.get(Opc), ResultReg + i), FI);
3607  }
3608  }
3609 
3610  CLI.ResultReg = ResultReg;
3611  CLI.NumResultRegs = RVLocs.size();
3612  CLI.Call = MIB;
3613 
3614  return true;
3615 }
3616 
3617 bool
3618 X86FastISel::fastSelectInstruction(const Instruction *I) {
3619  switch (I->getOpcode()) {
3620  default: break;
3621  case Instruction::Load:
3622  return X86SelectLoad(I);
3623  case Instruction::Store:
3624  return X86SelectStore(I);
3625  case Instruction::Ret:
3626  return X86SelectRet(I);
3627  case Instruction::ICmp:
3628  case Instruction::FCmp:
3629  return X86SelectCmp(I);
3630  case Instruction::ZExt:
3631  return X86SelectZExt(I);
3632  case Instruction::SExt:
3633  return X86SelectSExt(I);
3634  case Instruction::Br:
3635  return X86SelectBranch(I);
3636  case Instruction::LShr:
3637  case Instruction::AShr:
3638  case Instruction::Shl:
3639  return X86SelectShift(I);
3640  case Instruction::SDiv:
3641  case Instruction::UDiv:
3642  case Instruction::SRem:
3643  case Instruction::URem:
3644  return X86SelectDivRem(I);
3645  case Instruction::Select:
3646  return X86SelectSelect(I);
3647  case Instruction::Trunc:
3648  return X86SelectTrunc(I);
3649  case Instruction::FPExt:
3650  return X86SelectFPExt(I);
3651  case Instruction::FPTrunc:
3652  return X86SelectFPTrunc(I);
3653  case Instruction::SIToFP:
3654  return X86SelectSIToFP(I);
3655  case Instruction::UIToFP:
3656  return X86SelectUIToFP(I);
3657  case Instruction::IntToPtr: // Deliberate fall-through.
3658  case Instruction::PtrToInt: {
3659  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3660  EVT DstVT = TLI.getValueType(DL, I->getType());
3661  if (DstVT.bitsGT(SrcVT))
3662  return X86SelectZExt(I);
3663  if (DstVT.bitsLT(SrcVT))
3664  return X86SelectTrunc(I);
3665  unsigned Reg = getRegForValue(I->getOperand(0));
3666  if (Reg == 0) return false;
3667  updateValueMap(I, Reg);
3668  return true;
3669  }
3670  case Instruction::BitCast: {
3671  // Select SSE2/AVX bitcasts between 128/256 bit vector types.
3672  if (!Subtarget->hasSSE2())
3673  return false;
3674 
3675  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3676  EVT DstVT = TLI.getValueType(DL, I->getType());
3677 
3678  if (!SrcVT.isSimple() || !DstVT.isSimple())
3679  return false;
3680 
3681  MVT SVT = SrcVT.getSimpleVT();
3682  MVT DVT = DstVT.getSimpleVT();
3683 
3684  if (!SVT.is128BitVector() &&
3685  !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
3686  !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
3687  (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
3688  DVT.getScalarSizeInBits() >= 32))))
3689  return false;
3690 
3691  unsigned Reg = getRegForValue(I->getOperand(0));
3692  if (Reg == 0)
3693  return false;
3694 
3695  // No instruction is needed for conversion. Reuse the register used by
3696  // the fist operand.
3697  updateValueMap(I, Reg);
3698  return true;
3699  }
3700  }
3701 
3702  return false;
3703 }
3704 
3705 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3706  if (VT > MVT::i64)
3707  return 0;
3708 
3709  uint64_t Imm = CI->getZExtValue();
3710  if (Imm == 0) {
3711  unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3712  switch (VT.SimpleTy) {
3713  default: llvm_unreachable("Unexpected value type");
3714  case MVT::i1:
3715  case MVT::i8:
3716  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
3717  X86::sub_8bit);
3718  case MVT::i16:
3719  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
3720  X86::sub_16bit);
3721  case MVT::i32:
3722  return SrcReg;
3723  case MVT::i64: {
3724  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3725  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3726  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3727  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3728  return ResultReg;
3729  }
3730  }
3731  }
3732 
3733  unsigned Opc = 0;
3734  switch (VT.SimpleTy) {
3735  default: llvm_unreachable("Unexpected value type");
3736  case MVT::i1:
3737  // TODO: Support this properly.
3738  if (Subtarget->hasAVX512())
3739  return 0;
3740  VT = MVT::i8;
3742  case MVT::i8: Opc = X86::MOV8ri; break;
3743  case MVT::i16: Opc = X86::MOV16ri; break;
3744  case MVT::i32: Opc = X86::MOV32ri; break;
3745  case MVT::i64: {
3746  if (isUInt<32>(Imm))
3747  Opc = X86::MOV32ri;
3748  else if (isInt<32>(Imm))
3749  Opc = X86::MOV64ri32;
3750  else
3751  Opc = X86::MOV64ri;
3752  break;
3753  }
3754  }
3755  if (VT == MVT::i64 && Opc == X86::MOV32ri) {
3756  unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
3757  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3758  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3759  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3760  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3761  return ResultReg;
3762  }
3763  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3764 }
3765 
3766 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3767  if (CFP->isNullValue())
3768  return fastMaterializeFloatZero(CFP);
3769 
3770  // Can't handle alternate code models yet.
3771  CodeModel::Model CM = TM.getCodeModel();
3772  if (CM != CodeModel::Small && CM != CodeModel::Large)
3773  return 0;
3774 
3775  // Get opcode and regclass of the output for the given load instruction.
3776  unsigned Opc = 0;
3777  const TargetRegisterClass *RC = nullptr;
3778  switch (VT.SimpleTy) {
3779  default: return 0;
3780  case MVT::f32:
3781  if (X86ScalarSSEf32) {
3782  Opc = Subtarget->hasAVX512()
3783  ? X86::VMOVSSZrm
3784  : Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
3785  RC = Subtarget->hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3786  } else {
3787  Opc = X86::LD_Fp32m;
3788  RC = &X86::RFP32RegClass;
3789  }
3790  break;
3791  case MVT::f64:
3792  if (X86ScalarSSEf64) {
3793  Opc = Subtarget->hasAVX512()
3794  ? X86::VMOVSDZrm
3795  : Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
3796  RC = Subtarget->hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3797  } else {
3798  Opc = X86::LD_Fp64m;
3799  RC = &X86::RFP64RegClass;
3800  }
3801  break;
3802  case MVT::f80:
3803  // No f80 support yet.
3804  return 0;
3805  }
3806 
3807  // MachineConstantPool wants an explicit alignment.
3808  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
3809  if (Align == 0) {
3810  // Alignment of vector types. FIXME!
3811  Align = DL.getTypeAllocSize(CFP->getType());
3812  }
3813 
3814  // x86-32 PIC requires a PIC base register for constant pools.
3815  unsigned PICBase = 0;
3816  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3817  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3818  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3819  else if (OpFlag == X86II::MO_GOTOFF)
3820  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3821  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3822  PICBase = X86::RIP;
3823 
3824  // Create the load from the constant pool.
3825  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
3826  unsigned ResultReg = createResultReg(RC);
3827 
3828  if (CM == CodeModel::Large) {
3829  unsigned AddrReg = createResultReg(&X86::GR64RegClass);
3830  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3831  AddrReg)
3832  .addConstantPoolIndex(CPI, 0, OpFlag);
3833  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3834  TII.get(Opc), ResultReg);
3835  addDirectMem(MIB, AddrReg);
3836  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3838  MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
3839  MIB->addMemOperand(*FuncInfo.MF, MMO);
3840  return ResultReg;
3841  }
3842 
3843  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3844  TII.get(Opc), ResultReg),
3845  CPI, PICBase, OpFlag);
3846  return ResultReg;
3847 }
3848 
3849 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3850  // Can't handle alternate code models yet.
3851  if (TM.getCodeModel() != CodeModel::Small)
3852  return 0;
3853 
3854  // Materialize addresses with LEA/MOV instructions.
3855  X86AddressMode AM;
3856  if (X86SelectAddress(GV, AM)) {
3857  // If the expression is just a basereg, then we're done, otherwise we need
3858  // to emit an LEA.
3859  if (AM.BaseType == X86AddressMode::RegBase &&
3860  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3861  return AM.Base.Reg;
3862 
3863  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3864  if (TM.getRelocationModel() == Reloc::Static &&
3865  TLI.getPointerTy(DL) == MVT::i64) {
3866  // The displacement code could be more than 32 bits away so we need to use
3867  // an instruction with a 64 bit immediate
3868  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3869  ResultReg)
3870  .addGlobalAddress(GV);
3871  } else {
3872  unsigned Opc =
3873  TLI.getPointerTy(DL) == MVT::i32
3874  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3875  : X86::LEA64r;
3876  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3877  TII.get(Opc), ResultReg), AM);
3878  }
3879  return ResultReg;
3880  }
3881  return 0;
3882 }
3883 
3884 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3885  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3886 
3887  // Only handle simple types.
3888  if (!CEVT.isSimple())
3889  return 0;
3890  MVT VT = CEVT.getSimpleVT();
3891 
3892  if (const auto *CI = dyn_cast<ConstantInt>(C))
3893  return X86MaterializeInt(CI, VT);
3894  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
3895  return X86MaterializeFP(CFP, VT);
3896  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
3897  return X86MaterializeGV(GV, VT);
3898 
3899  return 0;
3900 }
3901 
3902 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3903  // Fail on dynamic allocas. At this point, getRegForValue has already
3904  // checked its CSE maps, so if we're here trying to handle a dynamic
3905  // alloca, we're not going to succeed. X86SelectAddress has a
3906  // check for dynamic allocas, because it's called directly from
3907  // various places, but targetMaterializeAlloca also needs a check
3908  // in order to avoid recursion between getRegForValue,
3909  // X86SelectAddrss, and targetMaterializeAlloca.
3910  if (!FuncInfo.StaticAllocaMap.count(C))
3911  return 0;
3912  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3913 
3914  X86AddressMode AM;
3915  if (!X86SelectAddress(C, AM))
3916  return 0;
3917  unsigned Opc =
3918  TLI.getPointerTy(DL) == MVT::i32
3919  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3920  : X86::LEA64r;
3921  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3922  unsigned ResultReg = createResultReg(RC);
3923  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3924  TII.get(Opc), ResultReg), AM);
3925  return ResultReg;
3926 }
3927 
3928 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3929  MVT VT;
3930  if (!isTypeLegal(CF->getType(), VT))
3931  return 0;
3932 
3933  // Get opcode and regclass for the given zero.
3934  bool HasAVX512 = Subtarget->hasAVX512();
3935  unsigned Opc = 0;
3936  const TargetRegisterClass *RC = nullptr;
3937  switch (VT.SimpleTy) {
3938  default: return 0;
3939  case MVT::f32:
3940  if (X86ScalarSSEf32) {
3941  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
3942  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
3943  } else {
3944  Opc = X86::LD_Fp032;
3945  RC = &X86::RFP32RegClass;
3946  }
3947  break;
3948  case MVT::f64:
3949  if (X86ScalarSSEf64) {
3950  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
3951  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
3952  } else {
3953  Opc = X86::LD_Fp064;
3954  RC = &X86::RFP64RegClass;
3955  }
3956  break;
3957  case MVT::f80:
3958  // No f80 support yet.
3959  return 0;
3960  }
3961 
3962  unsigned ResultReg = createResultReg(RC);
3963  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3964  return ResultReg;
3965 }
3966 
3967 
3968 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3969  const LoadInst *LI) {
3970  const Value *Ptr = LI->getPointerOperand();
3971  X86AddressMode AM;
3972  if (!X86SelectAddress(Ptr, AM))
3973  return false;
3974 
3975  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3976 
3977  unsigned Size = DL.getTypeAllocSize(LI->getType());
3978  unsigned Alignment = LI->getAlignment();
3979 
3980  if (Alignment == 0) // Ensure that codegen never sees alignment 0
3981  Alignment = DL.getABITypeAlignment(LI->getType());
3982 
3984  AM.getFullAddress(AddrOps);
3985 
3987  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
3988  /*AllowCommute=*/true);
3989  if (!Result)
3990  return false;
3991 
3992  // The index register could be in the wrong register class. Unfortunately,
3993  // foldMemoryOperandImpl could have commuted the instruction so its not enough
3994  // to just look at OpNo + the offset to the index reg. We actually need to
3995  // scan the instruction to find the index reg and see if its the correct reg
3996  // class.
3997  unsigned OperandNo = 0;
3998  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3999  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
4000  MachineOperand &MO = *I;
4001  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
4002  continue;
4003  // Found the index reg, now try to rewrite it.
4004  unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
4005  MO.getReg(), OperandNo);
4006  if (IndexReg == MO.getReg())
4007  continue;
4008  MO.setReg(IndexReg);
4009  }
4010 
4011  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
4012  MI->eraseFromParent();
4013  return true;
4014 }
4015 
4016 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
4017  const TargetRegisterClass *RC,
4018  unsigned Op0, bool Op0IsKill,
4019  unsigned Op1, bool Op1IsKill,
4020  unsigned Op2, bool Op2IsKill,
4021  unsigned Op3, bool Op3IsKill) {
4022  const MCInstrDesc &II = TII.get(MachineInstOpcode);
4023 
4024  unsigned ResultReg = createResultReg(RC);
4025  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
4026  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
4027  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
4028  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
4029 
4030  if (II.getNumDefs() >= 1)
4031  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
4032  .addReg(Op0, getKillRegState(Op0IsKill))
4033  .addReg(Op1, getKillRegState(Op1IsKill))
4034  .addReg(Op2, getKillRegState(Op2IsKill))
4035  .addReg(Op3, getKillRegState(Op3IsKill));
4036  else {
4037  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
4038  .addReg(Op0, getKillRegState(Op0IsKill))
4039  .addReg(Op1, getKillRegState(Op1IsKill))
4040  .addReg(Op2, getKillRegState(Op2IsKill))
4041  .addReg(Op3, getKillRegState(Op3IsKill));
4042  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4043  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
4044  }
4045  return ResultReg;
4046 }
4047 
4048 
4049 namespace llvm {
4051  const TargetLibraryInfo *libInfo) {
4052  return new X86FastISel(funcInfo, libInfo);
4053  }
4054 }
bool hasAVX() const
Definition: X86Subtarget.h:554
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:177
void setFrameAddressIsTaken(bool T)
unsigned GetCondBranchFromCond(CondCode CC)
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:541
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:349
Return a value (possibly void), from a function.
Value * getValueOperand()
Definition: Instructions.h:399
const MachineInstrBuilder & addMetadata(const MDNode *MD) const
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:516
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:584
mop_iterator operands_end()
Definition: MachineInstr.h:454
Function * getCalledFunction() const
Return the function called, or null if this is an indirect function invocation.
bool isInteger() const
Return true if this is an integer or a vector integer type.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:675
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
bool contains(unsigned Reg) const
Return true if the specified register is included in this register class.
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:557
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
static const MachineInstrBuilder & addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI, unsigned GlobalBaseReg, unsigned char OpFlags)
addConstantPoolReference - This function is used to add a reference to the base of a constant value s...
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
bool isVector() const
Return true if this is a vector value type.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
This class represents a function call, abstracting a target machine&#39;s calling convention.
unsigned Reg
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:691
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:303
unsigned getSourceAddressSpace() const
unsigned getValNo() const
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:714
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:701
This class wraps the llvm.memset intrinsic.
BasicBlock * getSuccessor(unsigned i) const
unsigned const TargetRegisterInfo * TRI
const GlobalValue * GV
A debug info location.
Definition: DebugLoc.h:34
F(f)
unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const
An instruction for reading from memory.
Definition: Instructions.h:168
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:110
Hexagon Common GEP
Value * getCondition() const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
Value * getLength() const
op_iterator op_begin()
Definition: User.h:230
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:39
bool isMemLoc() const
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:268
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
bool arg_empty() const
Definition: CallSite.h:218
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:696
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:521
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:695
A description of a memory reference used in the backend.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:783
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:210
const HexagonInstrInfo * TII
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:154
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:412
Class to represent struct types.
Definition: DerivedTypes.h:201
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
static bool isGlobalStubReference(unsigned char TargetFlag)
isGlobalStubReference - Return true if the specified TargetFlag operand is a reference to a stub for ...
Definition: X86InstrInfo.h:113
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:692
SimpleValueType SimpleTy
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1628
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
LocInfo getLocInfo() const
unsigned getSizeInBits() const
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:251
static bool isGlobalRelativeToPICBase(unsigned char TargetFlag)
isGlobalRelativeToPICBase - Return true if the specified global value reference is relative to a 32-b...
Definition: X86InstrInfo.h:130
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1642
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:85
This is a fast-path instruction selection class that generates poor code and doesn&#39;t support illegal ...
Definition: FastISel.h:67
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:885
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:406
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
This instruction compares its operands according to the predicate given to the constructor.
X86_StdCall - stdcall is the calling conventions mostly used by the Win32 API.
Definition: CallingConv.h:87
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Return true if the call or the callee has the given attribute.
Definition: CallSite.h:377
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
X86_FastCall - &#39;fast&#39; analog of X86_StdCall.
Definition: CallingConv.h:92
An instruction for storing to memory.
Definition: Instructions.h:310
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
amdgpu Simplify well known AMD library false Value * Callee
This class represents a truncation of integer types.
Value * getOperand(unsigned i) const
Definition: User.h:170
Class to represent pointers.
Definition: DerivedTypes.h:467
unsigned getByValSize() const
unsigned getKillRegState(bool B)
unsigned getStackRegister() const
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:118
Value * getOperand(unsigned i_nocapture) const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:229
bool isFloatTy() const
Return true if this is &#39;float&#39;, a 32-bit IEEE fp type.
Definition: Type.h:147
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:841
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:169
std::pair< CondCode, bool > getX86ConditionCode(CmpInst::Predicate Predicate)
Return a pair of condition code for the given predicate and whether the instruction operands should b...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
unsigned const MachineRegisterInfo * MRI
Machine Value Type.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Conditional or Unconditional Branch instruction.
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:35
Value * getAddress() const
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
unsigned getScalarSizeInBits() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
const MCPhysReg * ImplicitDefs
Definition: MCInstrDesc.h:173
DIExpression * getExpression() const
bool isValidLocationForIntrinsic(const DILocation *DL) const
Check that a location is valid for this variable.
op_iterator op_end()
Definition: User.h:232
static std::pair< unsigned, bool > getX86SSEConditionCode(CmpInst::Predicate Predicate)
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:685
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:727
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:694
Value * getPointerOperand()
Definition: Instructions.h:274
const Triple & getTargetTriple() const
Definition: X86Subtarget.h:711
static void X86SelectAddress(const MachineInstr &I, const MachineRegisterInfo &MRI, X86AddressMode &AM)
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:702
X86_ThisCall - Similar to X86_StdCall.
Definition: CallingConv.h:111
unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand=false)
Return a set opcode for the given condition and whether it has a memory operand.
bool isTargetMCU() const
Definition: X86Subtarget.h:730
Extended Value Type.
Definition: ValueTypes.h:34
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
size_t size() const
Definition: SmallVector.h:53
bool isVolatile() const
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:700
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
The memory access writes data.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags...
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:689
union llvm::X86AddressMode::@472 Base
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:192
CCState - This class holds information needed while lowering arguments and return values...
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:309
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:213
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:699
Provides information about what library functions are available for the current target.
CCValAssign - Represent assignment of one arg/retval to a location.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
void getFullAddress(SmallVectorImpl< MachineOperand > &MO)
This class wraps the llvm.memcpy intrinsic.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:225
Value * getRawSource() const
Return the arguments to the instruction.
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
This file defines the FastISel class.
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:241
unsigned getCMovFromCond(CondCode CC, unsigned RegBytes, bool HasMemoryOperand=false)
Return a cmov opcode for the given condition, register size in bytes, and operand type...
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:440
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:443
amdgpu Simplify well known AMD library false Value Value * Arg
The memory access reads data.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr) const override
foldMemoryOperand - If this target supports it, fold a load or store of the specified stack slot into...
Representation of each machine instruction.
Definition: MachineInstr.h:64
MO_GOTOFF - On a symbol operand this indicates that the immediate is the offset to the location of th...
Definition: X86BaseInfo.h:102
static const MachineInstrBuilder & addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg)
addDirectMem - This function is used to add a direct memory reference to the current instruction – t...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:307
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:363
unsigned getDestAddressSpace() const
bool doesNoCfCheck() const
Determine if the call should not perform indirect branch tracking.
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:543
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:230
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
unsigned getLocMemOffset() const
unsigned getNumArgOperands() const
Return the number of call arguments.
Establish a view to a call site for examination.
Definition: CallSite.h:714
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:58
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
enum llvm::X86AddressMode::@471 BaseType
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:693
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
Definition: CallingConv.h:144
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool is512BitVector() const
Return true if this is a 512-bit vector type.
uint32_t Size
Definition: Profile.cpp:47
DILocalVariable * getVariable() const
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:697
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:346
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:355
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegLoc() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
bool hasSSE1() const
Definition: X86Subtarget.h:548
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:688
LLVM Value Representation.
Definition: Value.h:73
mop_iterator operands_begin()
Definition: MachineInstr.h:453
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:698
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC)
If we have a comparison with RHS as the RHS of the comparison, return an opcode that works for the co...
bool hasAVX512() const
Definition: X86Subtarget.h:556
Invoke instruction.
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
Definition: X86BaseInfo.h:88
IRTranslator LLVM IR MI
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
unsigned constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, const MachineOperand &RegMO, unsigned OpIdx)
Try to constrain Reg so that it is usable by argument OpIdx of the provided MCInstrDesc II...
Definition: Utils.cpp:47
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Conversion operators.
Definition: ISDOpcodes.h:437
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size...
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:157
X86AddressMode - This struct holds a generalized full x86 address mode.
unsigned getLocReg() const
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:690
This represents the llvm.dbg.declare instruction.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:150
Value * getPointerOperand()
Definition: Instructions.h:402
unsigned AllocateStack(unsigned Size, unsigned Align)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
Value * getRawDest() const
static const MachineInstrBuilder & addFullAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM)
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:351
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
bool hasSSE2() const
Definition: X86Subtarget.h:549
iterator_range< arg_iterator > args()
Definition: Function.h:689
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:687
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:218
MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the reference is actually to the "__imp...
Definition: X86BaseInfo.h:200
static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget, CallingConv::ID CC, ImmutableCallSite *CS)
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:221
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:67
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget)
an instruction to allocate memory on the stack
Definition: Instructions.h:60
gep_type_iterator gep_type_begin(const User *GEP)