1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the X86-specific support for the FastISel class. Much
10 // of the target-specific code is generated by tablegen in the file
11 // X86GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86CallingConv.h"
17 #include "X86InstrBuilder.h"
18 #include "X86InstrInfo.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86RegisterInfo.h"
21 #include "X86Subtarget.h"
22 #include "X86TargetMachine.h"
23 #include "llvm/Analysis/BranchProbabilityInfo.h"
24 #include "llvm/CodeGen/FastISel.h"
25 #include "llvm/CodeGen/FunctionLoweringInfo.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFrameInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/IR/CallingConv.h"
30 #include "llvm/IR/DebugInfo.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/GetElementPtrTypeIterator.h"
33 #include "llvm/IR/GlobalAlias.h"
34 #include "llvm/IR/GlobalVariable.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/IntrinsicsX86.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/MC/MCAsmInfo.h"
40 #include "llvm/MC/MCSymbol.h"
41 #include "llvm/Target/TargetOptions.h"
42 
43 using namespace llvm;
44 
45 namespace {
46 
47 class X86FastISel final : public FastISel {
48  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
49  /// make the right decision when generating code for different targets.
50  const X86Subtarget *Subtarget;
51 
52 public:
53  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
54  const TargetLibraryInfo *libInfo)
55  : FastISel(funcInfo, libInfo) {
56  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
57  }
58 
59  bool fastSelectInstruction(const Instruction *I) override;
60 
61  /// The specified machine instr operand is a vreg, and that
62  /// vreg is being provided by the specified load instruction. If possible,
63  /// try to fold the load as an operand to the instruction, returning true
64  /// on success.
65  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
66  const LoadInst *LI) override;
67 
68  bool fastLowerArguments() override;
69  bool fastLowerCall(CallLoweringInfo &CLI) override;
70  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
71 
72 #include "X86GenFastISel.inc"
73 
74 private:
75  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
76  const DebugLoc &DL);
77 
78  bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
79  unsigned &ResultReg, unsigned Alignment = 1);
80 
81  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
82  MachineMemOperand *MMO = nullptr, bool Aligned = false);
83  bool X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
84  MachineMemOperand *MMO = nullptr, bool Aligned = false);
85 
86  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
87  unsigned &ResultReg);
88 
89  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
90  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
91 
92  bool X86SelectLoad(const Instruction *I);
93 
94  bool X86SelectStore(const Instruction *I);
95 
96  bool X86SelectRet(const Instruction *I);
97 
98  bool X86SelectCmp(const Instruction *I);
99 
100  bool X86SelectZExt(const Instruction *I);
101 
102  bool X86SelectSExt(const Instruction *I);
103 
104  bool X86SelectBranch(const Instruction *I);
105 
106  bool X86SelectShift(const Instruction *I);
107 
108  bool X86SelectDivRem(const Instruction *I);
109 
110  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
111 
112  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
113 
114  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
115 
116  bool X86SelectSelect(const Instruction *I);
117 
118  bool X86SelectTrunc(const Instruction *I);
119 
120  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
121  const TargetRegisterClass *RC);
122 
123  bool X86SelectFPExt(const Instruction *I);
124  bool X86SelectFPTrunc(const Instruction *I);
125  bool X86SelectSIToFP(const Instruction *I);
126  bool X86SelectUIToFP(const Instruction *I);
127  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
128 
129  const X86InstrInfo *getInstrInfo() const {
130  return Subtarget->getInstrInfo();
131  }
132  const X86TargetMachine *getTargetMachine() const {
133  return static_cast<const X86TargetMachine *>(&TM);
134  }
135 
136  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
137 
138  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
139  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
140  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
141  unsigned fastMaterializeConstant(const Constant *C) override;
142 
143  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
144 
145  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
146 
147  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
148  /// computed in an SSE register, not on the X87 floating point stack.
149  bool isScalarFPTypeInSSEReg(EVT VT) const {
150  return (VT == MVT::f64 && Subtarget->hasSSE2()) ||
151  (VT == MVT::f32 && Subtarget->hasSSE1()) || VT == MVT::f16;
152  }
153 
154  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
155 
156  bool IsMemcpySmall(uint64_t Len);
157 
158  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
159  X86AddressMode SrcAM, uint64_t Len);
160 
161  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
162  const Value *Cond);
163 
164  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
165  X86AddressMode &AM);
166 
167  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
168  const TargetRegisterClass *RC, unsigned Op0,
169  unsigned Op1, unsigned Op2, unsigned Op3);
170 };
171 
172 } // end anonymous namespace.
173 
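/// Map an IR floating-point predicate to the immediate operand of the SSE/AVX
/// compare instructions (CMPSS/CMPPS and friends), and report whether the
/// operands must be swapped to express the predicate. The immediates 8 (UEQ)
/// and 12 (ONE) are only encodable by the AVX VCMP forms, which accept the
/// extended predicate range.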
174 static std::pair<unsigned, bool>
175 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
176  unsigned CC;
177  bool NeedSwap = false;
178 
179  // SSE Condition code mapping:
180  // 0 - EQ
181  // 1 - LT
182  // 2 - LE
183  // 3 - UNORD
184  // 4 - NEQ
185  // 5 - NLT
186  // 6 - NLE
187  // 7 - ORD
188  switch (Predicate) {
189  default: llvm_unreachable("Unexpected predicate");
190  case CmpInst::FCMP_OEQ: CC = 0; break;
191  case CmpInst::FCMP_OGT: NeedSwap = true; [[fallthrough]];
192  case CmpInst::FCMP_OLT: CC = 1; break;
193  case CmpInst::FCMP_OGE: NeedSwap = true; [[fallthrough]];
194  case CmpInst::FCMP_OLE: CC = 2; break;
195  case CmpInst::FCMP_UNO: CC = 3; break;
196  case CmpInst::FCMP_UNE: CC = 4; break;
197  case CmpInst::FCMP_ULE: NeedSwap = true; [[fallthrough]];
198  case CmpInst::FCMP_UGE: CC = 5; break;
199  case CmpInst::FCMP_ULT: NeedSwap = true; [[fallthrough]];
200  case CmpInst::FCMP_UGT: CC = 6; break;
201  case CmpInst::FCMP_ORD: CC = 7; break;
202  case CmpInst::FCMP_UEQ: CC = 8; break;
203  case CmpInst::FCMP_ONE: CC = 12; break;
204  }
205 
206  return std::make_pair(CC, NeedSwap);
207 }
208 
209 /// Adds a complex addressing mode to the given machine instr builder.
210 /// Note that this will constrain the index register. If it's not possible to
211 /// constrain the given index register, then a new one will be created. The
212 /// IndexReg field of the addressing mode will be updated to match in this case.
213 const MachineInstrBuilder &
214 X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
215  X86AddressMode &AM) {
216  // First constrain the index register. It needs to be a GR64_NOSP.
217  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
218  MIB->getNumOperands() +
219  X86::AddrIndexReg);
220  return ::addFullAddress(MIB, AM);
221 }
222 
223 /// Check if it is possible to fold the condition from the XALU intrinsic
224 /// into the user. The condition code will only be updated on success.
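///
/// Illustrative IR shape this targets (value names are hypothetical):
///   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %sum = extractvalue { i32, i1 } %res, 0
///   %ovf = extractvalue { i32, i1 } %res, 1
///   br i1 %ovf, label %overflow, label %cont
/// Here the user of %ovf can reuse the EFLAGS produced by the add (COND_O),
/// provided nothing between the intrinsic and the user can clobber EFLAGS.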
225 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
226  const Value *Cond) {
227  if (!isa<ExtractValueInst>(Cond))
228  return false;
229 
230  const auto *EV = cast<ExtractValueInst>(Cond);
231  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
232  return false;
233 
234  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
235  MVT RetVT;
236  const Function *Callee = II->getCalledFunction();
237  Type *RetTy =
238  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
239  if (!isTypeLegal(RetTy, RetVT))
240  return false;
241 
242  if (RetVT != MVT::i32 && RetVT != MVT::i64)
243  return false;
244 
245  X86::CondCode TmpCC;
246  switch (II->getIntrinsicID()) {
247  default: return false;
248  case Intrinsic::sadd_with_overflow:
249  case Intrinsic::ssub_with_overflow:
250  case Intrinsic::smul_with_overflow:
251  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
252  case Intrinsic::uadd_with_overflow:
253  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
254  }
255 
256  // Check if both instructions are in the same basic block.
257  if (II->getParent() != I->getParent())
258  return false;
259 
260  // Make sure nothing is in the way
261  BasicBlock::const_iterator Start(I);
262  BasicBlock::const_iterator End(II);
263  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
264  // We only expect extractvalue instructions between the intrinsic and the
265  // instruction to be selected.
266  if (!isa<ExtractValueInst>(Itr))
267  return false;
268 
269  // Check that the extractvalue operand comes from the intrinsic.
270  const auto *EVI = cast<ExtractValueInst>(Itr);
271  if (EVI->getAggregateOperand() != II)
272  return false;
273  }
274 
275  // Make sure no potentially eflags clobbering phi moves can be inserted in
276  // between.
277  auto HasPhis = [](const BasicBlock *Succ) {
278  return !Succ->phis().empty();
279  };
280  if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
281  return false;
282 
283  // Make sure there are no potentially eflags clobbering constant
284  // materializations in between.
285  if (llvm::any_of(I->operands(), [](Value *V) { return isa<Constant>(V); }))
286  return false;
287 
288  CC = TmpCC;
289  return true;
290 }
291 
292 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
293  EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
294  if (evt == MVT::Other || !evt.isSimple())
295  // Unhandled type. Halt "fast" selection and bail.
296  return false;
297 
298  VT = evt.getSimpleVT();
299  // For now, require SSE/SSE2 for performing floating-point operations,
300  // since x87 requires additional work.
301  if (VT == MVT::f64 && !Subtarget->hasSSE2())
302  return false;
303  if (VT == MVT::f32 && !Subtarget->hasSSE1())
304  return false;
305  // Similarly, no f80 support yet.
306  if (VT == MVT::f80)
307  return false;
308  // We only handle legal types. For example, on x86-32 the instruction
309  // selector contains all of the 64-bit instructions from x86-64,
310  // under the assumption that i64 won't be used if the target doesn't
311  // support it.
312  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
313 }
314 
315 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
316 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
317 /// Return true and the result register by reference if it is possible.
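/// For example, a 16-byte-aligned load of a v4f32 value selects MOVAPSrm,
/// VMOVAPSrm, or VMOVAPSZ128rm depending on the subtarget, a non-temporal
/// aligned load prefers the MOVNTDQA forms when SSE4.1 is available, and
/// unaligned vector loads fall back to the MOVU* forms.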
318 bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
319  MachineMemOperand *MMO, unsigned &ResultReg,
320  unsigned Alignment) {
321  bool HasSSE1 = Subtarget->hasSSE1();
322  bool HasSSE2 = Subtarget->hasSSE2();
323  bool HasSSE41 = Subtarget->hasSSE41();
324  bool HasAVX = Subtarget->hasAVX();
325  bool HasAVX2 = Subtarget->hasAVX2();
326  bool HasAVX512 = Subtarget->hasAVX512();
327  bool HasVLX = Subtarget->hasVLX();
328  bool IsNonTemporal = MMO && MMO->isNonTemporal();
329 
330  // Treat i1 loads the same as i8 loads. Masking will be done when storing.
331  if (VT == MVT::i1)
332  VT = MVT::i8;
333 
334  // Get opcode and regclass of the output for the given load instruction.
335  unsigned Opc = 0;
336  switch (VT.SimpleTy) {
337  default: return false;
338  case MVT::i8:
339  Opc = X86::MOV8rm;
340  break;
341  case MVT::i16:
342  Opc = X86::MOV16rm;
343  break;
344  case MVT::i32:
345  Opc = X86::MOV32rm;
346  break;
347  case MVT::i64:
348  // Must be in x86-64 mode.
349  Opc = X86::MOV64rm;
350  break;
351  case MVT::f32:
352  Opc = HasAVX512 ? X86::VMOVSSZrm_alt
353  : HasAVX ? X86::VMOVSSrm_alt
354  : HasSSE1 ? X86::MOVSSrm_alt
355  : X86::LD_Fp32m;
356  break;
357  case MVT::f64:
358  Opc = HasAVX512 ? X86::VMOVSDZrm_alt
359  : HasAVX ? X86::VMOVSDrm_alt
360  : HasSSE2 ? X86::MOVSDrm_alt
361  : X86::LD_Fp64m;
362  break;
363  case MVT::f80:
364  // No f80 support yet.
365  return false;
366  case MVT::v4f32:
367  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
368  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
369  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
370  else if (Alignment >= 16)
371  Opc = HasVLX ? X86::VMOVAPSZ128rm :
372  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
373  else
374  Opc = HasVLX ? X86::VMOVUPSZ128rm :
375  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
376  break;
377  case MVT::v2f64:
378  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
379  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
380  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
381  else if (Alignment >= 16)
382  Opc = HasVLX ? X86::VMOVAPDZ128rm :
383  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
384  else
385  Opc = HasVLX ? X86::VMOVUPDZ128rm :
386  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
387  break;
388  case MVT::v4i32:
389  case MVT::v2i64:
390  case MVT::v8i16:
391  case MVT::v16i8:
392  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
393  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
394  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
395  else if (Alignment >= 16)
396  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
397  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
398  else
399  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
400  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
401  break;
402  case MVT::v8f32:
403  assert(HasAVX);
404  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
405  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
406  else if (IsNonTemporal && Alignment >= 16)
407  return false; // Force split for X86::VMOVNTDQArm
408  else if (Alignment >= 32)
409  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
410  else
411  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
412  break;
413  case MVT::v4f64:
414  assert(HasAVX);
415  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
416  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
417  else if (IsNonTemporal && Alignment >= 16)
418  return false; // Force split for X86::VMOVNTDQArm
419  else if (Alignment >= 32)
420  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
421  else
422  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
423  break;
424  case MVT::v8i32:
425  case MVT::v4i64:
426  case MVT::v16i16:
427  case MVT::v32i8:
428  assert(HasAVX);
429  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
430  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
431  else if (IsNonTemporal && Alignment >= 16)
432  return false; // Force split for X86::VMOVNTDQArm
433  else if (Alignment >= 32)
434  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
435  else
436  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
437  break;
438  case MVT::v16f32:
439  assert(HasAVX512);
440  if (IsNonTemporal && Alignment >= 64)
441  Opc = X86::VMOVNTDQAZrm;
442  else
443  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
444  break;
445  case MVT::v8f64:
446  assert(HasAVX512);
447  if (IsNonTemporal && Alignment >= 64)
448  Opc = X86::VMOVNTDQAZrm;
449  else
450  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
451  break;
452  case MVT::v8i64:
453  case MVT::v16i32:
454  case MVT::v32i16:
455  case MVT::v64i8:
456  assert(HasAVX512);
457  // Note: There are a lot more choices based on type with AVX-512, but
458  // there's really no advantage when the load isn't masked.
459  if (IsNonTemporal && Alignment >= 64)
460  Opc = X86::VMOVNTDQAZrm;
461  else
462  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
463  break;
464  }
465 
466  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
467 
468  ResultReg = createResultReg(RC);
469  MachineInstrBuilder MIB =
470  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
471  addFullAddress(MIB, AM);
472  if (MMO)
473  MIB->addMemOperand(*FuncInfo.MF, MMO);
474  return true;
475 }
476 
477 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
478 /// type VT. The address is either pre-computed, consisting of a base ptr, Ptr,
479 /// and a displacement offset, or a GlobalAddress,
480 /// i.e. V. Return true if it is possible.
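/// For example, an i1 value is masked to its low bit with AND8ri and stored
/// as an i8, and the non-temporal MOVNT* opcodes are only used when the
/// destination is suitably aligned; unaligned vector stores fall back to the
/// MOVU* forms.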
481 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
482  MachineMemOperand *MMO, bool Aligned) {
483  bool HasSSE1 = Subtarget->hasSSE1();
484  bool HasSSE2 = Subtarget->hasSSE2();
485  bool HasSSE4A = Subtarget->hasSSE4A();
486  bool HasAVX = Subtarget->hasAVX();
487  bool HasAVX512 = Subtarget->hasAVX512();
488  bool HasVLX = Subtarget->hasVLX();
489  bool IsNonTemporal = MMO && MMO->isNonTemporal();
490 
491  // Get opcode and regclass of the output for the given store instruction.
492  unsigned Opc = 0;
493  switch (VT.getSimpleVT().SimpleTy) {
494  case MVT::f80: // No f80 support yet.
495  default: return false;
496  case MVT::i1: {
497  // Mask out all but lowest bit.
498  Register AndResult = createResultReg(&X86::GR8RegClass);
499  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
500  TII.get(X86::AND8ri), AndResult)
501  .addReg(ValReg).addImm(1);
502  ValReg = AndResult;
503  [[fallthrough]]; // handle i1 as i8.
504  }
505  case MVT::i8: Opc = X86::MOV8mr; break;
506  case MVT::i16: Opc = X86::MOV16mr; break;
507  case MVT::i32:
508  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
509  break;
510  case MVT::i64:
511  // Must be in x86-64 mode.
512  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
513  break;
514  case MVT::f32:
515  if (HasSSE1) {
516  if (IsNonTemporal && HasSSE4A)
517  Opc = X86::MOVNTSS;
518  else
519  Opc = HasAVX512 ? X86::VMOVSSZmr :
520  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
521  } else
522  Opc = X86::ST_Fp32m;
523  break;
524  case MVT::f64:
525  if (HasSSE2) {
526  if (IsNonTemporal && HasSSE4A)
527  Opc = X86::MOVNTSD;
528  else
529  Opc = HasAVX512 ? X86::VMOVSDZmr :
530  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
531  } else
532  Opc = X86::ST_Fp64m;
533  break;
534  case MVT::x86mmx:
535  Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
536  break;
537  case MVT::v4f32:
538  if (Aligned) {
539  if (IsNonTemporal)
540  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
541  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
542  else
543  Opc = HasVLX ? X86::VMOVAPSZ128mr :
544  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
545  } else
546  Opc = HasVLX ? X86::VMOVUPSZ128mr :
547  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
548  break;
549  case MVT::v2f64:
550  if (Aligned) {
551  if (IsNonTemporal)
552  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
553  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
554  else
555  Opc = HasVLX ? X86::VMOVAPDZ128mr :
556  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
557  } else
558  Opc = HasVLX ? X86::VMOVUPDZ128mr :
559  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
560  break;
561  case MVT::v4i32:
562  case MVT::v2i64:
563  case MVT::v8i16:
564  case MVT::v16i8:
565  if (Aligned) {
566  if (IsNonTemporal)
567  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
568  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
569  else
570  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
571  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
572  } else
573  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
574  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
575  break;
576  case MVT::v8f32:
577  assert(HasAVX);
578  if (Aligned) {
579  if (IsNonTemporal)
580  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
581  else
582  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
583  } else
584  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
585  break;
586  case MVT::v4f64:
587  assert(HasAVX);
588  if (Aligned) {
589  if (IsNonTemporal)
590  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
591  else
592  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
593  } else
594  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
595  break;
596  case MVT::v8i32:
597  case MVT::v4i64:
598  case MVT::v16i16:
599  case MVT::v32i8:
600  assert(HasAVX);
601  if (Aligned) {
602  if (IsNonTemporal)
603  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
604  else
605  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
606  } else
607  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
608  break;
609  case MVT::v16f32:
610  assert(HasAVX512);
611  if (Aligned)
612  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
613  else
614  Opc = X86::VMOVUPSZmr;
615  break;
616  case MVT::v8f64:
617  assert(HasAVX512);
618  if (Aligned) {
619  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
620  } else
621  Opc = X86::VMOVUPDZmr;
622  break;
623  case MVT::v8i64:
624  case MVT::v16i32:
625  case MVT::v32i16:
626  case MVT::v64i8:
627  assert(HasAVX512);
628  // Note: There are a lot more choices based on type with AVX-512, but
629  // there's really no advantage when the store isn't masked.
630  if (Aligned)
631  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
632  else
633  Opc = X86::VMOVDQU64Zmr;
634  break;
635  }
636 
637  const MCInstrDesc &Desc = TII.get(Opc);
638  // Some of the instructions in the previous switch use FR128 instead
639  // of FR32 for ValReg. Make sure the register we feed the instruction
640  // matches its register class constraints.
641  // Note: It is fine to copy from FR32 to FR128; these are the same
642  // registers behind the scenes, which is why this did not trigger
643  // any bugs before.
644  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
645  MachineInstrBuilder MIB =
646  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
647  addFullAddress(MIB, AM).addReg(ValReg);
648  if (MMO)
649  MIB->addMemOperand(*FuncInfo.MF, MMO);
650 
651  return true;
652 }
653 
654 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
655  X86AddressMode &AM,
656  MachineMemOperand *MMO, bool Aligned) {
657  // Handle 'null' like i32/i64 0.
658  if (isa<ConstantPointerNull>(Val))
659  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
660 
661  // If this is a store of a simple constant, fold the constant into the store.
662  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
663  unsigned Opc = 0;
664  bool Signed = true;
665  switch (VT.getSimpleVT().SimpleTy) {
666  default: break;
667  case MVT::i1:
668  Signed = false;
669  [[fallthrough]]; // Handle as i8.
670  case MVT::i8: Opc = X86::MOV8mi; break;
671  case MVT::i16: Opc = X86::MOV16mi; break;
672  case MVT::i32: Opc = X86::MOV32mi; break;
673  case MVT::i64:
674  // Must be a 32-bit sign extended value.
675  if (isInt<32>(CI->getSExtValue()))
676  Opc = X86::MOV64mi32;
677  break;
678  }
679 
680  if (Opc) {
681  MachineInstrBuilder MIB =
682  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
683  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
684  : CI->getZExtValue());
685  if (MMO)
686  MIB->addMemOperand(*FuncInfo.MF, MMO);
687  return true;
688  }
689  }
690 
691  Register ValReg = getRegForValue(Val);
692  if (ValReg == 0)
693  return false;
694 
695  return X86FastEmitStore(VT, ValReg, AM, MMO, Aligned);
696 }
697 
698 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
699 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
700 /// ISD::SIGN_EXTEND).
701 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
702  unsigned Src, EVT SrcVT,
703  unsigned &ResultReg) {
704  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
705  if (RR == 0)
706  return false;
707 
708  ResultReg = RR;
709  return true;
710 }
711 
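/// handleConstantAddresses - Try to fold a constant address (typically a
/// GlobalValue) into the addressing mode AM. References that must go through
/// a PIC/GOT stub emit the stub load in the local-value area and cache it in
/// LocalValueMap so the same MBB does not reload it; as a last resort the
/// value is materialized into a register and used as the base or index.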
712 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
713  // Handle constant address.
714  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
715  // Can't handle alternate code models yet.
716  if (TM.getCodeModel() != CodeModel::Small)
717  return false;
718 
719  // Can't handle TLS yet.
720  if (GV->isThreadLocal())
721  return false;
722 
723  // Can't handle !absolute_symbol references yet.
724  if (GV->isAbsoluteSymbolRef())
725  return false;
726 
727  // RIP-relative addresses can't have additional register operands, so if
728  // we've already folded stuff into the addressing mode, just force the
729  // global value into its own register, which we can use as the basereg.
730  if (!Subtarget->isPICStyleRIPRel() ||
731  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
732  // Okay, we've committed to selecting this global. Set up the address.
733  AM.GV = GV;
734 
735  // Allow the subtarget to classify the global.
736  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
737 
738  // If this reference is relative to the pic base, set it now.
739  if (isGlobalRelativeToPICBase(GVFlags)) {
740  // FIXME: How do we know Base.Reg is free??
741  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
742  }
743 
744  // Unless the ABI requires an extra load, return a direct reference to
745  // the global.
746  if (!isGlobalStubReference(GVFlags)) {
747  if (Subtarget->isPICStyleRIPRel()) {
748  // Use rip-relative addressing if we can. Above we verified that the
749  // base and index registers are unused.
750  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
751  AM.Base.Reg = X86::RIP;
752  }
753  AM.GVOpFlags = GVFlags;
754  return true;
755  }
756 
757  // Ok, we need to do a load from a stub. If we've already loaded from
758  // this stub, reuse the loaded pointer, otherwise emit the load now.
759  DenseMap<const Value *, Register>::iterator I = LocalValueMap.find(V);
760  Register LoadReg;
761  if (I != LocalValueMap.end() && I->second) {
762  LoadReg = I->second;
763  } else {
764  // Issue load from stub.
765  unsigned Opc = 0;
766  const TargetRegisterClass *RC = nullptr;
767  X86AddressMode StubAM;
768  StubAM.Base.Reg = AM.Base.Reg;
769  StubAM.GV = GV;
770  StubAM.GVOpFlags = GVFlags;
771 
772  // Prepare for inserting code in the local-value area.
773  SavePoint SaveInsertPt = enterLocalValueArea();
774 
775  if (TLI.getPointerTy(DL) == MVT::i64) {
776  Opc = X86::MOV64rm;
777  RC = &X86::GR64RegClass;
778  } else {
779  Opc = X86::MOV32rm;
780  RC = &X86::GR32RegClass;
781  }
782 
783  if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL ||
784  GVFlags == X86II::MO_GOTPCREL_NORELAX)
785  StubAM.Base.Reg = X86::RIP;
786 
787  LoadReg = createResultReg(RC);
788  MachineInstrBuilder LoadMI =
789  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
790  addFullAddress(LoadMI, StubAM);
791 
792  // Ok, back to normal mode.
793  leaveLocalValueArea(SaveInsertPt);
794 
795  // Prevent loading GV stub multiple times in same MBB.
796  LocalValueMap[V] = LoadReg;
797  }
798 
799  // Now construct the final address. Note that the Disp, Scale,
800  // and Index values may already be set here.
801  AM.Base.Reg = LoadReg;
802  AM.GV = nullptr;
803  return true;
804  }
805  }
806 
807  // If all else fails, try to materialize the value in a register.
808  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
809  if (AM.Base.Reg == 0) {
810  AM.Base.Reg = getRegForValue(V);
811  return AM.Base.Reg != 0;
812  }
813  if (AM.IndexReg == 0) {
814  assert(AM.Scale == 1 && "Scale with no index!");
815  AM.IndexReg = getRegForValue(V);
816  return AM.IndexReg != 0;
817  }
818  }
819 
820  return false;
821 }
822 
823 /// X86SelectAddress - Attempt to fill in an address from the given value.
824 ///
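/// Simple GEP chains are folded into the [base + index*scale + disp32] form
/// that x86 addressing modes support. For instance (hypothetical IR),
///   %p = getelementptr i32, ptr %base, i64 %i
/// can be matched as base = %base, index = %i, scale = 4, disp = 0.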
825 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
826  SmallVector<const Value *, 32> GEPs;
827 redo_gep:
828  const User *U = nullptr;
829  unsigned Opcode = Instruction::UserOp1;
830  if (const Instruction *I = dyn_cast<Instruction>(V)) {
831  // Don't walk into other basic blocks; it's possible we haven't
832  // visited them yet, so the instructions may not yet be assigned
833  // virtual registers.
834  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
835  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
836  Opcode = I->getOpcode();
837  U = I;
838  }
839  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
840  Opcode = C->getOpcode();
841  U = C;
842  }
843 
844  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
845  if (Ty->getAddressSpace() > 255)
846  // Fast instruction selection doesn't support the special
847  // address spaces.
848  return false;
849 
850  switch (Opcode) {
851  default: break;
852  case Instruction::BitCast:
853  // Look past bitcasts.
854  return X86SelectAddress(U->getOperand(0), AM);
855 
856  case Instruction::IntToPtr:
857  // Look past no-op inttoptrs.
858  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
859  TLI.getPointerTy(DL))
860  return X86SelectAddress(U->getOperand(0), AM);
861  break;
862 
863  case Instruction::PtrToInt:
864  // Look past no-op ptrtoints.
865  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
866  return X86SelectAddress(U->getOperand(0), AM);
867  break;
868 
869  case Instruction::Alloca: {
870  // Do static allocas.
871  const AllocaInst *A = cast<AllocaInst>(V);
872  DenseMap<const AllocaInst *, int>::iterator SI =
873  FuncInfo.StaticAllocaMap.find(A);
874  if (SI != FuncInfo.StaticAllocaMap.end()) {
875  AM.BaseType = X86AddressMode::FrameIndexBase;
876  AM.Base.FrameIndex = SI->second;
877  return true;
878  }
879  break;
880  }
881 
882  case Instruction::Add: {
883  // Adds of constants are common and easy enough.
884  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
885  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
886  // They have to fit in the 32-bit signed displacement field though.
887  if (isInt<32>(Disp)) {
888  AM.Disp = (uint32_t)Disp;
889  return X86SelectAddress(U->getOperand(0), AM);
890  }
891  }
892  break;
893  }
894 
895  case Instruction::GetElementPtr: {
896  X86AddressMode SavedAM = AM;
897 
898  // Pattern-match simple GEPs.
899  uint64_t Disp = (int32_t)AM.Disp;
900  unsigned IndexReg = AM.IndexReg;
901  unsigned Scale = AM.Scale;
902  gep_type_iterator GTI = gep_type_begin(U);
903  // Iterate through the indices, folding what we can. Constants can be
904  // folded, and one dynamic index can be handled, if the scale is supported.
905  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
906  i != e; ++i, ++GTI) {
907  const Value *Op = *i;
908  if (StructType *STy = GTI.getStructTypeOrNull()) {
909  const StructLayout *SL = DL.getStructLayout(STy);
910  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
911  continue;
912  }
913 
914  // An array/variable index is always of the form i*S where S is the
915  // constant scale size. See if we can push the scale into immediates.
916  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
917  for (;;) {
918  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
919  // Constant-offset addressing.
920  Disp += CI->getSExtValue() * S;
921  break;
922  }
923  if (canFoldAddIntoGEP(U, Op)) {
924  // A compatible add with a constant operand. Fold the constant.
925  ConstantInt *CI =
926  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
927  Disp += CI->getSExtValue() * S;
928  // Iterate on the other operand.
929  Op = cast<AddOperator>(Op)->getOperand(0);
930  continue;
931  }
932  if (IndexReg == 0 &&
933  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
934  (S == 1 || S == 2 || S == 4 || S == 8)) {
935  // Scaled-index addressing.
936  Scale = S;
937  IndexReg = getRegForGEPIndex(Op);
938  if (IndexReg == 0)
939  return false;
940  break;
941  }
942  // Unsupported.
943  goto unsupported_gep;
944  }
945  }
946 
947  // Check for displacement overflow.
948  if (!isInt<32>(Disp))
949  break;
950 
951  AM.IndexReg = IndexReg;
952  AM.Scale = Scale;
953  AM.Disp = (uint32_t)Disp;
954  GEPs.push_back(V);
955 
956  if (const GetElementPtrInst *GEP =
957  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
958  // Ok, the GEP indices were covered by constant-offset and scaled-index
959  // addressing. Update the address state and move on to examining the base.
960  V = GEP;
961  goto redo_gep;
962  } else if (X86SelectAddress(U->getOperand(0), AM)) {
963  return true;
964  }
965 
966  // If we couldn't merge the gep value into this addr mode, revert back to
967  // our address and just match the value instead of completely failing.
968  AM = SavedAM;
969 
970  for (const Value *I : reverse(GEPs))
971  if (handleConstantAddresses(I, AM))
972  return true;
973 
974  return false;
975  unsupported_gep:
976  // Ok, the GEP indices weren't all covered.
977  break;
978  }
979  }
980 
981  return handleConstantAddresses(V, AM);
982 }
983 
984 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
985 ///
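/// Unlike X86SelectAddress, this only looks through casts whose operand is
/// defined in the same basic block (see InMBB below), and it can return a
/// direct reference even to callees that normally need a stub load (e.g.
/// dllimport or nonlazybind functions), since call lowering handles those
/// loads itself.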
986 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
987  const User *U = nullptr;
988  unsigned Opcode = Instruction::UserOp1;
989  const Instruction *I = dyn_cast<Instruction>(V);
990  // Record if the value is defined in the same basic block.
991  //
992  // This information is crucial to know whether or not folding an
993  // operand is valid.
994  // Indeed, FastISel generates or reuses a virtual register for all
995  // operands of all instructions it selects. Obviously, the definition and
996  // its uses must use the same virtual register otherwise the produced
997  // code is incorrect.
998  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
999  // registers for values that are alive across basic blocks. This ensures
1000  // that the values are consistently set across basic blocks, even
1001  // if different instruction selection mechanisms are used (e.g., a mix of
1002  // SDISel and FastISel).
1003  // For values local to a basic block, the instruction selection process
1004  // generates these virtual registers with whatever method is appropriate
1005  // for its needs. In particular, FastISel and SDISel do not share the way
1006  // local virtual registers are set.
1007  // Therefore, it is impossible (or at least unsafe) to share values
1008  // between basic blocks unless they use the same instruction selection
1009  // method, which is not guaranteed for X86.
1010  // Moreover, things like hasOneUse could not be used accurately if we
1011  // allowed referencing values across basic blocks when they are not
1012  // alive across basic blocks initially.
1013  bool InMBB = true;
1014  if (I) {
1015  Opcode = I->getOpcode();
1016  U = I;
1017  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1018  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1019  Opcode = C->getOpcode();
1020  U = C;
1021  }
1022 
1023  switch (Opcode) {
1024  default: break;
1025  case Instruction::BitCast:
1026  // Look past bitcasts if its operand is in the same BB.
1027  if (InMBB)
1028  return X86SelectCallAddress(U->getOperand(0), AM);
1029  break;
1030 
1031  case Instruction::IntToPtr:
1032  // Look past no-op inttoptrs if its operand is in the same BB.
1033  if (InMBB &&
1034  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1035  TLI.getPointerTy(DL))
1036  return X86SelectCallAddress(U->getOperand(0), AM);
1037  break;
1038 
1039  case Instruction::PtrToInt:
1040  // Look past no-op ptrtoints if its operand is in the same BB.
1041  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1042  return X86SelectCallAddress(U->getOperand(0), AM);
1043  break;
1044  }
1045 
1046  // Handle constant address.
1047  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1048  // Can't handle alternate code models yet.
1049  if (TM.getCodeModel() != CodeModel::Small)
1050  return false;
1051 
1052  // RIP-relative addresses can't have additional register operands.
1053  if (Subtarget->isPICStyleRIPRel() &&
1054  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1055  return false;
1056 
1057  // Can't handle TLS.
1058  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1059  if (GVar->isThreadLocal())
1060  return false;
1061 
1062  // Okay, we've committed to selecting this global. Set up the basic address.
1063  AM.GV = GV;
1064 
1065  // Return a direct reference to the global. Fastisel can handle calls to
1066  // functions that require loads, such as dllimport and nonlazybind
1067  // functions.
1068  if (Subtarget->isPICStyleRIPRel()) {
1069  // Use rip-relative addressing if we can. Above we verified that the
1070  // base and index registers are unused.
1071  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1072  AM.Base.Reg = X86::RIP;
1073  } else {
1074  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1075  }
1076 
1077  return true;
1078  }
1079 
1080  // If all else fails, try to materialize the value in a register.
1081  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1082  auto GetCallRegForValue = [this](const Value *V) {
1083  Register Reg = getRegForValue(V);
1084 
1085  // In 64-bit mode, we need a 64-bit register even if pointers are 32 bits.
1086  if (Reg && Subtarget->isTarget64BitILP32()) {
1087  Register CopyReg = createResultReg(&X86::GR32RegClass);
1088  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32rr),
1089  CopyReg)
1090  .addReg(Reg);
1091 
1092  Register ExtReg = createResultReg(&X86::GR64RegClass);
1093  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1094  TII.get(TargetOpcode::SUBREG_TO_REG), ExtReg)
1095  .addImm(0)
1096  .addReg(CopyReg)
1097  .addImm(X86::sub_32bit);
1098  Reg = ExtReg;
1099  }
1100 
1101  return Reg;
1102  };
1103 
1104  if (AM.Base.Reg == 0) {
1105  AM.Base.Reg = GetCallRegForValue(V);
1106  return AM.Base.Reg != 0;
1107  }
1108  if (AM.IndexReg == 0) {
1109  assert(AM.Scale == 1 && "Scale with no index!");
1110  AM.IndexReg = GetCallRegForValue(V);
1111  return AM.IndexReg != 0;
1112  }
1113  }
1114 
1115  return false;
1116 }
1117 
1118 
1119 /// X86SelectStore - Select and emit code to implement store instructions.
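/// Atomic stores and stores through swifterror pointers are rejected here and
/// left to SelectionDAG; i1 values are accepted (AllowI1) and stored as a
/// masked i8.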
1120 bool X86FastISel::X86SelectStore(const Instruction *I) {
1121  // Atomic stores need special handling.
1122  const StoreInst *S = cast<StoreInst>(I);
1123 
1124  if (S->isAtomic())
1125  return false;
1126 
1127  const Value *PtrV = I->getOperand(1);
1128  if (TLI.supportSwiftError()) {
1129  // Swifterror values can come from either a function parameter with
1130  // swifterror attribute or an alloca with swifterror attribute.
1131  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1132  if (Arg->hasSwiftErrorAttr())
1133  return false;
1134  }
1135 
1136  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1137  if (Alloca->isSwiftError())
1138  return false;
1139  }
1140  }
1141 
1142  const Value *Val = S->getValueOperand();
1143  const Value *Ptr = S->getPointerOperand();
1144 
1145  MVT VT;
1146  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1147  return false;
1148 
1149  Align Alignment = S->getAlign();
1150  Align ABIAlignment = DL.getABITypeAlign(Val->getType());
1151  bool Aligned = Alignment >= ABIAlignment;
1152 
1153  X86AddressMode AM;
1154  if (!X86SelectAddress(Ptr, AM))
1155  return false;
1156 
1157  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1158 }
1159 
1160 /// X86SelectRet - Select and emit code to implement ret instructions.
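/// Bails out (deferring to SelectionDAG) for vararg functions, multiple
/// return values, x87 (FP0/FP1) returns, conventions that guarantee tail
/// calls (tailcc/swifttailcc, or fastcc with -tailcallopt), and any calling
/// convention not listed below.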
1161 bool X86FastISel::X86SelectRet(const Instruction *I) {
1162  const ReturnInst *Ret = cast<ReturnInst>(I);
1163  const Function &F = *I->getParent()->getParent();
1164  const X86MachineFunctionInfo *X86MFInfo =
1165  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1166 
1167  if (!FuncInfo.CanLowerReturn)
1168  return false;
1169 
1170  if (TLI.supportSwiftError() &&
1171  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1172  return false;
1173 
1174  if (TLI.supportSplitCSR(FuncInfo.MF))
1175  return false;
1176 
1177  CallingConv::ID CC = F.getCallingConv();
1178  if (CC != CallingConv::C &&
1179  CC != CallingConv::Fast &&
1180  CC != CallingConv::Tail &&
1181  CC != CallingConv::SwiftTail &&
1182  CC != CallingConv::X86_FastCall &&
1183  CC != CallingConv::X86_StdCall &&
1184  CC != CallingConv::X86_ThisCall &&
1185  CC != CallingConv::X86_64_SysV &&
1186  CC != CallingConv::Win64)
1187  return false;
1188 
1189  // Don't handle popping bytes if they don't fit the ret's immediate.
1190  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1191  return false;
1192 
1193  // fastcc with -tailcallopt is intended to provide a guaranteed
1194  // tail call optimization. Fastisel doesn't know how to do that.
1195  if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
1196  CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
1197  return false;
1198 
1199  // Let SDISel handle vararg functions.
1200  if (F.isVarArg())
1201  return false;
1202 
1203  // Build a list of return value registers.
1204  SmallVector<unsigned, 4> RetRegs;
1205 
1206  if (Ret->getNumOperands() > 0) {
1207  SmallVector<ISD::OutputArg, 4> Outs;
1208  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1209 
1210  // Analyze operands of the call, assigning locations to each operand.
1211  SmallVector<CCValAssign, 16> ValLocs;
1212  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1213  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1214 
1215  const Value *RV = Ret->getOperand(0);
1216  Register Reg = getRegForValue(RV);
1217  if (Reg == 0)
1218  return false;
1219 
1220  // Only handle a single return value for now.
1221  if (ValLocs.size() != 1)
1222  return false;
1223 
1224  CCValAssign &VA = ValLocs[0];
1225 
1226  // Don't bother handling odd stuff for now.
1227  if (VA.getLocInfo() != CCValAssign::Full)
1228  return false;
1229  // Only handle register returns for now.
1230  if (!VA.isRegLoc())
1231  return false;
1232 
1233  // The calling-convention tables for x87 returns don't tell
1234  // the whole story.
1235  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1236  return false;
1237 
1238  unsigned SrcReg = Reg + VA.getValNo();
1239  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1240  EVT DstVT = VA.getValVT();
1241  // Special handling for extended integers.
1242  if (SrcVT != DstVT) {
1243  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1244  return false;
1245 
1246  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1247  return false;
1248 
1249  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1250 
1251  if (SrcVT == MVT::i1) {
1252  if (Outs[0].Flags.isSExt())
1253  return false;
1254  // TODO
1255  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg);
1256  SrcVT = MVT::i8;
1257  }
1258  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1259  ISD::SIGN_EXTEND;
1260  // TODO
1261  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op, SrcReg);
1262  }
1263 
1264  // Make the copy.
1265  Register DstReg = VA.getLocReg();
1266  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1267  // Avoid a cross-class copy. This is very unlikely.
1268  if (!SrcRC->contains(DstReg))
1269  return false;
1270  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1271  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1272 
1273  // Add register to return instruction.
1274  RetRegs.push_back(VA.getLocReg());
1275  }
1276 
1277  // Swift calling convention does not require we copy the sret argument
1278  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1279 
1280  // All x86 ABIs require that for returning structs by value we copy
1281  // the sret argument into %rax/%eax (depending on ABI) for the return.
1282  // We saved the argument into a virtual register in the entry block,
1283  // so now we copy the value out and into %rax/%eax.
1284  if (F.hasStructRetAttr() && CC != CallingConv::Swift &&
1285  CC != CallingConv::SwiftTail) {
1286  Register Reg = X86MFInfo->getSRetReturnReg();
1287  assert(Reg &&
1288  "SRetReturnReg should have been set in LowerFormalArguments()!");
1289  unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1290  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1291  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1292  RetRegs.push_back(RetReg);
1293  }
1294 
1295  // Now emit the RET.
1296  MachineInstrBuilder MIB;
1297  if (X86MFInfo->getBytesToPopOnReturn()) {
1298  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1299  TII.get(Subtarget->is64Bit() ? X86::RETI64 : X86::RETI32))
1300  .addImm(X86MFInfo->getBytesToPopOnReturn());
1301  } else {
1302  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1303  TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32));
1304  }
1305  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1306  MIB.addReg(RetRegs[i], RegState::Implicit);
1307  return true;
1308 }
1309 
1310 /// X86SelectLoad - Select and emit code to implement load instructions.
1311 ///
1312 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1313  const LoadInst *LI = cast<LoadInst>(I);
1314 
1315  // Atomic loads need special handling.
1316  if (LI->isAtomic())
1317  return false;
1318 
1319  const Value *SV = I->getOperand(0);
1320  if (TLI.supportSwiftError()) {
1321  // Swifterror values can come from either a function parameter with
1322  // swifterror attribute or an alloca with swifterror attribute.
1323  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1324  if (Arg->hasSwiftErrorAttr())
1325  return false;
1326  }
1327 
1328  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1329  if (Alloca->isSwiftError())
1330  return false;
1331  }
1332  }
1333 
1334  MVT VT;
1335  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1336  return false;
1337 
1338  const Value *Ptr = LI->getPointerOperand();
1339 
1340  X86AddressMode AM;
1341  if (!X86SelectAddress(Ptr, AM))
1342  return false;
1343 
1344  unsigned ResultReg = 0;
1345  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1346  LI->getAlign().value()))
1347  return false;
1348 
1349  updateValueMap(I, ResultReg);
1350  return true;
1351 }
1352 
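/// Return the register-register compare opcode for the given type, e.g.
/// CMP32rr for i32 or (V)UCOMISS/(V)UCOMISD for scalar floats, or 0 if the
/// required SSE/AVX support is missing or the type is unsupported.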
1353 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1354  bool HasAVX512 = Subtarget->hasAVX512();
1355  bool HasAVX = Subtarget->hasAVX();
1356  bool HasSSE1 = Subtarget->hasSSE1();
1357  bool HasSSE2 = Subtarget->hasSSE2();
1358 
1359  switch (VT.getSimpleVT().SimpleTy) {
1360  default: return 0;
1361  case MVT::i8: return X86::CMP8rr;
1362  case MVT::i16: return X86::CMP16rr;
1363  case MVT::i32: return X86::CMP32rr;
1364  case MVT::i64: return X86::CMP64rr;
1365  case MVT::f32:
1366  return HasAVX512 ? X86::VUCOMISSZrr
1367  : HasAVX ? X86::VUCOMISSrr
1368  : HasSSE1 ? X86::UCOMISSrr
1369  : 0;
1370  case MVT::f64:
1371  return HasAVX512 ? X86::VUCOMISDZrr
1372  : HasAVX ? X86::VUCOMISDrr
1373  : HasSSE2 ? X86::UCOMISDrr
1374  : 0;
1375  }
1376 }
1377 
1378 /// If the RHS of the comparison is a constant we can fold into the compare,
1379 /// return an opcode that works for it (e.g. CMP32ri); otherwise return 0.
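/// For example, comparing an i32 register against a value that fits in a
/// sign-extended 8-bit immediate selects CMP32ri8 rather than CMP32ri, and a
/// 64-bit compare is only folded when the constant fits in a sign-extended
/// 32-bit field.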
1380 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1381  int64_t Val = RHSC->getSExtValue();
1382  switch (VT.getSimpleVT().SimpleTy) {
1383  // Otherwise, we can't fold the immediate into this comparison.
1384  default:
1385  return 0;
1386  case MVT::i8:
1387  return X86::CMP8ri;
1388  case MVT::i16:
1389  if (isInt<8>(Val))
1390  return X86::CMP16ri8;
1391  return X86::CMP16ri;
1392  case MVT::i32:
1393  if (isInt<8>(Val))
1394  return X86::CMP32ri8;
1395  return X86::CMP32ri;
1396  case MVT::i64:
1397  if (isInt<8>(Val))
1398  return X86::CMP64ri8;
1399  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1400  // field.
1401  if (isInt<32>(Val))
1402  return X86::CMP64ri32;
1403  return 0;
1404  }
1405 }
1406 
1407 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1408  const DebugLoc &CurDbgLoc) {
1409  Register Op0Reg = getRegForValue(Op0);
1410  if (Op0Reg == 0) return false;
1411 
1412  // Handle 'null' like i32/i64 0.
1413  if (isa<ConstantPointerNull>(Op1))
1414  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1415 
1416  // We have two options: compare with register or immediate. If the RHS of
1417  // the compare is an immediate that we can fold into this compare, use
1418  // CMPri, otherwise use CMPrr.
1419  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1420  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1421  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1422  .addReg(Op0Reg)
1423  .addImm(Op1C->getSExtValue());
1424  return true;
1425  }
1426  }
1427 
1428  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1429  if (CompareOpc == 0) return false;
1430 
1431  Register Op1Reg = getRegForValue(Op1);
1432  if (Op1Reg == 0) return false;
1433  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1434  .addReg(Op0Reg)
1435  .addReg(Op1Reg);
1436 
1437  return true;
1438 }
1439 
1440 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1441  const CmpInst *CI = cast<CmpInst>(I);
1442 
1443  MVT VT;
1444  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1445  return false;
1446 
1447  // Below code only works for scalars.
1448  if (VT.isVector())
1449  return false;
1450 
1451  // Try to optimize or fold the cmp.
1452  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1453  unsigned ResultReg = 0;
1454  switch (Predicate) {
1455  default: break;
1456  case CmpInst::FCMP_FALSE: {
1457  ResultReg = createResultReg(&X86::GR32RegClass);
1458  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1459  ResultReg);
1460  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, X86::sub_8bit);
1461  if (!ResultReg)
1462  return false;
1463  break;
1464  }
1465  case CmpInst::FCMP_TRUE: {
1466  ResultReg = createResultReg(&X86::GR8RegClass);
1467  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1468  ResultReg).addImm(1);
1469  break;
1470  }
1471  }
1472 
1473  if (ResultReg) {
1474  updateValueMap(I, ResultReg);
1475  return true;
1476  }
1477 
1478  const Value *LHS = CI->getOperand(0);
1479  const Value *RHS = CI->getOperand(1);
1480 
1481  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1482  // We don't have to materialize a zero constant for this case and can just use
1483  // %x again on the RHS.
1484  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1485  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1486  if (RHSC && RHSC->isNullValue())
1487  RHS = LHS;
1488  }
1489 
1490  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1491  static const uint16_t SETFOpcTable[2][3] = {
1492  { X86::COND_E, X86::COND_NP, X86::AND8rr },
1493  { X86::COND_NE, X86::COND_P, X86::OR8rr }
1494  };
1495  const uint16_t *SETFOpc = nullptr;
1496  switch (Predicate) {
1497  default: break;
1498  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1499  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1500  }
1501 
1502  ResultReg = createResultReg(&X86::GR8RegClass);
1503  if (SETFOpc) {
1504  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1505  return false;
1506 
1507  Register FlagReg1 = createResultReg(&X86::GR8RegClass);
1508  Register FlagReg2 = createResultReg(&X86::GR8RegClass);
1509  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1510  FlagReg1).addImm(SETFOpc[0]);
1511  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1512  FlagReg2).addImm(SETFOpc[1]);
1513  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1514  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1515  updateValueMap(I, ResultReg);
1516  return true;
1517  }
1518 
1519  X86::CondCode CC;
1520  bool SwapArgs;
1521  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1522  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1523 
1524  if (SwapArgs)
1525  std::swap(LHS, RHS);
1526 
1527  // Emit a compare of LHS/RHS.
1528  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1529  return false;
1530 
1531  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1532  ResultReg).addImm(CC);
1533  updateValueMap(I, ResultReg);
1534  return true;
1535 }
1536 
1537 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1538  EVT DstVT = TLI.getValueType(DL, I->getType());
1539  if (!TLI.isTypeLegal(DstVT))
1540  return false;
1541 
1542  Register ResultReg = getRegForValue(I->getOperand(0));
1543  if (ResultReg == 0)
1544  return false;
1545 
1546  // Handle zero-extension from i1 to i8, which is common.
1547  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1548  if (SrcVT == MVT::i1) {
1549  // Set the high bits to zero.
1550  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
1551  SrcVT = MVT::i8;
1552 
1553  if (ResultReg == 0)
1554  return false;
1555  }
1556 
1557  if (DstVT == MVT::i64) {
1558  // Handle extension to 64-bits via sub-register shenanigans.
1559  unsigned MovInst;
1560 
1561  switch (SrcVT.SimpleTy) {
1562  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1563  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1564  case MVT::i32: MovInst = X86::MOV32rr; break;
1565  default: llvm_unreachable("Unexpected zext to i64 source type");
1566  }
1567 
1568  Register Result32 = createResultReg(&X86::GR32RegClass);
1569  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1570  .addReg(ResultReg);
1571 
1572  ResultReg = createResultReg(&X86::GR64RegClass);
1573  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1574  ResultReg)
1575  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1576  } else if (DstVT == MVT::i16) {
1577  // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1578  // extend to 32-bits and then extract down to 16-bits.
1579  Register Result32 = createResultReg(&X86::GR32RegClass);
1580  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1581  Result32).addReg(ResultReg);
1582 
1583  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
1584  } else if (DstVT != MVT::i8) {
1585  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1586  ResultReg);
1587  if (ResultReg == 0)
1588  return false;
1589  }
1590 
1591  updateValueMap(I, ResultReg);
1592  return true;
1593 }
1594 
1595 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1596  EVT DstVT = TLI.getValueType(DL, I->getType());
1597  if (!TLI.isTypeLegal(DstVT))
1598  return false;
1599 
1600  Register ResultReg = getRegForValue(I->getOperand(0));
1601  if (ResultReg == 0)
1602  return false;
1603 
1604  // Handle sign-extension from i1 to i8.
1605  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1606  if (SrcVT == MVT::i1) {
1607  // Set the high bits to zero.
1608  Register ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
1609  if (ZExtReg == 0)
1610  return false;
1611 
1612  // Negate the result to make an 8-bit sign extended value.
1613  ResultReg = createResultReg(&X86::GR8RegClass);
1614  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1615  ResultReg).addReg(ZExtReg);
1616 
1617  SrcVT = MVT::i8;
1618  }
1619 
1620  if (DstVT == MVT::i16) {
1621  // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1622  // extend to 32-bits and then extract down to 16-bits.
1623  Register Result32 = createResultReg(&X86::GR32RegClass);
1624  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1625  Result32).addReg(ResultReg);
1626 
1627  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
1628  } else if (DstVT != MVT::i8) {
1629  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1630  ResultReg);
1631  if (ResultReg == 0)
1632  return false;
1633  }
1634 
1635  updateValueMap(I, ResultReg);
1636  return true;
1637 }
1638 
1639 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1640  // Unconditional branches are selected by tablegen-generated code.
1641  // Handle a conditional branch.
1642  const BranchInst *BI = cast<BranchInst>(I);
1643  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1644  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1645 
1646  // Fold the common case of a conditional branch with a comparison
1647  // in the same block (values defined on other blocks may not have
1648  // initialized registers).
1649  X86::CondCode CC;
1650  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1651  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1652  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1653 
1654  // Try to optimize or fold the cmp.
1655  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1656  switch (Predicate) {
1657  default: break;
1658  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1659  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1660  }
1661 
1662  const Value *CmpLHS = CI->getOperand(0);
1663  const Value *CmpRHS = CI->getOperand(1);
1664 
1665  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1666  // 0.0.
1667  // We don't have to materialize a zero constant for this case and can just
1668  // use %x again on the RHS.
1669  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1670  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1671  if (CmpRHSC && CmpRHSC->isNullValue())
1672  CmpRHS = CmpLHS;
1673  }
1674 
1675  // Try to take advantage of fallthrough opportunities.
1676  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1677  std::swap(TrueMBB, FalseMBB);
1678  Predicate = CmpInst::getInversePredicate(Predicate);
1679  }
1680 
1681  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1682  // code check. Instead two branch instructions are required to check all
1683  // the flags. First we change the predicate to a supported condition code,
1684  // which will be the first branch. Later on we will emit the second
1685  // branch.
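 // For example, assuming an SSE compare, "br (fcmp une float %a, %b)" is
 // emitted roughly as:
 //   ucomiss %b, %a
 //   jne  true_bb
 //   jp   true_bb
 //   jmp  false_bb   (or a fallthrough)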
1686  bool NeedExtraBranch = false;
1687  switch (Predicate) {
1688  default: break;
1689  case CmpInst::FCMP_OEQ:
1690  std::swap(TrueMBB, FalseMBB);
1691  [[fallthrough]];
1692  case CmpInst::FCMP_UNE:
1693  NeedExtraBranch = true;
1694  Predicate = CmpInst::FCMP_ONE;
1695  break;
1696  }
1697 
1698  bool SwapArgs;
1699  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1700  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1701 
1702  if (SwapArgs)
1703  std::swap(CmpLHS, CmpRHS);
1704 
1705  // Emit a compare of the LHS and RHS, setting the flags.
1706  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1707  return false;
1708 
1709  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1710  .addMBB(TrueMBB).addImm(CC);
1711 
1712  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1713  // to UNE above).
1714  if (NeedExtraBranch) {
1715  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1716  .addMBB(TrueMBB).addImm(X86::COND_P);
1717  }
1718 
1719  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1720  return true;
1721  }
1722  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1723  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1724  // typically happen for _Bool and C++ bools.
1725  MVT SourceVT;
1726  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1727  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1728  unsigned TestOpc = 0;
1729  switch (SourceVT.SimpleTy) {
1730  default: break;
1731  case MVT::i8: TestOpc = X86::TEST8ri; break;
1732  case MVT::i16: TestOpc = X86::TEST16ri; break;
1733  case MVT::i32: TestOpc = X86::TEST32ri; break;
1734  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1735  }
1736  if (TestOpc) {
1737  Register OpReg = getRegForValue(TI->getOperand(0));
1738  if (OpReg == 0) return false;
1739 
1740  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1741  .addReg(OpReg).addImm(1);
1742 
1743  unsigned JmpCond = X86::COND_NE;
1744  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1745  std::swap(TrueMBB, FalseMBB);
1746  JmpCond = X86::COND_E;
1747  }
1748 
1749  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1750  .addMBB(TrueMBB).addImm(JmpCond);
1751 
1752  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1753  return true;
1754  }
1755  }
1756  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1757  // Fake request the condition, otherwise the intrinsic might be completely
1758  // optimized away.
1759  Register TmpReg = getRegForValue(BI->getCondition());
1760  if (TmpReg == 0)
1761  return false;
1762 
1763  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1764  .addMBB(TrueMBB).addImm(CC);
1765  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1766  return true;
1767  }
1768 
1769  // Otherwise do a clumsy setcc and re-test it.
1770  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1771  // in an explicit cast, so make sure to handle that correctly.
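 // The fallback sequence below is roughly "testb $1, %cond" followed by
 // "jne true_bb" and a branch (or fallthrough) to false_bb.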
1772  Register OpReg = getRegForValue(BI->getCondition());
1773  if (OpReg == 0) return false;
1774 
1775  // In case OpReg is a K register, COPY to a GPR
1776  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1777  unsigned KOpReg = OpReg;
1778  OpReg = createResultReg(&X86::GR32RegClass);
1779  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1780  TII.get(TargetOpcode::COPY), OpReg)
1781  .addReg(KOpReg);
1782  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, X86::sub_8bit);
1783  }
1784  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1785  .addReg(OpReg)
1786  .addImm(1);
1787  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1788  .addMBB(TrueMBB).addImm(X86::COND_NE);
1789  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1790  return true;
1791 }
1792 
1793 bool X86FastISel::X86SelectShift(const Instruction *I) {
1794  unsigned CReg = 0, OpReg = 0;
1795  const TargetRegisterClass *RC = nullptr;
1796  if (I->getType()->isIntegerTy(8)) {
1797  CReg = X86::CL;
1798  RC = &X86::GR8RegClass;
1799  switch (I->getOpcode()) {
1800  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1801  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1802  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1803  default: return false;
1804  }
1805  } else if (I->getType()->isIntegerTy(16)) {
1806  CReg = X86::CX;
1807  RC = &X86::GR16RegClass;
1808  switch (I->getOpcode()) {
1809  default: llvm_unreachable("Unexpected shift opcode");
1810  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1811  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1812  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1813  }
1814  } else if (I->getType()->isIntegerTy(32)) {
1815  CReg = X86::ECX;
1816  RC = &X86::GR32RegClass;
1817  switch (I->getOpcode()) {
1818  default: llvm_unreachable("Unexpected shift opcode");
1819  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1820  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1821  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1822  }
1823  } else if (I->getType()->isIntegerTy(64)) {
1824  CReg = X86::RCX;
1825  RC = &X86::GR64RegClass;
1826  switch (I->getOpcode()) {
1827  default: llvm_unreachable("Unexpected shift opcode");
1828  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1829  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1830  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1831  }
1832  } else {
1833  return false;
1834  }
1835 
1836  MVT VT;
1837  if (!isTypeLegal(I->getType(), VT))
1838  return false;
1839 
1840  Register Op0Reg = getRegForValue(I->getOperand(0));
1841  if (Op0Reg == 0) return false;
1842 
1843  Register Op1Reg = getRegForValue(I->getOperand(1));
1844  if (Op1Reg == 0) return false;
1845  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1846  CReg).addReg(Op1Reg);
1847 
1848  // The shift instruction uses X86::CL. If we defined a super-register
1849  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1850  if (CReg != X86::CL)
1851  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1852  TII.get(TargetOpcode::KILL), X86::CL)
1853  .addReg(CReg, RegState::Kill);
1854 
1855  Register ResultReg = createResultReg(RC);
1856  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1857  .addReg(Op0Reg);
1858  updateValueMap(I, ResultReg);
1859  return true;
1860 }
1861 
1862 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1863  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1864  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1865  const static bool S = true; // IsSigned
1866  const static bool U = false; // !IsSigned
1867  const static unsigned Copy = TargetOpcode::COPY;
1868  // For the X86 DIV/IDIV instruction, in most cases the dividend
1869  // (numerator) must be in a specific register pair highreg:lowreg,
1870  // producing the quotient in lowreg and the remainder in highreg.
1871  // For most data types, to set up the instruction, the dividend is
1872  // copied into lowreg, and lowreg is sign-extended or zero-extended
1873  // into highreg. The exception is i8, where the dividend is defined
1874  // as a single register rather than a register pair, and we
1875  // therefore directly sign-extend or zero-extend the dividend into
1876  // lowreg, instead of copying, and ignore the highreg.
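 // For example, an i32 "sdiv %a, %b" is emitted roughly as
 //   movl %a, %eax
 //   cdq
 //   idivl %b
 // leaving the quotient in EAX and the remainder in EDX, while an i8 division
 // sign/zero-extends the dividend directly into AX and uses IDIV8r/DIV8r.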
1877  const static struct DivRemEntry {
1878  // The following portion depends only on the data type.
1879  const TargetRegisterClass *RC;
1880  unsigned LowInReg; // low part of the register pair
1881  unsigned HighInReg; // high part of the register pair
1882  // The following portion depends on both the data type and the operation.
1883  struct DivRemResult {
1884  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1885  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1886  // highreg, or copying a zero into highreg.
1887  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1888  // zero/sign-extending into lowreg for i8.
1889  unsigned DivRemResultReg; // Register containing the desired result.
1890  bool IsOpSigned; // Whether to use signed or unsigned form.
1891  } ResultTable[NumOps];
1892  } OpTable[NumTypes] = {
1893  { &X86::GR8RegClass, X86::AX, 0, {
1894  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1895  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1896  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1897  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1898  }
1899  }, // i8
1900  { &X86::GR16RegClass, X86::AX, X86::DX, {
1901  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1902  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1903  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1904  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1905  }
1906  }, // i16
1907  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1908  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1909  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1910  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1911  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1912  }
1913  }, // i32
1914  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1915  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1916  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1917  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1918  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1919  }
1920  }, // i64
1921  };
1922 
1923  MVT VT;
1924  if (!isTypeLegal(I->getType(), VT))
1925  return false;
1926 
1927  unsigned TypeIndex, OpIndex;
1928  switch (VT.SimpleTy) {
1929  default: return false;
1930  case MVT::i8: TypeIndex = 0; break;
1931  case MVT::i16: TypeIndex = 1; break;
1932  case MVT::i32: TypeIndex = 2; break;
1933  case MVT::i64: TypeIndex = 3;
1934  if (!Subtarget->is64Bit())
1935  return false;
1936  break;
1937  }
1938 
1939  switch (I->getOpcode()) {
1940  default: llvm_unreachable("Unexpected div/rem opcode");
1941  case Instruction::SDiv: OpIndex = 0; break;
1942  case Instruction::SRem: OpIndex = 1; break;
1943  case Instruction::UDiv: OpIndex = 2; break;
1944  case Instruction::URem: OpIndex = 3; break;
1945  }
1946 
1947  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1948  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1949  Register Op0Reg = getRegForValue(I->getOperand(0));
1950  if (Op0Reg == 0)
1951  return false;
1952  Register Op1Reg = getRegForValue(I->getOperand(1));
1953  if (Op1Reg == 0)
1954  return false;
1955 
1956  // Move op0 into low-order input register.
1957  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1958  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1959  // Zero-extend or sign-extend into high-order input register.
1960  if (OpEntry.OpSignExtend) {
1961  if (OpEntry.IsOpSigned)
1962  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1963  TII.get(OpEntry.OpSignExtend));
1964  else {
1965  Register Zero32 = createResultReg(&X86::GR32RegClass);
1966  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1967  TII.get(X86::MOV32r0), Zero32);
1968 
1969  // Copy the zero into the appropriate sub/super/identical physical
1970  // register. Unfortunately the operations needed are not uniform enough
1971  // to fit neatly into the table above.
1972  if (VT == MVT::i16) {
1973  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1974  TII.get(Copy), TypeEntry.HighInReg)
1975  .addReg(Zero32, 0, X86::sub_16bit);
1976  } else if (VT == MVT::i32) {
1977  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1978  TII.get(Copy), TypeEntry.HighInReg)
1979  .addReg(Zero32);
1980  } else if (VT == MVT::i64) {
1981  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1982  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1983  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1984  }
1985  }
1986  }
1987  // Generate the DIV/IDIV instruction.
1988  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1989  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1990  // For i8 remainder, we can't reference ah directly, as we'll end
1991  // up with bogus copies like %r9b = COPY %ah. Reference ax
1992  // instead to prevent ah references in a rex instruction.
1993  //
1994  // The current assumption of the fast register allocator is that isel
1995  // won't generate explicit references to the GR8_NOREX registers. If
1996  // the allocator and/or the backend get enhanced to be more robust in
1997  // that regard, this can be, and should be, removed.
1998  unsigned ResultReg = 0;
1999  if ((I->getOpcode() == Instruction::SRem ||
2000  I->getOpcode() == Instruction::URem) &&
2001  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
2002  Register SourceSuperReg = createResultReg(&X86::GR16RegClass);
2003  Register ResultSuperReg = createResultReg(&X86::GR16RegClass);
2004  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2005  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
2006 
2007  // Shift AX right by 8 bits instead of using AH.
2008  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
2009  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
2010 
2011  // Now reference the 8-bit subreg of the result.
2012  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2013  X86::sub_8bit);
2014  }
2015  // Copy the result out of the physreg if we haven't already.
2016  if (!ResultReg) {
2017  ResultReg = createResultReg(TypeEntry.RC);
2018  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
2019  .addReg(OpEntry.DivRemResultReg);
2020  }
2021  updateValueMap(I, ResultReg);
2022 
2023  return true;
2024 }
2025 
2026 /// Emit a conditional move instruction (if they are supported) to lower
2027 /// the select.
2028 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2029  // Check if the subtarget supports these instructions.
2030  if (!Subtarget->canUseCMOV())
2031  return false;
2032 
2033  // FIXME: Add support for i8.
2034  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2035  return false;
2036 
2037  const Value *Cond = I->getOperand(0);
2038  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2039  bool NeedTest = true;
2040  X86::CondCode CC = X86::COND_NE;
2041 
2042  // Optimize conditions coming from a compare if both instructions are in the
2043  // same basic block (values defined in other basic blocks may not have
2044  // initialized registers).
2045  const auto *CI = dyn_cast<CmpInst>(Cond);
2046  if (CI && (CI->getParent() == I->getParent())) {
2047  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2048 
2049  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2050  static const uint16_t SETFOpcTable[2][3] = {
2051  { X86::COND_NP, X86::COND_E, X86::TEST8rr },
2052  { X86::COND_P, X86::COND_NE, X86::OR8rr }
2053  };
2054  const uint16_t *SETFOpc = nullptr;
2055  switch (Predicate) {
2056  default: break;
2057  case CmpInst::FCMP_OEQ:
2058  SETFOpc = &SETFOpcTable[0][0];
2059  Predicate = CmpInst::ICMP_NE;
2060  break;
2061  case CmpInst::FCMP_UNE:
2062  SETFOpc = &SETFOpcTable[1][0];
2063  Predicate = CmpInst::ICMP_NE;
2064  break;
2065  }
2066 
2067  bool NeedSwap;
2068  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2069  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2070 
2071  const Value *CmpLHS = CI->getOperand(0);
2072  const Value *CmpRHS = CI->getOperand(1);
2073  if (NeedSwap)
2074  std::swap(CmpLHS, CmpRHS);
2075 
2076  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2077  // Emit a compare of the LHS and RHS, setting the flags.
2078  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2079  return false;
2080 
2081  if (SETFOpc) {
2082  Register FlagReg1 = createResultReg(&X86::GR8RegClass);
2083  Register FlagReg2 = createResultReg(&X86::GR8RegClass);
2084  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2085  FlagReg1).addImm(SETFOpc[0]);
2086  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2087  FlagReg2).addImm(SETFOpc[1]);
2088  auto const &II = TII.get(SETFOpc[2]);
2089  if (II.getNumDefs()) {
2090  Register TmpReg = createResultReg(&X86::GR8RegClass);
2091  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2092  .addReg(FlagReg2).addReg(FlagReg1);
2093  } else {
2094  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2095  .addReg(FlagReg2).addReg(FlagReg1);
2096  }
2097  }
2098  NeedTest = false;
2099  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2100  // Fake request the condition, otherwise the intrinsic might be completely
2101  // optimized away.
2102  Register TmpReg = getRegForValue(Cond);
2103  if (TmpReg == 0)
2104  return false;
2105 
2106  NeedTest = false;
2107  }
2108 
2109  if (NeedTest) {
2110  // Selects operate on i1, however, CondReg is 8 bits wide and may contain
2111  // garbage. Indeed, only the least significant bit is supposed to be
2112  // accurate. If we read more than the lsb, we may see non-zero values
2113  // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for
2114  // the select. This is achieved by performing TEST against 1.
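 // The emitted sequence is then roughly "testb $1, %cond" followed by the
 // CMOV below, which selects the true value whenever the tested bit is set.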
2115  Register CondReg = getRegForValue(Cond);
2116  if (CondReg == 0)
2117  return false;
2118 
2119  // In case OpReg is a K register, COPY to a GPR
2120  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2121  unsigned KCondReg = CondReg;
2122  CondReg = createResultReg(&X86::GR32RegClass);
2123  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2124  TII.get(TargetOpcode::COPY), CondReg)
2125  .addReg(KCondReg);
2126  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
2127  }
2128  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2129  .addReg(CondReg)
2130  .addImm(1);
2131  }
2132 
2133  const Value *LHS = I->getOperand(1);
2134  const Value *RHS = I->getOperand(2);
2135 
2136  Register RHSReg = getRegForValue(RHS);
2137  Register LHSReg = getRegForValue(LHS);
2138  if (!LHSReg || !RHSReg)
2139  return false;
2140 
2141  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2142  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8);
2143  Register ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
2144  updateValueMap(I, ResultReg);
2145  return true;
2146 }
2147 
2148 /// Emit SSE or AVX instructions to lower the select.
2149 ///
2150 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2151 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2152 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
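///
/// Without AVX, "select (fcmp olt %a, %b), %x, %y" is lowered roughly to
///   mask = cmpltss %b, %a
///   result = (mask & %x) | (~mask & %y)
/// using CMPSS/ANDPS/ANDNPS/ORPS (or the PD variants for f64).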
2153 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2154  // Optimize conditions coming from a compare if both instructions are in the
2155  // same basic block (values defined in other basic blocks may not have
2156  // initialized registers).
2157  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2158  if (!CI || (CI->getParent() != I->getParent()))
2159  return false;
2160 
2161  if (I->getType() != CI->getOperand(0)->getType() ||
2162  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2163  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2164  return false;
2165 
2166  const Value *CmpLHS = CI->getOperand(0);
2167  const Value *CmpRHS = CI->getOperand(1);
2168  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2169 
2170  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2171  // We don't have to materialize a zero constant for this case and can just use
2172  // %x again on the RHS.
2173  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2174  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2175  if (CmpRHSC && CmpRHSC->isNullValue())
2176  CmpRHS = CmpLHS;
2177  }
2178 
2179  unsigned CC;
2180  bool NeedSwap;
2181  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2182  if (CC > 7 && !Subtarget->hasAVX())
2183  return false;
2184 
2185  if (NeedSwap)
2186  std::swap(CmpLHS, CmpRHS);
2187 
2188  const Value *LHS = I->getOperand(1);
2189  const Value *RHS = I->getOperand(2);
2190 
2191  Register LHSReg = getRegForValue(LHS);
2192  Register RHSReg = getRegForValue(RHS);
2193  Register CmpLHSReg = getRegForValue(CmpLHS);
2194  Register CmpRHSReg = getRegForValue(CmpRHS);
2195  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2196  return false;
2197 
2198  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2199  unsigned ResultReg;
2200 
2201  if (Subtarget->hasAVX512()) {
2202  // If we have AVX512 we can use a mask compare and masked movss/sd.
2203  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2204  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2205 
2206  unsigned CmpOpcode =
2207  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2208  Register CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpRHSReg,
2209  CC);
2210 
2211  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2212  // bits of the result register since it's not based on any of the inputs.
2213  Register ImplicitDefReg = createResultReg(VR128X);
2214  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2215  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2216 
2217  // Place RHSReg in the passthru of the masked movss/sd operation and put
2218  // LHS in the input. The mask input comes from the compare.
2219  unsigned MovOpcode =
2220  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2221  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, CmpReg,
2222  ImplicitDefReg, LHSReg);
2223 
2224  ResultReg = createResultReg(RC);
2225  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2226  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2227 
2228  } else if (Subtarget->hasAVX()) {
2229  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2230 
2231  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2232  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2233  // uses XMM0 as the selection register. That may need just as many
2234  // instructions as the AND/ANDN/OR sequence due to register moves, so
2235  // don't bother.
2236  unsigned CmpOpcode =
2237  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2238  unsigned BlendOpcode =
2239  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2240 
2241  Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg,
2242  CC);
2243  Register VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, LHSReg,
2244  CmpReg);
2245  ResultReg = createResultReg(RC);
2246  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2247  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2248  } else {
2249  // Choose the SSE instruction sequence based on data type (float or double).
2250  static const uint16_t OpcTable[2][4] = {
2251  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2252  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2253  };
2254 
2255  const uint16_t *Opc = nullptr;
2256  switch (RetVT.SimpleTy) {
2257  default: return false;
2258  case MVT::f32: Opc = &OpcTable[0][0]; break;
2259  case MVT::f64: Opc = &OpcTable[1][0]; break;
2260  }
2261 
2262  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2263  Register CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpRHSReg, CC);
2264  Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, LHSReg);
2265  Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, RHSReg);
2266  Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, AndReg);
2267  ResultReg = createResultReg(RC);
2268  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2269  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2270  }
2271  updateValueMap(I, ResultReg);
2272  return true;
2273 }
2274 
2275 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2276  // These are pseudo CMOV instructions and will be later expanded into control-
2277  // flow.
2278  unsigned Opc;
2279  switch (RetVT.SimpleTy) {
2280  default: return false;
2281  case MVT::i8: Opc = X86::CMOV_GR8; break;
2282  case MVT::i16: Opc = X86::CMOV_GR16; break;
2283  case MVT::i32: Opc = X86::CMOV_GR32; break;
2284  case MVT::f16:
2285  Opc = Subtarget->hasAVX512() ? X86::CMOV_FR16X : X86::CMOV_FR16; break;
2286  case MVT::f32:
2287  Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X : X86::CMOV_FR32; break;
2288  case MVT::f64:
2289  Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X : X86::CMOV_FR64; break;
2290  }
2291 
2292  const Value *Cond = I->getOperand(0);
2293  X86::CondCode CC = X86::COND_NE;
2294 
2295  // Optimize conditions coming from a compare if both instructions are in the
2296  // same basic block (values defined in other basic blocks may not have
2297  // initialized registers).
2298  const auto *CI = dyn_cast<CmpInst>(Cond);
2299  if (CI && (CI->getParent() == I->getParent())) {
2300  bool NeedSwap;
2301  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2302  if (CC > X86::LAST_VALID_COND)
2303  return false;
2304 
2305  const Value *CmpLHS = CI->getOperand(0);
2306  const Value *CmpRHS = CI->getOperand(1);
2307 
2308  if (NeedSwap)
2309  std::swap(CmpLHS, CmpRHS);
2310 
2311  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2312  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2313  return false;
2314  } else {
2315  Register CondReg = getRegForValue(Cond);
2316  if (CondReg == 0)
2317  return false;
2318 
2319  // In case OpReg is a K register, COPY to a GPR
2320  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2321  unsigned KCondReg = CondReg;
2322  CondReg = createResultReg(&X86::GR32RegClass);
2323  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2324  TII.get(TargetOpcode::COPY), CondReg)
2325  .addReg(KCondReg);
2326  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
2327  }
2328  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2329  .addReg(CondReg)
2330  .addImm(1);
2331  }
2332 
2333  const Value *LHS = I->getOperand(1);
2334  const Value *RHS = I->getOperand(2);
2335 
2336  Register LHSReg = getRegForValue(LHS);
2337  Register RHSReg = getRegForValue(RHS);
2338  if (!LHSReg || !RHSReg)
2339  return false;
2340 
2341  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2342 
2343  Register ResultReg =
2344  fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
2345  updateValueMap(I, ResultReg);
2346  return true;
2347 }
2348 
2349 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2350  MVT RetVT;
2351  if (!isTypeLegal(I->getType(), RetVT))
2352  return false;
2353 
2354  // Check if we can fold the select.
2355  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2356  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2357  const Value *Opnd = nullptr;
2358  switch (Predicate) {
2359  default: break;
2360  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2361  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2362  }
2363  // No need for a select anymore - this is an unconditional move.
2364  if (Opnd) {
2365  Register OpReg = getRegForValue(Opnd);
2366  if (OpReg == 0)
2367  return false;
2368  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2369  Register ResultReg = createResultReg(RC);
2370  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2371  TII.get(TargetOpcode::COPY), ResultReg)
2372  .addReg(OpReg);
2373  updateValueMap(I, ResultReg);
2374  return true;
2375  }
2376  }
2377 
2378  // First try to use real conditional move instructions.
2379  if (X86FastEmitCMoveSelect(RetVT, I))
2380  return true;
2381 
2382  // Try to use a sequence of SSE instructions to simulate a conditional move.
2383  if (X86FastEmitSSESelect(RetVT, I))
2384  return true;
2385 
2386  // Fall-back to pseudo conditional move instructions, which will be later
2387  // converted to control-flow.
2388  if (X86FastEmitPseudoSelect(RetVT, I))
2389  return true;
2390 
2391  return false;
2392 }
2393 
2394 // Common code for X86SelectSIToFP and X86SelectUIToFP.
2395 bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2396  // The target-independent selection algorithm in FastISel already knows how
2397  // to select a SINT_TO_FP if the target is SSE but not AVX.
2398  // Early exit if the subtarget doesn't have AVX.
2399  // Unsigned conversion requires avx512.
2400  bool HasAVX512 = Subtarget->hasAVX512();
2401  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2402  return false;
2403 
2404  // TODO: We could sign extend narrower types.
2405  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2406  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
2407  return false;
2408 
2409  // Select integer to float/double conversion.
2410  Register OpReg = getRegForValue(I->getOperand(0));
2411  if (OpReg == 0)
2412  return false;
2413 
2414  unsigned Opcode;
2415 
2416  static const uint16_t SCvtOpc[2][2][2] = {
2417  { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2418  { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2419  { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2420  { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2421  };
2422  static const uint16_t UCvtOpc[2][2] = {
2423  { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2424  { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2425  };
2426  bool Is64Bit = SrcVT == MVT::i64;
2427 
2428  if (I->getType()->isDoubleTy()) {
2429  // s/uitofp int -> double
2430  Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2431  } else if (I->getType()->isFloatTy()) {
2432  // s/uitofp int -> float
2433  Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2434  } else
2435  return false;
2436 
2437  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
2438  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
2439  Register ImplicitDefReg = createResultReg(RC);
2440  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2441  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2442  Register ResultReg = fastEmitInst_rr(Opcode, RC, ImplicitDefReg, OpReg);
2443  updateValueMap(I, ResultReg);
2444  return true;
2445 }
2446 
2447 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2448  return X86SelectIntToFP(I, /*IsSigned*/true);
2449 }
2450 
2451 bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2452  return X86SelectIntToFP(I, /*IsSigned*/false);
2453 }
2454 
2455 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2456 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2457  unsigned TargetOpc,
2458  const TargetRegisterClass *RC) {
2459  assert((I->getOpcode() == Instruction::FPExt ||
2460  I->getOpcode() == Instruction::FPTrunc) &&
2461  "Instruction must be an FPExt or FPTrunc!");
2462  bool HasAVX = Subtarget->hasAVX();
2463 
2464  Register OpReg = getRegForValue(I->getOperand(0));
2465  if (OpReg == 0)
2466  return false;
2467 
2468  unsigned ImplicitDefReg;
2469  if (HasAVX) {
2470  ImplicitDefReg = createResultReg(RC);
2471  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2472  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2473 
2474  }
2475 
2476  Register ResultReg = createResultReg(RC);
2477  MachineInstrBuilder MIB;
2478  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2479  ResultReg);
2480 
2481  if (HasAVX)
2482  MIB.addReg(ImplicitDefReg);
2483 
2484  MIB.addReg(OpReg);
2485  updateValueMap(I, ResultReg);
2486  return true;
2487 }
2488 
2489 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2490  if (Subtarget->hasSSE2() && I->getType()->isDoubleTy() &&
2491  I->getOperand(0)->getType()->isFloatTy()) {
2492  bool HasAVX512 = Subtarget->hasAVX512();
2493  // fpext from float to double.
2494  unsigned Opc =
2495  HasAVX512 ? X86::VCVTSS2SDZrr
2496  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2497  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
2498  }
2499 
2500  return false;
2501 }
2502 
2503 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2504  if (Subtarget->hasSSE2() && I->getType()->isFloatTy() &&
2505  I->getOperand(0)->getType()->isDoubleTy()) {
2506  bool HasAVX512 = Subtarget->hasAVX512();
2507  // fptrunc from double to float.
2508  unsigned Opc =
2509  HasAVX512 ? X86::VCVTSD2SSZrr
2510  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2511  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
2512  }
2513 
2514  return false;
2515 }
2516 
2517 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2518  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2519  EVT DstVT = TLI.getValueType(DL, I->getType());
2520 
2521  // This code only handles truncation to byte.
2522  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2523  return false;
2524  if (!TLI.isTypeLegal(SrcVT))
2525  return false;
2526 
2527  Register InputReg = getRegForValue(I->getOperand(0));
2528  if (!InputReg)
2529  // Unhandled operand. Halt "fast" selection and bail.
2530  return false;
2531 
2532  if (SrcVT == MVT::i8) {
2533  // Truncate from i8 to i1; no code needed.
2534  updateValueMap(I, InputReg);
2535  return true;
2536  }
2537 
2538  // Issue an extract_subreg.
2539  Register ResultReg = fastEmitInst_extractsubreg(MVT::i8, InputReg,
2540  X86::sub_8bit);
2541  if (!ResultReg)
2542  return false;
2543 
2544  updateValueMap(I, ResultReg);
2545  return true;
2546 }
2547 
2548 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2549  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2550 }
2551 
2552 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2553  X86AddressMode SrcAM, uint64_t Len) {
2554 
2555  // Make sure we don't bloat code by inlining very large memcpy's.
2556  if (!IsMemcpySmall(Len))
2557  return false;
2558 
2559  bool i64Legal = Subtarget->is64Bit();
2560 
2561  // We don't care about alignment here since we just emit integer accesses.
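 // For example, a 13-byte memcpy on x86-64 is emitted as one i64, one i32,
 // and one i8 load/store pair.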
2562  while (Len) {
2563  MVT VT;
2564  if (Len >= 8 && i64Legal)
2565  VT = MVT::i64;
2566  else if (Len >= 4)
2567  VT = MVT::i32;
2568  else if (Len >= 2)
2569  VT = MVT::i16;
2570  else
2571  VT = MVT::i8;
2572 
2573  unsigned Reg;
2574  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2575  RV &= X86FastEmitStore(VT, Reg, DestAM);
2576  assert(RV && "Failed to emit load or store??");
2577  (void)RV;
2578 
2579  unsigned Size = VT.getSizeInBits()/8;
2580  Len -= Size;
2581  DestAM.Disp += Size;
2582  SrcAM.Disp += Size;
2583  }
2584 
2585  return true;
2586 }
2587 
2588 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2589  // FIXME: Handle more intrinsics.
2590  switch (II->getIntrinsicID()) {
2591  default: return false;
2592  case Intrinsic::convert_from_fp16:
2593  case Intrinsic::convert_to_fp16: {
2594  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2595  return false;
2596 
2597  const Value *Op = II->getArgOperand(0);
2598  Register InputReg = getRegForValue(Op);
2599  if (InputReg == 0)
2600  return false;
2601 
2602  // F16C only allows converting from float to half and from half to float.
2603  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2604  if (IsFloatToHalf) {
2605  if (!Op->getType()->isFloatTy())
2606  return false;
2607  } else {
2608  if (!II->getType()->isFloatTy())
2609  return false;
2610  }
2611 
2612  unsigned ResultReg = 0;
2613  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2614  if (IsFloatToHalf) {
2615  // 'InputReg' is implicitly promoted from register class FR32 to
2616  // register class VR128 by method 'constrainOperandRegClass' which is
2617  // directly called by 'fastEmitInst_ri'.
2618  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2619  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2620  // It's consistent with the other FP instructions, which are usually
2621  // controlled by MXCSR.
2622  unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr
2623  : X86::VCVTPS2PHrr;
2624  InputReg = fastEmitInst_ri(Opc, RC, InputReg, 4);
2625 
2626  // Move the lower 32 bits of InputReg to another register of class GR32.
2627  Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr
2628  : X86::VMOVPDI2DIrr;
2629  ResultReg = createResultReg(&X86::GR32RegClass);
2630  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2631  .addReg(InputReg, RegState::Kill);
2632 
2633  // The result value is in the lower 16-bits of ResultReg.
2634  unsigned RegIdx = X86::sub_16bit;
2635  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, RegIdx);
2636  } else {
2637  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2638  // Explicitly zero-extend the input to 32-bit.
2639  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::ZERO_EXTEND, InputReg);
2640 
2641  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2642  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2643  InputReg);
2644 
2645  unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
2646  : X86::VCVTPH2PSrr;
2647  InputReg = fastEmitInst_r(Opc, RC, InputReg);
2648 
2649  // The result value is in the lower 32-bits of ResultReg.
2650  // Emit an explicit copy from register class VR128 to register class FR32.
2651  ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
2652  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2653  TII.get(TargetOpcode::COPY), ResultReg)
2654  .addReg(InputReg, RegState::Kill);
2655  }
2656 
2657  updateValueMap(II, ResultReg);
2658  return true;
2659  }
2660  case Intrinsic::frameaddress: {
2661  MachineFunction *MF = FuncInfo.MF;
2662  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2663  return false;
2664 
2665  Type *RetTy = II->getCalledFunction()->getReturnType();
2666 
2667  MVT VT;
2668  if (!isTypeLegal(RetTy, VT))
2669  return false;
2670 
2671  unsigned Opc;
2672  const TargetRegisterClass *RC = nullptr;
2673 
2674  switch (VT.SimpleTy) {
2675  default: llvm_unreachable("Invalid result type for frameaddress.");
2676  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2677  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2678  }
2679 
2680  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2681  // we get the wrong frame register.
2682  MachineFrameInfo &MFI = MF->getFrameInfo();
2683  MFI.setFrameAddressIsTaken(true);
2684 
2685  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2686  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2687  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2688  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2689  "Invalid Frame Register!");
2690 
2691  // Always make a copy of the frame register to a vreg first, so that we
2692  // never directly reference the frame register (the TwoAddressInstruction-
2693  // Pass doesn't like that).
2694  Register SrcReg = createResultReg(RC);
2695  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2696  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2697 
2698  // Now recursively load from the frame address.
2699  // movq (%rbp), %rax
2700  // movq (%rax), %rax
2701  // movq (%rax), %rax
2702  // ...
2703  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2704  while (Depth--) {
2705  Register DestReg = createResultReg(RC);
2706  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2707  TII.get(Opc), DestReg), SrcReg);
2708  SrcReg = DestReg;
2709  }
2710 
2711  updateValueMap(II, SrcReg);
2712  return true;
2713  }
2714  case Intrinsic::memcpy: {
2715  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2716  // Don't handle volatile or variable length memcpys.
2717  if (MCI->isVolatile())
2718  return false;
2719 
2720  if (isa<ConstantInt>(MCI->getLength())) {
2721  // Small memcpy's are common enough that we want to do them
2722  // without a call if possible.
2723  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2724  if (IsMemcpySmall(Len)) {
2725  X86AddressMode DestAM, SrcAM;
2726  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2727  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2728  return false;
2729  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2730  return true;
2731  }
2732  }
2733 
2734  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2735  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2736  return false;
2737 
2738  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2739  return false;
2740 
2741  return lowerCallTo(II, "memcpy", II->arg_size() - 1);
2742  }
2743  case Intrinsic::memset: {
2744  const MemSetInst *MSI = cast<MemSetInst>(II);
2745 
2746  if (MSI->isVolatile())
2747  return false;
2748 
2749  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2750  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2751  return false;
2752 
2753  if (MSI->getDestAddressSpace() > 255)
2754  return false;
2755 
2756  return lowerCallTo(II, "memset", II->arg_size() - 1);
2757  }
2758  case Intrinsic::stackprotector: {
2759  // Emit code to store the stack guard onto the stack.
2760  EVT PtrTy = TLI.getPointerTy(DL);
2761 
2762  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2763  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2764 
2765  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2766 
2767  // Grab the frame index.
2768  X86AddressMode AM;
2769  if (!X86SelectAddress(Slot, AM)) return false;
2770  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2771  return true;
2772  }
2773  case Intrinsic::dbg_declare: {
2774  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2775  X86AddressMode AM;
2776  assert(DI->getAddress() && "Null address should be checked earlier!");
2777  if (!X86SelectAddress(DI->getAddress(), AM))
2778  return false;
2779  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2780  assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
2781  "Expected inlined-at fields to agree");
2782  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2783  .addImm(0)
2784  .addMetadata(DI->getVariable())
2785  .addMetadata(DI->getExpression());
2786  return true;
2787  }
2788  case Intrinsic::trap: {
2789  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2790  return true;
2791  }
2792  case Intrinsic::sqrt: {
2793  if (!Subtarget->hasSSE1())
2794  return false;
2795 
2796  Type *RetTy = II->getCalledFunction()->getReturnType();
2797 
2798  MVT VT;
2799  if (!isTypeLegal(RetTy, VT))
2800  return false;
2801 
2802  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2803  // is not generated by FastISel yet.
2804  // FIXME: Update this code once tablegen can handle it.
2805  static const uint16_t SqrtOpc[3][2] = {
2806  { X86::SQRTSSr, X86::SQRTSDr },
2807  { X86::VSQRTSSr, X86::VSQRTSDr },
2808  { X86::VSQRTSSZr, X86::VSQRTSDZr },
2809  };
2810  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2811  Subtarget->hasAVX() ? 1 :
2812  0;
2813  unsigned Opc;
2814  switch (VT.SimpleTy) {
2815  default: return false;
2816  case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2817  case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2818  }
2819 
2820  const Value *SrcVal = II->getArgOperand(0);
2821  Register SrcReg = getRegForValue(SrcVal);
2822 
2823  if (SrcReg == 0)
2824  return false;
2825 
2826  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2827  unsigned ImplicitDefReg = 0;
2828  if (AVXLevel > 0) {
2829  ImplicitDefReg = createResultReg(RC);
2830  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2831  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2832  }
2833 
2834  Register ResultReg = createResultReg(RC);
2835  MachineInstrBuilder MIB;
2836  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2837  ResultReg);
2838 
2839  if (ImplicitDefReg)
2840  MIB.addReg(ImplicitDefReg);
2841 
2842  MIB.addReg(SrcReg);
2843 
2844  updateValueMap(II, ResultReg);
2845  return true;
2846  }
2847  case Intrinsic::sadd_with_overflow:
2848  case Intrinsic::uadd_with_overflow:
2849  case Intrinsic::ssub_with_overflow:
2850  case Intrinsic::usub_with_overflow:
2851  case Intrinsic::smul_with_overflow:
2852  case Intrinsic::umul_with_overflow: {
2853  // This implements the basic lowering of the xalu with overflow intrinsics
2854  // into add/sub/mul followed by either seto or setb.
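 // For example, i32 "uadd.with.overflow" becomes roughly "addl %rhs, %lhs"
 // followed by "setb %ov" to capture the carry; the signed forms test the
 // overflow flag with "seto" instead.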
2855  const Function *Callee = II->getCalledFunction();
2856  auto *Ty = cast<StructType>(Callee->getReturnType());
2857  Type *RetTy = Ty->getTypeAtIndex(0U);
2858  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2859  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2860  "Overflow value expected to be an i1");
2861 
2862  MVT VT;
2863  if (!isTypeLegal(RetTy, VT))
2864  return false;
2865 
2866  if (VT < MVT::i8 || VT > MVT::i64)
2867  return false;
2868 
2869  const Value *LHS = II->getArgOperand(0);
2870  const Value *RHS = II->getArgOperand(1);
2871 
2872  // Canonicalize immediate to the RHS.
2873  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
2874  std::swap(LHS, RHS);
2875 
2876  unsigned BaseOpc, CondCode;
2877  switch (II->getIntrinsicID()) {
2878  default: llvm_unreachable("Unexpected intrinsic!");
2879  case Intrinsic::sadd_with_overflow:
2880  BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
2881  case Intrinsic::uadd_with_overflow:
2882  BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
2883  case Intrinsic::ssub_with_overflow:
2884  BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
2885  case Intrinsic::usub_with_overflow:
2886  BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
2887  case Intrinsic::smul_with_overflow:
2888  BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
2889  case Intrinsic::umul_with_overflow:
2890  BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
2891  }
2892 
2893  Register LHSReg = getRegForValue(LHS);
2894  if (LHSReg == 0)
2895  return false;
2896 
2897  unsigned ResultReg = 0;
2898  // Check if we have an immediate version.
2899  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2900  static const uint16_t Opc[2][4] = {
2901  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2902  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2903  };
2904 
2905  if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
2906  CondCode == X86::COND_O) {
2907  // We can use INC/DEC.
2908  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2909  bool IsDec = BaseOpc == ISD::SUB;
2910  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2911  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2912  .addReg(LHSReg);
2913  } else
2914  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, CI->getZExtValue());
2915  }
2916 
2917  unsigned RHSReg;
2918  if (!ResultReg) {
2919  RHSReg = getRegForValue(RHS);
2920  if (RHSReg == 0)
2921  return false;
2922  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, RHSReg);
2923  }
2924 
2925  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2926  // it manually.
2927  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2928  static const uint16_t MULOpc[] =
2929  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2930  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2931  // First copy the first operand into RAX, which is an implicit input to
2932  // the X86::MUL*r instruction.
2933  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2934  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2935  .addReg(LHSReg);
2936  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2937  TLI.getRegClassFor(VT), RHSReg);
2938  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2939  static const uint16_t MULOpc[] =
2940  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2941  if (VT == MVT::i8) {
2942  // Copy the first operand into AL, which is an implicit input to the
2943  // X86::IMUL8r instruction.
2944  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2945  TII.get(TargetOpcode::COPY), X86::AL)
2946  .addReg(LHSReg);
2947  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg);
2948  } else
2949  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2950  TLI.getRegClassFor(VT), LHSReg, RHSReg);
2951  }
2952 
2953  if (!ResultReg)
2954  return false;
2955 
2956  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2957  Register ResultReg2 = createResultReg(&X86::GR8RegClass);
2958  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2959  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2960  ResultReg2).addImm(CondCode);
2961 
2962  updateValueMap(II, ResultReg, 2);
2963  return true;
2964  }
2965  case Intrinsic::x86_sse_cvttss2si:
2966  case Intrinsic::x86_sse_cvttss2si64:
2967  case Intrinsic::x86_sse2_cvttsd2si:
2968  case Intrinsic::x86_sse2_cvttsd2si64: {
2969  bool IsInputDouble;
2970  switch (II->getIntrinsicID()) {
2971  default: llvm_unreachable("Unexpected intrinsic.");
2972  case Intrinsic::x86_sse_cvttss2si:
2973  case Intrinsic::x86_sse_cvttss2si64:
2974  if (!Subtarget->hasSSE1())
2975  return false;
2976  IsInputDouble = false;
2977  break;
2978  case Intrinsic::x86_sse2_cvttsd2si:
2979  case Intrinsic::x86_sse2_cvttsd2si64:
2980  if (!Subtarget->hasSSE2())
2981  return false;
2982  IsInputDouble = true;
2983  break;
2984  }
2985 
2986  Type *RetTy = II->getCalledFunction()->getReturnType();
2987  MVT VT;
2988  if (!isTypeLegal(RetTy, VT))
2989  return false;
2990 
2991  static const uint16_t CvtOpc[3][2][2] = {
2992  { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
2993  { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
2994  { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
2995  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
2996  { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
2997  { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
2998  };
2999  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
3000  Subtarget->hasAVX() ? 1 :
3001  0;
3002  unsigned Opc;
3003  switch (VT.SimpleTy) {
3004  default: llvm_unreachable("Unexpected result type.");
3005  case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3006  case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3007  }
3008 
3009  // Check if we can fold insertelement instructions into the convert.
3010  const Value *Op = II->getArgOperand(0);
3011  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3012  const Value *Index = IE->getOperand(2);
3013  if (!isa<ConstantInt>(Index))
3014  break;
3015  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3016 
3017  if (Idx == 0) {
3018  Op = IE->getOperand(1);
3019  break;
3020  }
3021  Op = IE->getOperand(0);
3022  }
3023 
3024  Register Reg = getRegForValue(Op);
3025  if (Reg == 0)
3026  return false;
3027 
3028  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3029  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3030  .addReg(Reg);
3031 
3032  updateValueMap(II, ResultReg);
3033  return true;
3034  }
3035  }
3036 }
3037 
3038 bool X86FastISel::fastLowerArguments() {
3039  if (!FuncInfo.CanLowerReturn)
3040  return false;
3041 
3042  const Function *F = FuncInfo.Fn;
3043  if (F->isVarArg())
3044  return false;
3045 
3046  CallingConv::ID CC = F->getCallingConv();
3047  if (CC != CallingConv::C)
3048  return false;
3049 
3050  if (Subtarget->isCallingConvWin64(CC))
3051  return false;
3052 
3053  if (!Subtarget->is64Bit())
3054  return false;
3055 
3056  if (Subtarget->useSoftFloat())
3057  return false;
3058 
3059  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
3060  unsigned GPRCnt = 0;
3061  unsigned FPRCnt = 0;
3062  for (auto const &Arg : F->args()) {
3063  if (Arg.hasAttribute(Attribute::ByVal) ||
3064  Arg.hasAttribute(Attribute::InReg) ||
3065  Arg.hasAttribute(Attribute::StructRet) ||
3066  Arg.hasAttribute(Attribute::SwiftSelf) ||
3067  Arg.hasAttribute(Attribute::SwiftAsync) ||
3068  Arg.hasAttribute(Attribute::SwiftError) ||
3069  Arg.hasAttribute(Attribute::Nest))
3070  return false;
3071 
3072  Type *ArgTy = Arg.getType();
3073  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3074  return false;
3075 
3076  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3077  if (!ArgVT.isSimple()) return false;
3078  switch (ArgVT.getSimpleVT().SimpleTy) {
3079  default: return false;
3080  case MVT::i32:
3081  case MVT::i64:
3082  ++GPRCnt;
3083  break;
3084  case MVT::f32:
3085  case MVT::f64:
3086  if (!Subtarget->hasSSE1())
3087  return false;
3088  ++FPRCnt;
3089  break;
3090  }
3091 
3092  if (GPRCnt > 6)
3093  return false;
3094 
3095  if (FPRCnt > 8)
3096  return false;
3097  }
3098 
3099  static const MCPhysReg GPR32ArgRegs[] = {
3100  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3101  };
3102  static const MCPhysReg GPR64ArgRegs[] = {
3103  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3104  };
3105  static const MCPhysReg XMMArgRegs[] = {
3106  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3107  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3108  };
3109 
3110  unsigned GPRIdx = 0;
3111  unsigned FPRIdx = 0;
3112  for (auto const &Arg : F->args()) {
3113  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3114  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3115  unsigned SrcReg;
3116  switch (VT.SimpleTy) {
3117  default: llvm_unreachable("Unexpected value type.");
3118  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3119  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3120  case MVT::f32: [[fallthrough]];
3121  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3122  }
3123  Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3124  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3125  // Without this, EmitLiveInCopies may eliminate the livein if its only
3126  // use is a bitcast (which isn't turned into an instruction).
3127  Register ResultReg = createResultReg(RC);
3128  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3129  TII.get(TargetOpcode::COPY), ResultReg)
3130  .addReg(DstReg, getKillRegState(true));
3131  updateValueMap(&Arg, ResultReg);
3132  }
3133  return true;
3134 }
3135 
3136 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3137  CallingConv::ID CC,
3138  const CallBase *CB) {
3139  if (Subtarget->is64Bit())
3140  return 0;
3141  if (Subtarget->getTargetTriple().isOSMSVCRT())
3142  return 0;
3143  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3144  CC == CallingConv::HiPE || CC == CallingConv::Tail ||
3145  CC == CallingConv::SwiftTail)
3146  return 0;
3147 
3148  if (CB)
3149  if (CB->arg_empty() || !CB->paramHasAttr(0, Attribute::StructRet) ||
3150  CB->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3151  return 0;
3152 
3153  return 4;
3154 }
3155 
3156 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3157  auto &OutVals = CLI.OutVals;
3158  auto &OutFlags = CLI.OutFlags;
3159  auto &OutRegs = CLI.OutRegs;
3160  auto &Ins = CLI.Ins;
3161  auto &InRegs = CLI.InRegs;
3162  CallingConv::ID CC = CLI.CallConv;
3163  bool &IsTailCall = CLI.IsTailCall;
3164  bool IsVarArg = CLI.IsVarArg;
3165  const Value *Callee = CLI.Callee;
3166  MCSymbol *Symbol = CLI.Symbol;
3167  const auto *CB = CLI.CB;
3168 
3169  bool Is64Bit = Subtarget->is64Bit();
3170  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3171 
3172  // Call / invoke instructions with NoCfCheck attribute require special
3173  // handling.
3174  if (CB && CB->doesNoCfCheck())
3175  return false;
3176 
3177  // Functions with no_caller_saved_registers need special handling.
3178  if ((CB && isa<CallInst>(CB) && CB->hasFnAttr("no_caller_saved_registers")))
3179  return false;
3180 
3181  // Functions with no_callee_saved_registers need special handling.
3182  if ((CB && CB->hasFnAttr("no_callee_saved_registers")))
3183  return false;
3184 
3185  // Functions using thunks for indirect calls need to use SDISel.
3186  if (Subtarget->useIndirectThunkCalls())
3187  return false;
3188 
3189  // Handle only the calling conventions listed in the switch below for now.
3190  switch (CC) {
3191  default: return false;
3192  case CallingConv::C:
3193  case CallingConv::Fast:
3194  case CallingConv::Tail:
3195  case CallingConv::WebKit_JS:
3196  case CallingConv::Swift:
3197  case CallingConv::SwiftTail:
3198  case CallingConv::X86_FastCall:
3199  case CallingConv::X86_StdCall:
3200  case CallingConv::X86_ThisCall:
3201  case CallingConv::Win64:
3202  case CallingConv::X86_64_SysV:
3203  case CallingConv::CFGuard_Check:
3204  break;
3205  }
3206 
3207  // Allow SelectionDAG isel to handle tail calls.
3208  if (IsTailCall)
3209  return false;
3210 
3211  // fastcc with -tailcallopt is intended to provide a guaranteed
3212  // tail call optimization. Fastisel doesn't know how to do that.
3213  if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
3214  CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
3215  return false;
3216 
3217  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3218  // x86-32. Special handling for x86-64 is implemented.
3219  if (IsVarArg && IsWin64)
3220  return false;
3221 
3222  // Don't know about inalloca yet.
3223  if (CLI.CB && CLI.CB->hasInAllocaArgument())
3224  return false;
3225 
3226  for (auto Flag : CLI.OutFlags)
3227  if (Flag.isSwiftError() || Flag.isPreallocated())
3228  return false;
3229 
3230  SmallVector<MVT, 16> OutVTs;
3231  SmallVector<unsigned, 16> ArgRegs;
3232 
3233  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3234  // instruction. This is safe because it is common to all FastISel supported
3235  // calling conventions on x86.
3236  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3237  Value *&Val = OutVals[i];
3238  ISD::ArgFlagsTy Flags = OutFlags[i];
3239  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3240  if (CI->getBitWidth() < 32) {
3241  if (Flags.isSExt())
3242  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
3243  else
3244  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
3245  }
3246  }
3247 
3248  // Passing bools around ends up doing a trunc to i1 and passing it.
3249  // Codegen this as an argument + "and 1".
3250  MVT VT;
3251  auto *TI = dyn_cast<TruncInst>(Val);
3252  unsigned ResultReg;
3253  if (TI && TI->getType()->isIntegerTy(1) && CLI.CB &&
3254  (TI->getParent() == CLI.CB->getParent()) && TI->hasOneUse()) {
3255  Value *PrevVal = TI->getOperand(0);
3256  ResultReg = getRegForValue(PrevVal);
3257 
3258  if (!ResultReg)
3259  return false;
3260 
3261  if (!isTypeLegal(PrevVal->getType(), VT))
3262  return false;
3263 
3264  ResultReg = fastEmit_ri(VT, VT, ISD::AND, ResultReg, 1);
3265  } else {
3266  if (!isTypeLegal(Val->getType(), VT) ||
3267  (VT.isVector() && VT.getVectorElementType() == MVT::i1))
3268  return false;
3269  ResultReg = getRegForValue(Val);
3270  }
3271 
3272  if (!ResultReg)
3273  return false;
3274 
3275  ArgRegs.push_back(ResultReg);
3276  OutVTs.push_back(VT);
3277  }
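// At this point every outgoing argument has a legal MVT in OutVTs and a
// materialized vreg in ArgRegs; anything fast-isel could not handle has
// already bailed out to SelectionDAG.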
3278 
3279  // Analyze operands of the call, assigning locations to each operand.
3280  SmallVector<CCValAssign, 16> ArgLocs;
3281  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3282 
3283  // Allocate shadow area for Win64
3284  if (IsWin64)
3285  CCInfo.AllocateStack(32, Align(8));
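// The 32 bytes reserved above are the Win64 'home' (shadow) area for the
// four register parameters; the ABI requires it even when fewer than four
// arguments are passed.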
3286 
3287  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3288 
3289  // Get a count of how many bytes are to be pushed on the stack.
3290  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3291 
3292  // Issue CALLSEQ_START
3293  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3294  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3295  .addImm(NumBytes).addImm(0).addImm(0);
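// CALLSEQ_START marks the beginning of the call sequence and reserves
// NumBytes of stack for outgoing arguments; frame lowering later turns it
// into a real stack adjustment where needed.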
3296 
3297  // Walk the register/memloc assignments, inserting copies/loads.
3298  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3299  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3300  CCValAssign const &VA = ArgLocs[i];
3301  const Value *ArgVal = OutVals[VA.getValNo()];
3302  MVT ArgVT = OutVTs[VA.getValNo()];
3303 
3304  if (ArgVT == MVT::x86mmx)
3305  return false;
3306 
3307  unsigned ArgReg = ArgRegs[VA.getValNo()];
3308 
3309  // Promote the value if needed.
3310  switch (VA.getLocInfo()) {
3311  case CCValAssign::Full: break;
3312  case CCValAssign::SExt: {
3313  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3314  "Unexpected extend");
3315 
3316  if (ArgVT == MVT::i1)
3317  return false;
3318 
3319  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3320  ArgVT, ArgReg);
3321  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3322  ArgVT = VA.getLocVT();
3323  break;
3324  }
3325  case CCValAssign::ZExt: {
3326  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3327  "Unexpected extend");
3328 
3329  // Handle zero-extension from i1 to i8, which is common.
3330  if (ArgVT == MVT::i1) {
3331  // Set the high bits to zero.
3332  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg);
3333  ArgVT = MVT::i8;
3334 
3335  if (ArgReg == 0)
3336  return false;
3337  }
3338 
3339  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3340  ArgVT, ArgReg);
3341  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3342  ArgVT = VA.getLocVT();
3343  break;
3344  }
3345  case CCValAssign::AExt: {
3346  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3347  "Unexpected extend");
3348  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3349  ArgVT, ArgReg);
3350  if (!Emitted)
3351  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3352  ArgVT, ArgReg);
3353  if (!Emitted)
3354  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3355  ArgVT, ArgReg);
3356 
3357  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3358  ArgVT = VA.getLocVT();
3359  break;
3360  }
3361  case CCValAssign::BCvt: {
3362  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg);
3363  assert(ArgReg && "Failed to emit a bitcast!");
3364  ArgVT = VA.getLocVT();
3365  break;
3366  }
3367  case CCValAssign::VExt:
3368  // VExt has not been implemented, so this should be impossible to reach
3369  // for now. However, fall back to SelectionDAG isel once implemented.
3370  return false;
3371  case CCValAssign::AExtUpper:
3372  case CCValAssign::SExtUpper:
3373  case CCValAssign::ZExtUpper:
3374  case CCValAssign::FPExt:
3375  case CCValAssign::Trunc:
3376  llvm_unreachable("Unexpected loc info!");
3377  case CCValAssign::Indirect:
3378  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3379  // support this.
3380  return false;
3381  }
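// ArgReg now holds the value in the type the calling convention expects
// (VA.getLocVT()), ready to be copied into its register or stored to its
// stack slot below.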
3382 
3383  if (VA.isRegLoc()) {
3384  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3385  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3386  OutRegs.push_back(VA.getLocReg());
3387  } else {
3388  assert(VA.isMemLoc() && "Unknown value location!");
3389 
3390  // Don't emit stores for undef values.
3391  if (isa<UndefValue>(ArgVal))
3392  continue;
3393 
3394  unsigned LocMemOffset = VA.getLocMemOffset();
3395  X86AddressMode AM;
3396  AM.Base.Reg = RegInfo->getStackRegister();
3397  AM.Disp = LocMemOffset;
3398  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3399  Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3400  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3401  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3402  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3403  if (Flags.isByVal()) {
3404  X86AddressMode SrcAM;
3405  SrcAM.Base.Reg = ArgReg;
3406  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3407  return false;
3408  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3409  // If this is a really simple value, emit this with the Value* version
3410  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3411  // as it can cause us to reevaluate the argument.
3412  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3413  return false;
3414  } else {
3415  if (!X86FastEmitStore(ArgVT, ArgReg, AM, MMO))
3416  return false;
3417  }
3418  }
3419  }
3420 
3421  // ELF / PIC requires the GOT pointer to be set up in the EBX register
3422  // before making a function call via the PLT.
3423  if (Subtarget->isPICStyleGOT()) {
3424  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3425  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3426  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3427  }
3428 
3429  if (Is64Bit && IsVarArg && !IsWin64) {
3430  // From AMD64 ABI document:
3431  // For calls that may call functions that use varargs or stdargs
3432  // (prototype-less calls or calls to functions containing ellipsis (...) in
3433  // the declaration) %al is used as hidden argument to specify the number
3434  // of SSE registers used. The contents of %al do not need to match exactly
3435  // the number of registers, but must be an upper bound on the number of SSE
3436  // registers used and must be in the range 0 - 8 inclusive.
3437 
3438  // Count the number of XMM registers allocated.
3439  static const MCPhysReg XMMArgRegs[] = {
3440  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3441  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3442  };
3443  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3444  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3445  && "SSE registers cannot be used when SSE is disabled");
3446  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3447  X86::AL).addImm(NumXMMRegs);
3448  }
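// For example, a call passing a single double vararg ends up with
// 'movb $1, %al' emitted immediately before the call instruction.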
3449 
3450  // Materialize callee address in a register. FIXME: GV address can be
3451  // handled with a CALLpcrel32 instead.
3452  X86AddressMode CalleeAM;
3453  if (!X86SelectCallAddress(Callee, CalleeAM))
3454  return false;
3455 
3456  unsigned CalleeOp = 0;
3457  const GlobalValue *GV = nullptr;
3458  if (CalleeAM.GV != nullptr) {
3459  GV = CalleeAM.GV;
3460  } else if (CalleeAM.Base.Reg != 0) {
3461  CalleeOp = CalleeAM.Base.Reg;
3462  } else
3463  return false;
3464 
3465  // Issue the call.
3466  MachineInstrBuilder MIB;
3467  if (CalleeOp) {
3468  // Register-indirect call.
3469  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3470  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3471  .addReg(CalleeOp);
3472  } else {
3473  // Direct call.
3474  assert(GV && "Not a direct call");
3475  // See if we need any target-specific flags on the GV operand.
3476  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3477 
3478  // This will be a direct call, or an indirect call through memory for
3479  // NonLazyBind calls or dllimport calls.
3480  bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
3481  OpFlags == X86II::MO_GOTPCREL ||
3482  OpFlags == X86II::MO_GOTPCREL_NORELAX ||
3483  OpFlags == X86II::MO_COFFSTUB;
3484  unsigned CallOpc = NeedLoad
3485  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3486  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3487 
3488  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3489  if (NeedLoad)
3490  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3491  if (Symbol)
3492  MIB.addSym(Symbol, OpFlags);
3493  else
3494  MIB.addGlobalAddress(GV, 0, OpFlags);
3495  if (NeedLoad)
3496  MIB.addReg(0);
3497  }
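// MIB is now the call itself: CALL64r/CALL32r through a register,
// CALL64pcrel32/CALLpcrel32 directly to the symbol, or CALL64m/CALL32m
// through the GOT entry or import stub when the reference needs a load.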
3498 
3499  // Add a register mask operand representing the call-preserved registers.
3500  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3501  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3502 
3503  // Add an implicit use GOT pointer in EBX.
3504  if (Subtarget->isPICStyleGOT())
3505  MIB.addReg(X86::EBX, RegState::Implicit);
3506 
3507  if (Is64Bit && IsVarArg && !IsWin64)
3508  MIB.addReg(X86::AL, RegState::Implicit);
3509 
3510  // Add implicit physical register uses to the call.
3511  for (auto Reg : OutRegs)
3512  MIB.addReg(Reg, RegState::Implicit);
3513 
3514  // Issue CALLSEQ_END
3515  unsigned NumBytesForCalleeToPop =
3516  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3517  TM.Options.GuaranteedTailCallOpt)
3518  ? NumBytes // Callee pops everything.
3519  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CB);
3520  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3521  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3522  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
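// CALLSEQ_END closes the call sequence; the second immediate records how
// many of those bytes the callee pops itself (nonzero for callee-pop
// conventions and for 32-bit sret calls).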
3523 
3524  // Now handle call return values.
3525  SmallVector<CCValAssign, 16> RVLocs;
3526  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3527  CLI.RetTy->getContext());
3528  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3529 
3530  // Copy all of the result registers out of their specified physreg.
3531  Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3532  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3533  CCValAssign &VA = RVLocs[i];
3534  EVT CopyVT = VA.getValVT();
3535  unsigned CopyReg = ResultReg + i;
3536  Register SrcReg = VA.getLocReg();
3537 
3538  // If this is x86-64, and we disabled SSE, we can't return FP values
3539  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3540  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3541  report_fatal_error("SSE register return with SSE disabled");
3542  }
3543 
3544  // If we prefer to use the value in xmm registers, copy it out as f80 and
3545  // use a truncate to move it from fp stack reg to xmm reg.
3546  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3547  isScalarFPTypeInSSEReg(VA.getValVT())) {
3548  CopyVT = MVT::f80;
3549  CopyReg = createResultReg(&X86::RFP80RegClass);
3550  }
3551 
3552  // Copy out the result.
3553  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3554  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3555  InRegs.push_back(VA.getLocReg());
3556 
3557  // Round the f80 to the right size, which also moves it to the appropriate
3558  // xmm register. This is accomplished by storing the f80 value in memory
3559  // and then loading it back.
3560  if (CopyVT != VA.getValVT()) {
3561  EVT ResVT = VA.getValVT();
3562  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3563  unsigned MemSize = ResVT.getSizeInBits()/8;
3564  int FI = MFI.CreateStackObject(MemSize, Align(MemSize), false);
3565  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3566  TII.get(Opc)), FI)
3567  .addReg(CopyReg);
3568  Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
3569  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3570  TII.get(Opc), ResultReg + i), FI);
3571  }
3572  }
3573 
3574  CLI.ResultReg = ResultReg;
3575  CLI.NumResultRegs = RVLocs.size();
3576  CLI.Call = MIB;
3577 
3578  return true;
3579 }
3580 
3581 bool
3582 X86FastISel::fastSelectInstruction(const Instruction *I) {
3583  switch (I->getOpcode()) {
3584  default: break;
3585  case Instruction::Load:
3586  return X86SelectLoad(I);
3587  case Instruction::Store:
3588  return X86SelectStore(I);
3589  case Instruction::Ret:
3590  return X86SelectRet(I);
3591  case Instruction::ICmp:
3592  case Instruction::FCmp:
3593  return X86SelectCmp(I);
3594  case Instruction::ZExt:
3595  return X86SelectZExt(I);
3596  case Instruction::SExt:
3597  return X86SelectSExt(I);
3598  case Instruction::Br:
3599  return X86SelectBranch(I);
3600  case Instruction::LShr:
3601  case Instruction::AShr:
3602  case Instruction::Shl:
3603  return X86SelectShift(I);
3604  case Instruction::SDiv:
3605  case Instruction::UDiv:
3606  case Instruction::SRem:
3607  case Instruction::URem:
3608  return X86SelectDivRem(I);
3609  case Instruction::Select:
3610  return X86SelectSelect(I);
3611  case Instruction::Trunc:
3612  return X86SelectTrunc(I);
3613  case Instruction::FPExt:
3614  return X86SelectFPExt(I);
3615  case Instruction::FPTrunc:
3616  return X86SelectFPTrunc(I);
3617  case Instruction::SIToFP:
3618  return X86SelectSIToFP(I);
3619  case Instruction::UIToFP:
3620  return X86SelectUIToFP(I);
3621  case Instruction::IntToPtr: // Deliberate fall-through.
3622  case Instruction::PtrToInt: {
3623  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3624  EVT DstVT = TLI.getValueType(DL, I->getType());
3625  if (DstVT.bitsGT(SrcVT))
3626  return X86SelectZExt(I);
3627  if (DstVT.bitsLT(SrcVT))
3628  return X86SelectTrunc(I);
3629  Register Reg = getRegForValue(I->getOperand(0));
3630  if (Reg == 0) return false;
3631  updateValueMap(I, Reg);
3632  return true;
3633  }
3634  case Instruction::BitCast: {
3635  // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
3636  if (!Subtarget->hasSSE2())
3637  return false;
3638 
3639  MVT SrcVT, DstVT;
3640  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
3641  !isTypeLegal(I->getType(), DstVT))
3642  return false;
3643 
3644  // Only allow vectors that use xmm/ymm/zmm.
3645  if (!SrcVT.isVector() || !DstVT.isVector() ||
3646  SrcVT.getVectorElementType() == MVT::i1 ||
3647  DstVT.getVectorElementType() == MVT::i1)
3648  return false;
3649 
3650  Register Reg = getRegForValue(I->getOperand(0));
3651  if (!Reg)
3652  return false;
3653 
3654  // Emit a reg-reg copy so we don't propagate cached known bits information
3655  // with the wrong VT if we fall out of fast isel after selecting this.
3656  const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
3657  Register ResultReg = createResultReg(DstClass);
3658  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3659  TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
3660 
3661  updateValueMap(I, ResultReg);
3662  return true;
3663  }
3664  }
3665 
3666  return false;
3667 }
3668 
3669 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3670  if (VT > MVT::i64)
3671  return 0;
3672 
3673  uint64_t Imm = CI->getZExtValue();
3674  if (Imm == 0) {
3675  Register SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3676  switch (VT.SimpleTy) {
3677  default: llvm_unreachable("Unexpected value type");
3678  case MVT::i1:
3679  case MVT::i8:
3680  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, X86::sub_8bit);
3681  case MVT::i16:
3682  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, X86::sub_16bit);
3683  case MVT::i32:
3684  return SrcReg;
3685  case MVT::i64: {
3686  Register ResultReg = createResultReg(&X86::GR64RegClass);
3687  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3688  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3689  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3690  return ResultReg;
3691  }
3692  }
3693  }
3694 
3695  unsigned Opc = 0;
3696  switch (VT.SimpleTy) {
3697  default: llvm_unreachable("Unexpected value type");
3698  case MVT::i1:
3699  VT = MVT::i8;
3700  [[fallthrough]];
3701  case MVT::i8: Opc = X86::MOV8ri; break;
3702  case MVT::i16: Opc = X86::MOV16ri; break;
3703  case MVT::i32: Opc = X86::MOV32ri; break;
3704  case MVT::i64: {
3705  if (isUInt<32>(Imm))
3706  Opc = X86::MOV32ri64;
3707  else if (isInt<32>(Imm))
3708  Opc = X86::MOV64ri32;
3709  else
3710  Opc = X86::MOV64ri;
3711  break;
3712  }
3713  }
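// For example, on a 64-bit target 0xdeadbeef fits in 32 unsigned bits and
// uses MOV32ri64, -1 fits in 32 signed bits and uses MOV64ri32, and
// 0x100000000 needs the full 10-byte MOV64ri encoding.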
3714  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3715 }
3716 
3717 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3718  if (CFP->isNullValue())
3719  return fastMaterializeFloatZero(CFP);
3720 
3721  // Can't handle alternate code models yet.
3722  CodeModel::Model CM = TM.getCodeModel();
3723  if (CM != CodeModel::Small && CM != CodeModel::Large)
3724  return 0;
3725 
3726  // Get opcode and regclass of the output for the given load instruction.
3727  unsigned Opc = 0;
3728  bool HasSSE1 = Subtarget->hasSSE1();
3729  bool HasSSE2 = Subtarget->hasSSE2();
3730  bool HasAVX = Subtarget->hasAVX();
3731  bool HasAVX512 = Subtarget->hasAVX512();
3732  switch (VT.SimpleTy) {
3733  default: return 0;
3734  case MVT::f32:
3735  Opc = HasAVX512 ? X86::VMOVSSZrm_alt
3736  : HasAVX ? X86::VMOVSSrm_alt
3737  : HasSSE1 ? X86::MOVSSrm_alt
3738  : X86::LD_Fp32m;
3739  break;
3740  case MVT::f64:
3741  Opc = HasAVX512 ? X86::VMOVSDZrm_alt
3742  : HasAVX ? X86::VMOVSDrm_alt
3743  : HasSSE2 ? X86::MOVSDrm_alt
3744  : X86::LD_Fp64m;
3745  break;
3746  case MVT::f80:
3747  // No f80 support yet.
3748  return 0;
3749  }
3750 
3751  // MachineConstantPool wants an explicit alignment.
3752  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
3753 
3754  // x86-32 PIC requires a PIC base register for constant pools.
3755  unsigned PICBase = 0;
3756  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3757  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3758  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3759  else if (OpFlag == X86II::MO_GOTOFF)
3760  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3761  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3762  PICBase = X86::RIP;
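// In 32-bit PIC modes the constant-pool entry is addressed relative to the
// function's global base register; in 64-bit small-code-model code it is
// simply RIP-relative.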
3763 
3764  // Create the load from the constant pool.
3765  unsigned CPI = MCP.getConstantPoolIndex(CFP, Alignment);
3766  Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
3767 
3768  // Large code model only applies to 64-bit mode.
3769  if (Subtarget->is64Bit() && CM == CodeModel::Large) {
3770  Register AddrReg = createResultReg(&X86::GR64RegClass);
3771  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3772  AddrReg)
3773  .addConstantPoolIndex(CPI, 0, OpFlag);
3774  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3775  TII.get(Opc), ResultReg);
3776  addRegReg(MIB, AddrReg, false, PICBase, false);
3777  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3778  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3779  MachineMemOperand::MOLoad, DL.getPointerSize(), Alignment);
3780  MIB->addMemOperand(*FuncInfo.MF, MMO);
3781  return ResultReg;
3782  }
3783 
3784  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3785  TII.get(Opc), ResultReg),
3786  CPI, PICBase, OpFlag);
3787  return ResultReg;
3788 }
3789 
3790 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3791  // Can't handle alternate code models yet.
3792  if (TM.getCodeModel() != CodeModel::Small)
3793  return 0;
3794 
3795  // Materialize addresses with LEA/MOV instructions.
3796  X86AddressMode AM;
3797  if (X86SelectAddress(GV, AM)) {
3798  // If the expression is just a basereg, then we're done, otherwise we need
3799  // to emit an LEA.
3800  if (AM.BaseType == X86AddressMode::RegBase &&
3801  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3802  return AM.Base.Reg;
3803 
3804  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3805  if (TM.getRelocationModel() == Reloc::Static &&
3806  TLI.getPointerTy(DL) == MVT::i64) {
3807  // The displacement could be more than 32 bits away, so we need to use
3808  // an instruction with a 64-bit immediate.
3809  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3810  ResultReg)
3811  .addGlobalAddress(GV);
3812  } else {
3813  unsigned Opc =
3814  TLI.getPointerTy(DL) == MVT::i32
3815  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3816  : X86::LEA64r;
3817  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3818  TII.get(Opc), ResultReg), AM);
3819  }
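// Note the ILP32 (x32) case: pointers are 32 bits wide but addresses are
// computed with 64-bit registers, so LEA64_32r is used and only the low 32
// bits of the result are written.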
3820  return ResultReg;
3821  }
3822  return 0;
3823 }
3824 
3825 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3826  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3827 
3828  // Only handle simple types.
3829  if (!CEVT.isSimple())
3830  return 0;
3831  MVT VT = CEVT.getSimpleVT();
3832 
3833  if (const auto *CI = dyn_cast<ConstantInt>(C))
3834  return X86MaterializeInt(CI, VT);
3835  if (const auto *CFP = dyn_cast<ConstantFP>(C))
3836  return X86MaterializeFP(CFP, VT);
3837  if (const auto *GV = dyn_cast<GlobalValue>(C))
3838  return X86MaterializeGV(GV, VT);
3839  if (isa<UndefValue>(C)) {
3840  unsigned Opc = 0;
3841  switch (VT.SimpleTy) {
3842  default:
3843  break;
3844  case MVT::f32:
3845  if (!Subtarget->hasSSE1())
3846  Opc = X86::LD_Fp032;
3847  break;
3848  case MVT::f64:
3849  if (!Subtarget->hasSSE2())
3850  Opc = X86::LD_Fp064;
3851  break;
3852  case MVT::f80:
3853  Opc = X86::LD_Fp080;
3854  break;
3855  }
3856 
3857  if (Opc) {
3858  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3859  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
3860  ResultReg);
3861  return ResultReg;
3862  }
3863  }
3864 
3865  return 0;
3866 }
3867 
3868 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3869  // Fail on dynamic allocas. At this point, getRegForValue has already
3870  // checked its CSE maps, so if we're here trying to handle a dynamic
3871  // alloca, we're not going to succeed. X86SelectAddress has a
3872  // check for dynamic allocas, because it's called directly from
3873  // various places, but targetMaterializeAlloca also needs a check
3874  // in order to avoid recursion between getRegForValue,
3875  // X86SelectAddress, and targetMaterializeAlloca.
3876  if (!FuncInfo.StaticAllocaMap.count(C))
3877  return 0;
3878  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3879 
3880  X86AddressMode AM;
3881  if (!X86SelectAddress(C, AM))
3882  return 0;
3883  unsigned Opc =
3884  TLI.getPointerTy(DL) == MVT::i32
3885  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3886  : X86::LEA64r;
3887  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3888  Register ResultReg = createResultReg(RC);
3889  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3890  TII.get(Opc), ResultReg), AM);
3891  return ResultReg;
3892 }
3893 
3894 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3895  MVT VT;
3896  if (!isTypeLegal(CF->getType(), VT))
3897  return 0;
3898 
3899  // Get opcode and regclass for the given zero.
3900  bool HasSSE1 = Subtarget->hasSSE1();
3901  bool HasSSE2 = Subtarget->hasSSE2();
3902  bool HasAVX512 = Subtarget->hasAVX512();
3903  unsigned Opc = 0;
3904  switch (VT.SimpleTy) {
3905  default: return 0;
3906  case MVT::f16:
3907  Opc = HasAVX512 ? X86::AVX512_FsFLD0SH : X86::FsFLD0SH;
3908  break;
3909  case MVT::f32:
3910  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS
3911  : HasSSE1 ? X86::FsFLD0SS
3912  : X86::LD_Fp032;
3913  break;
3914  case MVT::f64:
3915  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD
3916  : HasSSE2 ? X86::FsFLD0SD
3917  : X86::LD_Fp064;
3918  break;
3919  case MVT::f80:
3920  // No f80 support yet.
3921  return 0;
3922  }
3923 
3924  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3925  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3926  return ResultReg;
3927 }
3928 
3929 
3930 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3931  const LoadInst *LI) {
3932  const Value *Ptr = LI->getPointerOperand();
3933  X86AddressMode AM;
3934  if (!X86SelectAddress(Ptr, AM))
3935  return false;
3936 
3937  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3938 
3939  unsigned Size = DL.getTypeAllocSize(LI->getType());
3940 
3941  SmallVector<MachineOperand, 8> AddrOps;
3942  AM.getFullAddress(AddrOps);
3943 
3944  MachineInstr *Result = XII.foldMemoryOperandImpl(
3945  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, LI->getAlign(),
3946  /*AllowCommute=*/true);
3947  if (!Result)
3948  return false;
3949 
3950  // The index register could be in the wrong register class. Unfortunately,
3951  // foldMemoryOperandImpl could have commuted the instruction so it's not enough
3952  // to just look at OpNo + the offset to the index reg. We actually need to
3953  // scan the instruction to find the index reg and see if it's the correct reg
3954  // class.
3955  unsigned OperandNo = 0;
3956  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3957  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3958  MachineOperand &MO = *I;
3959  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3960  continue;
3961  // Found the index reg, now try to rewrite it.
3962  Register IndexReg = constrainOperandRegClass(Result->getDesc(),
3963  MO.getReg(), OperandNo);
3964  if (IndexReg == MO.getReg())
3965  continue;
3966  MO.setReg(IndexReg);
3967  }
3968 
3969  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3970  Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
3971  MachineBasicBlock::iterator I(MI);
3972  removeDeadCode(I, std::next(I));
3973  return true;
3974 }
3975 
3976 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3977  const TargetRegisterClass *RC,
3978  unsigned Op0, unsigned Op1,
3979  unsigned Op2, unsigned Op3) {
3980  const MCInstrDesc &II = TII.get(MachineInstOpcode);
3981 
3982  Register ResultReg = createResultReg(RC);
3983  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3984  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3985  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3986  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3987 
3988  if (II.getNumDefs() >= 1)
3989  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
3990  .addReg(Op0)
3991  .addReg(Op1)
3992  .addReg(Op2)
3993  .addReg(Op3);
3994  else {
3995  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
3996  .addReg(Op0)
3997  .addReg(Op1)
3998  .addReg(Op2)
3999  .addReg(Op3);
4000  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4001  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
4002  }
4003  return ResultReg;
4004 }
4005 
4006 
4007 namespace llvm {
4008  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
4009  const TargetLibraryInfo *libInfo) {
4010  return new X86FastISel(funcInfo, libInfo);
4011  }
4012 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::addRegReg
static const MachineInstrBuilder & addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2)
addRegReg - This function is used to add a memory reference of the form: [Reg + Reg].
Definition: X86InstrBuilder.h:164
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:151
i
i
Definition: README.txt:29
llvm::CallingConv::X86_64_SysV
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
Definition: CallingConv.h:148
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:245
llvm::CmpInst::FCMP_ULE
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:734
llvm::CCValAssign::ZExt
@ ZExt
Definition: CallingConvLower.h:36
llvm::DbgVariableIntrinsic::getExpression
DIExpression * getExpression() const
Definition: IntrinsicInst.h:262
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition: MachineValueType.h:528
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4710
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1101
llvm::CallingConv::X86_ThisCall
@ X86_ThisCall
Similar to X86_StdCall.
Definition: CallingConv.h:119
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::DbgDeclareInst::getAddress
Value * getAddress() const
Definition: IntrinsicInst.h:315
llvm::CallingConv::Win64
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:156
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
llvm::AArch64CC::AL
@ AL
Definition: AArch64BaseInfo.h:269
llvm::ReturnInst
Return a value (possibly void), from a function.
Definition: Instructions.h:3017
llvm::CCValAssign::Full
@ Full
Definition: CallingConvLower.h:34
llvm::MVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: MachineValueType.h:358
llvm::X86Subtarget::hasSSE2
bool hasSSE2() const
Definition: X86Subtarget.h:200
llvm::CCValAssign::SExtUpper
@ SExtUpper
Definition: CallingConvLower.h:38
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:886
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::X86AddressMode::RegBase
@ RegBase
Definition: X86InstrBuilder.h:44
llvm::X86AddressMode
X86AddressMode - This struct holds a generalized full x86 address mode.
Definition: X86InstrBuilder.h:42
IntrinsicInst.h
X86Subtarget.h
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:189
llvm::ConstantExpr::getZExt
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2089
llvm::N86::EDX
@ EDX
Definition: X86MCTargetDesc.h:51
llvm::Function
Definition: Function.h:60
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
X86InstrBuilder.h
llvm::MachinePointerInfo::getConstantPool
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
Definition: MachineOperand.cpp:1000
X86SelectAddress
static void X86SelectAddress(const MachineInstr &I, const MachineRegisterInfo &MRI, X86AddressMode &AM)
Definition: X86InstructionSelector.cpp:512
GetElementPtrTypeIterator.h
llvm::ConstantExpr::getSExt
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2075
llvm::MemIntrinsicBase::getDestAddressSpace
unsigned getDestAddressSpace() const
Definition: IntrinsicInst.h:746
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1183
llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
llvm::X86AddressMode::GV
const GlobalValue * GV
Definition: X86InstrBuilder.h:56
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:374
llvm::CmpInst::FCMP_ONE
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:727
llvm::X86Subtarget
Definition: X86Subtarget.h:52
ErrorHandling.h
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::X86::COND_P
@ COND_P
Definition: X86BaseInfo.h:91
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:833
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:766
llvm::CallingConv::X86_StdCall
@ X86_StdCall
stdcall is mostly used by the Win32 API.
Definition: CallingConv.h:96
llvm::CCValAssign::VExt
@ VExt
Definition: CallingConvLower.h:46
llvm::CallingConv::HiPE
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
llvm::CCValAssign::Indirect
@ Indirect
Definition: CallingConvLower.h:50
llvm::X86Subtarget::isTargetMCU
bool isTargetMCU() const
Definition: X86Subtarget.h:287
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:234
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:488
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:381
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::DbgVariableIntrinsic::getVariable
DILocalVariable * getVariable() const
Definition: IntrinsicInst.h:258
llvm::X86AddressMode::FrameIndexBase
@ FrameIndexBase
Definition: X86InstrBuilder.h:45
llvm::CCValAssign::BCvt
@ BCvt
Definition: CallingConvLower.h:44
llvm::CCValAssign::AExtUpper
@ AExtUpper
Definition: CallingConvLower.h:42
llvm::CallingConv::WebKit_JS
@ WebKit_JS
Used for stack based JavaScript calls.
Definition: CallingConv.h:56
Operator.h
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::CCValAssign::ZExtUpper
@ ZExtUpper
Definition: CallingConvLower.h:40
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::X86::CondCode
CondCode
Definition: X86BaseInfo.h:80
llvm::X86ISD::SMUL
@ SMUL
Definition: X86ISelLowering.h:399
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:261
llvm::X86AddressMode::Base
union llvm::X86AddressMode::@590 Base
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:123
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:178
llvm::CCValAssign::Trunc
@ Trunc
Definition: CallingConvLower.h:45
llvm::X86II::MO_GOTOFF
@ MO_GOTOFF
MO_GOTOFF - On a symbol operand this indicates that the immediate is the offset to the location of th...
Definition: X86BaseInfo.h:434
llvm::LoadInst::getAlign
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:217
llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:723
llvm::N86::EAX
@ EAX
Definition: X86MCTargetDesc.h:51
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
OpIndex
unsigned OpIndex
Definition: SPIRVModuleAnalysis.cpp:45
llvm::MachineInstr::addMemOperand
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
Definition: MachineInstr.cpp:355
llvm::X86::COND_O
@ COND_O
Definition: X86BaseInfo.h:81
F
#define F(x, y, z)
Definition: MD5.cpp:55
MachineRegisterInfo.h
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
llvm::X86AddressMode::GVOpFlags
unsigned GVOpFlags
Definition: X86InstrBuilder.h:57
llvm::CmpInst::FCMP_ULT
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:733
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Type::isArrayTy
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:220
X86MachineFunctionInfo.h
llvm::TargetRegisterClass::contains
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
Definition: TargetRegisterInfo.h:94
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::CCValAssign::AExt
@ AExt
Definition: CallingConvLower.h:37
llvm::MemTransferBase::getSourceAddressSpace
unsigned getSourceAddressSpace() const
Definition: IntrinsicInst.h:806
llvm::X86II::MO_COFFSTUB
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: X86BaseInfo.h:575
X86.h
llvm::MVT::v8f64
@ v8f64
Definition: MachineValueType.h:181
llvm::addConstantPoolReference
static const MachineInstrBuilder & addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI, unsigned GlobalBaseReg, unsigned char OpFlags)
addConstantPoolReference - This function is used to add a reference to the base of a constant value s...
Definition: X86InstrBuilder.h:223
llvm::addFullAddress
static const MachineInstrBuilder & addFullAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM)
Definition: X86InstrBuilder.h:172
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:31
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::CallingConv::Swift
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
llvm::ISD::ArgFlagsTy::isByVal
bool isByVal() const
Definition: TargetCallingConv.h:85
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
FunctionLoweringInfo.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:148
llvm::User
Definition: User.h:44
llvm::addDirectMem
static const MachineInstrBuilder & addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg)
addDirectMem - This function is used to add a direct memory reference to the current instruction – th...
Definition: X86InstrBuilder.h:124
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:58
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::X86II::MO_GOTPCREL
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:442
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::X86AddressMode::Scale
unsigned Scale
Definition: X86InstrBuilder.h:53
llvm::MVT::x86mmx
@ x86mmx
Definition: MachineValueType.h:268
MCSymbol.h
llvm::MemTransferBase::getRawSource
Value * getRawSource() const
Return the arguments to the instruction.
Definition: IntrinsicInst.h:793
llvm::CmpInst::FCMP_UGE
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:732
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:329
llvm::RetCC_X86
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:232
llvm::addFrameReference
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
Definition: PPCInstrBuilder.h:32
llvm::Mips::GPRIdx
@ GPRIdx
Definition: MipsRegisterBankInfo.cpp:44
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::X86Subtarget::hasSSE1
bool hasSSE1() const
Definition: X86Subtarget.h:199
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::N86::ESI
@ ESI
Definition: X86MCTargetDesc.h:51
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
llvm::MVT::v4f64
@ v4f64
Definition: MachineValueType.h:180
llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:729
llvm::Instruction
Definition: Instruction.h:42
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:147
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::X86II::MO_GOTPCREL_NORELAX
@ MO_GOTPCREL_NORELAX
MO_GOTPCREL_NORELAX - Same as MO_GOTPCREL except that R_X86_64_GOTPCREL relocations are guaranteed to...
Definition: X86BaseInfo.h:447
llvm::DILocalVariable::isValidLocationForIntrinsic
bool isValidLocationForIntrinsic(const DILocation *DL) const
Check that a location is valid for this variable.
Definition: DebugInfoMetadata.h:3137
llvm::CmpInst::FCMP_OEQ
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:722
llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:725
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition: CallingConvLower.h:153
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::X86AddressMode::Reg
unsigned Reg
Definition: X86InstrBuilder.h:49
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:149
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:272
llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition: CallingConvLower.h:143
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::Triple::isOSMSVCRT
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:608
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::MemSetInst
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Definition: IntrinsicInst.h:993
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3180
llvm::X86::AddrIndexReg
@ AddrIndexReg
Definition: X86BaseInfo.h:34
llvm::CmpInst::FCMP_FALSE
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:721
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::CodeModel::Model
Model
Definition: CodeGen.h:28
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:709
llvm::MVT::f80
@ f80
Definition: MachineValueType.h:59
X86ChooseCmpOpcode
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget)
Definition: X86FastISel.cpp:1353
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
llvm::isInt< 8 >
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:363
llvm::MVT::v4i64
@ v4i64
Definition: MachineValueType.h:126
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::MVT::v32i16
@ v32i16
Definition: MachineValueType.h:100
llvm::X86AddressMode::IndexReg
unsigned IndexReg
Definition: X86InstrBuilder.h:54
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:86
BranchProbabilityInfo.h
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:369
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:180
llvm::CCValAssign::SExt
@ SExt
Definition: CallingConvLower.h:35
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:404
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:99
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:124
llvm::isGlobalStubReference
static bool isGlobalStubReference(unsigned char TargetFlag)
isGlobalStubReference - Return true if the specified TargetFlag operand is a reference to a stub for ...
Definition: X86InstrInfo.h:82
llvm::StructLayout
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
uint64_t
llvm::DbgDeclareInst
This represents the llvm.dbg.declare instruction.
Definition: IntrinsicInst.h:313
llvm::X86II::MO_PIC_BASE_OFFSET
@ MO_PIC_BASE_OFFSET
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
Definition: X86BaseInfo.h:420
llvm::MVT::v16f32
@ v16f32
Definition: MachineValueType.h:168
llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4776
AH
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference AH
Definition: README-X86-64.txt:44
llvm::ISD::TRAP
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1133
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::DenseMap
Definition: DenseMap.h:714
llvm::codeview::FrameCookieKind::Copy
@ Copy
DebugInfo.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::X86RegisterInfo::getPtrSizedFrameRegister
unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const
Definition: X86RegisterInfo.cpp:920
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:929
MachineConstantPool.h
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::X86ISD::UMUL
@ UMUL
Definition: X86ISelLowering.h:400
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:163
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:407
llvm::AllocFnKind::Aligned
@ Aligned
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:46
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::X86AddressMode::BaseType
enum llvm::X86AddressMode::@589 BaseType
llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition: FunctionLoweringInfo.h:52
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:883
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:672
llvm::X86::COND_B
@ COND_B
Definition: X86BaseInfo.h:83
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::CmpInst::FCMP_OGE
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:724
llvm::X86AddressMode::Disp
int Disp
Definition: X86InstrBuilder.h:55
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1424
llvm::X86TargetMachine
Definition: X86TargetMachine.h:28
llvm::X86MachineFunctionInfo
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
Definition: X86MachineFunctionInfo.h:25
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::FastISel
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:65
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::X86InstrInfo::foldMemoryOperandImpl
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
foldMemoryOperand - If this target supports it, fold a load or store of the specified stack slot into...
Definition: X86InstrInfo.cpp:6228
llvm::N86::EBX
@ EBX
Definition: X86MCTargetDesc.h:51
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::X86InstrInfo
Definition: X86InstrInfo.h:138
TargetOptions.h
llvm::CCValAssign::getValNo
unsigned getValNo() const
Definition: CallingConvLower.h:140
llvm::MachineInstrBuilder::addRegMask
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Definition: MachineInstrBuilder.h:197
llvm::TargetMachine::getMCAsmInfo
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
Definition: TargetMachine.h:205
llvm::X86II::MO_DLLIMPORT
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the reference is actually to the "__imp...
Definition: X86BaseInfo.h:537
llvm::X86::isCalleePop
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
Definition: X86ISelLowering.cpp:5406
llvm::RegState::Implicit
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Definition: MachineInstrBuilder.h:46
MCAsmInfo.h
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1625
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:49
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
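A minimal sketch, assuming a MachineFunction MF is in scope; the size and alignment below are illustrative only.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Reserve an 8-byte, 8-aligned object that is not a register spill slot.
  int FI = MFI.CreateStackObject(/*Size=*/8, Align(8), /*isSpillSlot=*/false);
  // FI can later be used as the FrameIndex of an X86AddressMode.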
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::isGlobalRelativeToPICBase
static bool isGlobalRelativeToPICBase(unsigned char TargetFlag)
isGlobalRelativeToPICBase - Return true if the specified global value reference is relative to a 32-b...
Definition: X86InstrInfo.h:100
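A minimal sketch, assuming GV (a GlobalValue) and Subtarget are in scope: classify the global reference first, then test whether the PIC base register must participate in the address.
  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
  if (isGlobalRelativeToPICBase(GVFlags)) {
    // The address must be formed relative to the 32-bit PIC base register.
  }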
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::X86MachineFunctionInfo::getBytesToPopOnReturn
unsigned getBytesToPopOnReturn() const
Definition: X86MachineFunctionInfo.h:161
llvm::Reloc::Static
@ Static
Definition: CodeGen.h:22
uint32_t
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27
llvm::N86::ECX
@ ECX
Definition: X86MCTargetDesc.h:51
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MVT::v64i8
@ v64i8
Definition: MachineValueType.h:88
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:374
llvm::X86::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
Definition: X86FastISel.cpp:4008
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:109
llvm::CCValAssign::FPExt
@ FPExt
Definition: CallingConvLower.h:49
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::CallBase::paramHasAttr
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Definition: Instructions.cpp:341
llvm::MVT::v8i64
@ v8i64
Definition: MachineValueType.h:127
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:134
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:114
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:164
CallingConv.h
llvm::Instruction::isAtomic
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
Definition: Instruction.cpp:616
llvm::CmpInst::FCMP_UGT
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:731
llvm::X86Subtarget::hasAVX512
bool hasAVX512() const
Definition: X86Subtarget.h:207
llvm::CCValAssign::isMemLoc
bool isMemLoc() const
Definition: CallingConvLower.h:144
llvm::ISD::ArgFlagsTy::getByValSize
unsigned getByValSize() const
Definition: TargetCallingConv.h:169
X86ChooseCmpImmediateOpcode
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC)
If we have a comparison whose right-hand side is the constant RHSC, return an opcode that works for the co...
Definition: X86FastISel.cpp:1380
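A minimal sketch of a call site inside this file, assuming VT and a ConstantInt RHSC are in scope; the helper returns 0 when no immediate-form compare fits.
  unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, RHSC);
  if (CompareImmOpc == 0)
    return false; // Fall back to a register-register compare.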
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:972
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:269
X86CallingConv.h
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
llvm::TargetRegisterInfo::getRegSizeInBits
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Definition: TargetRegisterInfo.h:277
llvm::MemIntrinsicBase::getLength
Value * getLength() const
Definition: IntrinsicInst.h:735
uint16_t
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Definition: MachineFunction.h:652
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
X86TargetMachine.h
llvm::StructLayout::getElementOffset
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:652
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::CC_X86
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
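A minimal sketch, assuming CC, IsVarArg, MF, OutVTs and OutFlags are in scope: CC_X86 is normally passed to a CCState analysis rather than called directly.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, MF, ArgLocs, MF.getFunction().getContext());
  // Assign a register or stack location to every outgoing call operand.
  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);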
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1339
GlobalVariable.h
llvm::CallingConv::SwiftTail
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
llvm::MachineInstrBuilder::addConstantPoolIndex
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:158
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:98
llvm::pdb::PDB_LocType::Slot
@ Slot
llvm::ISD::ArgFlagsTy::isSExt
bool isSExt() const
Definition: TargetCallingConv.h:76
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:48
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::CallBase::arg_empty
bool arg_empty() const
Definition: InstrTypes.h:1338
llvm::X86Subtarget::hasAVX
bool hasAVX() const
Definition: X86Subtarget.h:205
llvm::X86::getCMovOpcode
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false)
Return a cmov opcode for the given register size in bytes, and operand type.
Definition: X86InstrInfo.cpp:2832
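A minimal sketch, assuming RC (a TargetRegisterClass pointer) and TRI are in scope: size the CMOV from the register class and request the register-register form.
  unsigned RegBytes = TRI.getRegSizeInBits(*RC) / 8;
  unsigned CMovOpc = X86::getCMovOpcode(RegBytes, /*HasMemoryOperand=*/false);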
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::MachineFrameInfo::setStackProtectorIndex
void setStackProtectorIndex(int I)
Definition: MachineFrameInfo.h:358
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:113
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:136
GlobalAlias.h
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
X86RegisterInfo.h
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::ARMBuildAttrs::Symbol
@ Symbol
Definition: ARMBuildAttributes.h:83
llvm::CallingConv::X86_FastCall
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:100
llvm::X86AddressMode::FrameIndex
int FrameIndex
Definition: X86InstrBuilder.h:50
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::MachineInstrBuilder::addGlobalAddress
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:177
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
llvm::getKillRegState
unsigned getKillRegState(bool B)
Definition: MachineInstrBuilder.h:508
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:105
FastISel.h
llvm::X86Subtarget::getTargetTriple
const Triple & getTargetTriple() const
Definition: X86Subtarget.h:268
Instructions.h
llvm::MemCpyInst
This class wraps the llvm.memcpy intrinsic.
Definition: IntrinsicInst.h:1045
llvm::MachineInstrBuilder::addSym
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
Definition: MachineInstrBuilder.h:267
llvm::FunctionLoweringInfo::MF
MachineFunction * MF
Definition: FunctionLoweringInfo.h:55
llvm::Type::isStructTy
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:217
llvm::GetReturnInfo
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
Definition: TargetLoweringBase.cpp:1653
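A minimal sketch, assuming a Function F, the target lowering TLI and the module DataLayout DL are in scope: compute the legalized return values and their flags for F's calling convention.
  SmallVector<ISD::OutputArg, 4> Outs;
  GetReturnInfo(F.getCallingConv(), F.getReturnType(), F.getAttributes(), Outs,
                TLI, DL);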
llvm::MemIntrinsic::isVolatile
bool isVolatile() const
Definition: IntrinsicInst.h:970
llvm::N86::EDI
@ EDI
Definition: X86MCTargetDesc.h:51
llvm::X86AddressMode::getFullAddress
void getFullAddress(SmallVectorImpl< MachineOperand > &MO)
Definition: X86InstrBuilder.h:65
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:347
llvm::X86::getX86ConditionCode
std::pair< CondCode, bool > getX86ConditionCode(CmpInst::Predicate Predicate)
Return a pair of the condition code for the given predicate and whether the instruction operands should b...
Definition: X86InstrInfo.cpp:2794
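A minimal sketch, assuming CI is a CmpInst and LHS/RHS are its operands: translate the IR predicate and honor the swap flag before emitting the compare.
  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(CI->getPredicate());
  if (CC > X86::LAST_VALID_COND)
    return false; // No single X86 condition code matches this predicate.
  if (SwapArgs)
    std::swap(LHS, RHS);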
llvm::User::op_begin
op_iterator op_begin()
Definition: User.h:234
getX86SSEConditionCode
static std::pair< unsigned, bool > getX86SSEConditionCode(CmpInst::Predicate Predicate)
Definition: X86FastISel.cpp:175
llvm::CCValAssign::getValVT
MVT getValVT() const
Definition: CallingConvLower.h:141
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:56
llvm::CmpInst::FCMP_UNE
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:735
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
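A minimal sketch, assuming MBB, DL, TII and MRI are in scope: operand helpers such as addReg and addImm are chained onto the returned MachineInstrBuilder.
  // Materialize an immediate into a fresh virtual register...
  Register TmpReg = MRI.createVirtualRegister(&X86::GR32RegClass);
  BuildMI(MBB, MBB.end(), DL, TII.get(X86::MOV32ri), TmpReg).addImm(42);
  // ...then copy it into EAX, marking the last use of TmpReg as killed.
  BuildMI(MBB, MBB.end(), DL, TII.get(X86::MOV32rr), X86::EAX)
      .addReg(TmpReg, getKillRegState(true));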
computeBytesPoppedByCalleeForSRet
static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget, CallingConv::ID CC, const CallBase *CB)
Definition: X86FastISel.cpp:3136
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:53
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:494
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:809
llvm::CallingConv::CFGuard_Check
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
llvm::constrainOperandRegClass
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:52
llvm::CallingConv::GHC
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
llvm::MemIntrinsicBase::getRawDest
Value * getRawDest() const
Definition: IntrinsicInst.h:729
llvm::orc::SymbolState::Emitted
@ Emitted
Assigned address, still materializing.
llvm::MachineInstrBuilder::addMetadata
const MachineInstrBuilder & addMetadata(const MDNode *MD) const
Definition: MachineInstrBuilder.h:236
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:160
llvm::CmpInst::FCMP_OLE
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:726
llvm::ISD::ArgFlagsTy::isInReg
bool isInReg() const
Definition: TargetCallingConv.h:79
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
DerivedTypes.h
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:87
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:256
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:47
llvm::X86RegisterInfo::getStackRegister
Register getStackRegister() const
Definition: X86RegisterInfo.h:150
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::CallingConv::Tail
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition: MachineFrameInfo.h:371
llvm::AllocaInst
An instruction to allocate memory on the stack.
Definition: Instructions.h:59
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::MachineMemOperand::isNonTemporal
bool isNonTemporal() const
Definition: MachineMemOperand.h:290
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3099
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:760
llvm::X86MachineFunctionInfo::getSRetReturnReg
Register getSRetReturnReg() const
Definition: X86MachineFunctionInfo.h:173
llvm::MVT::v8f32
@ v8f32
Definition: MachineValueType.h:167
X86InstrInfo.h
llvm::CmpInst::FCMP_TRUE
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:736
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::X86::COND_E
@ COND_E
Definition: X86BaseInfo.h:85
llvm::MCAsmInfo::usesWindowsCFI
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:793
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:57
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:230
llvm::X86::LAST_VALID_COND
@ LAST_VALID_COND
Definition: X86BaseInfo.h:97
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition: MachineOperand.cpp:1019
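A minimal sketch, assuming MF and a byte offset LocMemOffset are in scope: attach an accurate memory operand to a store into the outgoing-argument area.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getStack(MF, LocMemOffset),
      MachineMemOperand::MOStore, /*Size=*/4, Align(4));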
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::X86::COND_NP
@ COND_NP
Definition: X86BaseInfo.h:92
llvm::TargetRegisterInfo::getCallPreservedMask
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
Definition: TargetRegisterInfo.h:480
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:606
llvm::CmpInst::FCMP_ORD
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:728
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3192
llvm::BasicBlock::const_iterator
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:88
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::CmpInst::FCMP_UEQ
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:730
llvm::X86::COND_NE
@ COND_NE
Definition: X86BaseInfo.h:86
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::MCInstrDesc::ImplicitDefs
const MCPhysReg * ImplicitDefs
Definition: MCInstrDesc.h:207
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::X86RegisterInfo
Definition: X86RegisterInfo.h:24
llvm::User::op_end
op_iterator op_end()
Definition: User.h:236