//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"

using namespace llvm;

namespace {

class X86FastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo) {
    Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
  }

  bool fastSelectInstruction(const Instruction *I) override;

  /// The specified machine instr operand is a vreg, and that
  /// vreg is being provided by the specified load instruction. If possible,
  /// try to fold the load as an operand to the instruction, returning true on
  /// success.
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
                          const DebugLoc &DL);

  bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
                       unsigned &ResultReg, unsigned Alignment = 1);

  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);
  bool X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectSExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectDivRem(const Instruction *I);

  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
                               const TargetRegisterClass *RC);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);
  bool X86SelectSIToFP(const Instruction *I);
  bool X86SelectUIToFP(const Instruction *I);
  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);

  const X86InstrInfo *getInstrInfo() const {
    return Subtarget->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);

  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned fastMaterializeConstant(const Constant *C) override;

  unsigned fastMaterializeAlloca(const AllocaInst *C) override;

  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && Subtarget->hasSSE2()) ||
           (VT == MVT::f32 && Subtarget->hasSSE1()) || VT == MVT::f16;
  }

  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);

  bool IsMemcpySmall(uint64_t Len);

  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
                          X86AddressMode SrcAM, uint64_t Len);

  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                            const Value *Cond);

  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
                                            X86AddressMode &AM);

  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, unsigned Op0,
                             unsigned Op1, unsigned Op2, unsigned Op3);
};

} // end anonymous namespace.

static std::pair<unsigned, bool>
getX86SSEConditionCode(CmpInst::Predicate Predicate) {
  unsigned CC;
  bool NeedSwap = false;

  // SSE Condition code mapping:
  //  0 - EQ
  //  1 - LT
  //  2 - LE
  //  3 - UNORD
  //  4 - NEQ
  //  5 - NLT
  //  6 - NLE
  //  7 - ORD
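  // For example, FCMP_OGT has no direct encoding in this table, so the
  // operands are swapped and condition code 1 (LT) is used instead; the
  // NeedSwap flag below reports this to the caller.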
  switch (Predicate) {
  default: llvm_unreachable("Unexpected predicate");
  case CmpInst::FCMP_OEQ: CC = 0;          break;
  case CmpInst::FCMP_OGT: NeedSwap = true; [[fallthrough]];
  case CmpInst::FCMP_OLT: CC = 1;          break;
  case CmpInst::FCMP_OGE: NeedSwap = true; [[fallthrough]];
  case CmpInst::FCMP_OLE: CC = 2;          break;
  case CmpInst::FCMP_UNO: CC = 3;          break;
  case CmpInst::FCMP_UNE: CC = 4;          break;
  case CmpInst::FCMP_ULE: NeedSwap = true; [[fallthrough]];
  case CmpInst::FCMP_UGE: CC = 5;          break;
  case CmpInst::FCMP_ULT: NeedSwap = true; [[fallthrough]];
  case CmpInst::FCMP_UGT: CC = 6;          break;
  case CmpInst::FCMP_ORD: CC = 7;          break;
  case CmpInst::FCMP_UEQ: CC = 8;          break;
  case CmpInst::FCMP_ONE: CC = 12;         break;
  }

  return std::make_pair(CC, NeedSwap);
}

/// Adds a complex addressing mode to the given machine instr builder.
/// Note, this will constrain the index register. If it's not possible to
/// constrain the given index register, then a new one will be created. The
/// IndexReg field of the addressing mode will be updated to match in this case.
const MachineInstrBuilder &
X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
                            X86AddressMode &AM) {
  // First constrain the index register. It needs to be a GR64_NOSP.
  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
                                         MIB->getNumOperands() +
                                             X86::AddrIndexReg);
  return ::addFullAddress(MIB, AM);
}

/// Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
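/// For example, when the overflow bit of @llvm.sadd.with.overflow.i32 feeds a
/// branch in the same block, the branch can test COND_O directly on the
/// EFLAGS set by the ADD (a single JO), instead of first materializing the
/// bit (e.g. with SETO) and re-testing it.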
bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                                       const Value *Cond) {
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  X86::CondCode TmpCC;
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
  }

  // Check if both instructions are in the same basic block.
  if (II->getParent() != I->getParent())
    return false;

  // Make sure nothing is in the way
  BasicBlock::const_iterator Start(I);
  BasicBlock::const_iterator End(II);
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  // Make sure no potentially eflags clobbering phi moves can be inserted in
  // between.
  auto HasPhis = [](const BasicBlock *Succ) { return !Succ->phis().empty(); };
  if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
    return false;

  // Make sure there are no potentially eflags clobbering constant
  // materializations in between.
  if (llvm::any_of(I->operands(), [](Value *V) { return isa<Constant>(V); }))
    return false;

  CC = TmpCC;
  return true;
}

bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
  if (evt == MVT::Other || !evt.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  VT = evt.getSimpleVT();
  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !Subtarget->hasSSE2())
    return false;
  if (VT == MVT::f32 && !Subtarget->hasSSE1())
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
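/// For example, a 16-byte-aligned load of <4 x float> selects MOVAPS (VMOVAPS
/// with AVX), an unaligned one falls back to MOVUPS, and a suitably aligned
/// non-temporal load with SSE4.1 available selects MOVNTDQA.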
bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
                                  MachineMemOperand *MMO, unsigned &ResultReg,
                                  unsigned Alignment) {
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE41 = Subtarget->hasSSE41();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX2 = Subtarget->hasAVX2();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Treat i1 loads the same as i8 loads. Masking will be done when storing.
  if (VT == MVT::i1)
    VT = MVT::i8;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::i8:
    Opc = X86::MOV8rm;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    break;
  case MVT::f32:
    Opc = HasAVX512 ? X86::VMOVSSZrm_alt
          : HasAVX  ? X86::VMOVSSrm_alt
          : HasSSE1 ? X86::MOVSSrm_alt
                    : X86::LD_Fp32m;
    break;
  case MVT::f64:
    Opc = HasAVX512 ? X86::VMOVSDZrm_alt
          : HasAVX  ? X86::VMOVSDrm_alt
          : HasSSE2 ? X86::MOVSDrm_alt
                    : X86::LD_Fp64m;
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  case MVT::v4f32:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPSZ128rm :
            HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
    else
      Opc = HasVLX ? X86::VMOVUPSZ128rm :
            HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
    break;
  case MVT::v2f64:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVAPDZ128rm :
            HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
    else
      Opc = HasVLX ? X86::VMOVUPDZ128rm :
            HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
    else if (Alignment >= 16)
      Opc = HasVLX ? X86::VMOVDQA64Z128rm :
            HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
    else
      Opc = HasVLX ? X86::VMOVDQU64Z128rm :
            HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
    break;
  case MVT::v8f32:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
    else
      Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
    break;
  case MVT::v4f64:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
    else
      Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
    break;
  case MVT::v8i32:
  case MVT::v4i64:
  case MVT::v16i16:
  case MVT::v32i8:
    assert(HasAVX);
    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
    else if (IsNonTemporal && Alignment >= 16)
      return false; // Force split for X86::VMOVNTDQArm
    else if (Alignment >= 32)
      Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
    else
      Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
    break;
  case MVT::v16f32:
    assert(HasAVX512);
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
    break;
  case MVT::v8f64:
    assert(HasAVX512);
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
    break;
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8:
    assert(HasAVX512);
    // Note: There are a lot more choices based on type with AVX-512, but
    // there's really no advantage when the load isn't masked.
    if (IsNonTemporal && Alignment >= 64)
      Opc = X86::VMOVNTDQAZrm;
    else
      Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
    break;
  }

  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);

  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg);
  addFullAddress(MIB, AM);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base ptr,
/// Ptr, and a displacement offset, or a GlobalAddress, i.e. V. Return true
/// if it is possible.
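/// For example, an aligned non-temporal <4 x float> store selects MOVNTPS,
/// while an unaligned store of the same type falls back to MOVUPS.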
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE4A = Subtarget->hasSSE4A();
  bool HasAVX = Subtarget->hasAVX();
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasVLX = Subtarget->hasVLX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    Register AndResult = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(X86::AND8ri), AndResult)
        .addReg(ValReg).addImm(1);
    ValReg = AndResult;
    [[fallthrough]]; // handle i1 as i8.
  }
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32:
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
    break;
  case MVT::f32:
    if (HasSSE1) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSS;
      else
        Opc = HasAVX512 ? X86::VMOVSSZmr :
              HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
    } else
      Opc = X86::ST_Fp32m;
    break;
  case MVT::f64:
    if (HasSSE2) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSD;
      else
        Opc = HasAVX512 ? X86::VMOVSDZmr :
              HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
    } else
      Opc = X86::ST_Fp64m;
    break;
  case MVT::x86mmx:
    Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
    break;
  case MVT::v4f32:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPSZ128mr :
              HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
      else
        Opc = HasVLX ? X86::VMOVAPSZ128mr :
              HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
    } else
      Opc = HasVLX ? X86::VMOVUPSZ128mr :
            HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
    break;
  case MVT::v2f64:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPDZ128mr :
              HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
      else
        Opc = HasVLX ? X86::VMOVAPDZ128mr :
              HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
    } else
      Opc = HasVLX ? X86::VMOVUPDZ128mr :
            HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTDQZ128mr :
              HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
      else
        Opc = HasVLX ? X86::VMOVDQA64Z128mr :
              HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
    } else
      Opc = HasVLX ? X86::VMOVDQU64Z128mr :
            HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
    break;
  case MVT::v8f32:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
      else
        Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
    } else
      Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
    break;
  case MVT::v4f64:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
      else
        Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
    } else
      Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
    break;
  case MVT::v8i32:
  case MVT::v4i64:
  case MVT::v16i16:
  case MVT::v32i8:
    assert(HasAVX);
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
      else
        Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
    } else
      Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
    break;
  case MVT::v16f32:
    assert(HasAVX512);
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
    else
      Opc = X86::VMOVUPSZmr;
    break;
  case MVT::v8f64:
    assert(HasAVX512);
    if (Aligned) {
      Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
    } else
      Opc = X86::VMOVUPDZmr;
    break;
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8:
    assert(HasAVX512);
    // Note: There are a lot more choices based on type with AVX-512, but
    // there's really no advantage when the store isn't masked.
    if (Aligned)
      Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
    else
      Opc = X86::VMOVDQU64Zmr;
    break;
  }

  const MCInstrDesc &Desc = TII.get(Opc);
  // Some of the instructions in the previous switch use FR128 instead
  // of FR32 for ValReg. Make sure the register we feed the instruction
  // matches its register class constraints.
  // Note: a copy from FR32 to FR128 is fine; these are the same registers
  // behind the scenes, which is why this never triggered any bugs before.
  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, Desc);
  addFullAddress(MIB, AM).addReg(ValReg);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);

  return true;
}

bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
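  // For example, 'store i32 42, ptr %p' becomes a single MOV32mi with an
  // immediate of 42, rather than first materializing 42 in a register.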
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1:
      Signed = false;
      [[fallthrough]]; // Handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if (isInt<32>(CI->getSExtValue()))
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      MachineInstrBuilder MIB =
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc));
      addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
                                            : CI->getZExtValue());
      if (MMO)
        MIB->addMemOperand(*FuncInfo.MF, MMO);
      return true;
    }
  }

  Register ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  return X86FastEmitStore(VT, ValReg, AM, MMO, Aligned);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
  if (RR == 0)
    return false;

  ResultReg = RR;
  return true;
}

bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // Can't handle TLS yet.
    if (GV->isThreadLocal())
      return false;

    // Can't handle !absolute_symbol references yet.
    if (GV->isAbsoluteSymbolRef())
      return false;

    // RIP-relative addresses can't have additional register operands, so if
    // we've already folded stuff into the addressing mode, just force the
    // global value into its own register, which we can use as the basereg.
    if (!Subtarget->isPICStyleRIPRel() ||
        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
      // Okay, we've committed to selecting this global. Set up the address.
      AM.GV = GV;

      // Allow the subtarget to classify the global.
      unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);

      // If this reference is relative to the pic base, set it now.
      if (isGlobalRelativeToPICBase(GVFlags)) {
        // FIXME: How do we know Base.Reg is free??
        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
      }

      // Unless the ABI requires an extra load, return a direct reference to
      // the global.
      if (!isGlobalStubReference(GVFlags)) {
        if (Subtarget->isPICStyleRIPRel()) {
          // Use rip-relative addressing if we can. Above we verified that the
          // base and index registers are unused.
          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
          AM.Base.Reg = X86::RIP;
        }
        AM.GVOpFlags = GVFlags;
        return true;
      }

      // Ok, we need to do a load from a stub. If we've already loaded from
      // this stub, reuse the loaded pointer, otherwise emit the load now.
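      // (For example, a GOT-indirect reference first loads the global's
      // address with 'movq sym@GOTPCREL(%rip), %reg' and then addresses the
      // global through %reg.)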
      DenseMap<const Value *, Register>::iterator I = LocalValueMap.find(V);
      Register LoadReg;
      if (I != LocalValueMap.end() && I->second) {
        LoadReg = I->second;
      } else {
        // Issue load from stub.
        unsigned Opc = 0;
        const TargetRegisterClass *RC = nullptr;
        X86AddressMode StubAM;
        StubAM.Base.Reg = AM.Base.Reg;
        StubAM.GV = GV;
        StubAM.GVOpFlags = GVFlags;

        // Prepare for inserting code in the local-value area.
        SavePoint SaveInsertPt = enterLocalValueArea();

        if (TLI.getPointerTy(DL) == MVT::i64) {
          Opc = X86::MOV64rm;
          RC = &X86::GR64RegClass;
        } else {
          Opc = X86::MOV32rm;
          RC = &X86::GR32RegClass;
        }

        if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL ||
            GVFlags == X86II::MO_GOTPCREL_NORELAX)
          StubAM.Base.Reg = X86::RIP;

        LoadReg = createResultReg(RC);
        MachineInstrBuilder LoadMI =
            BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), LoadReg);
        addFullAddress(LoadMI, StubAM);

        // Ok, back to normal mode.
        leaveLocalValueArea(SaveInsertPt);

        // Prevent loading GV stub multiple times in same MBB.
        LocalValueMap[V] = LoadReg;
      }

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = LoadReg;
      AM.GV = nullptr;
      return true;
    }
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectAddress - Attempt to fill in an address from the given value.
///
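/// For example, 'getelementptr [8 x i32], ptr %p, i64 0, i64 %i' folds into a
/// single [%p + 4*%i] addressing mode, with any constant indices accumulated
/// into the 32-bit displacement.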
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  SmallVector<const Value *, 32> GEPs;
redo_gep:
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
        continue;
      }

      // An array/variable index is always of the form i*S where S is the
      // constant scale size. See if we can push the scale into immediates.
      uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
      for (;;) {
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
          break;
        }
        if (canFoldAddIntoGEP(U, Op)) {
          // A compatible add with a constant operand. Fold the constant.
          ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
          Disp += CI->getSExtValue() * S;
          // Iterate on the other operand.
          Op = cast<AddOperator>(Op)->getOperand(0);
          continue;
        }
        if (IndexReg == 0 &&
            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
            (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(Op);
          if (IndexReg == 0)
            return false;
          break;
        }
        // Unsupported.
        goto unsupported_gep;
      }
    }

    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;

    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    GEPs.push_back(V);

    if (const GetElementPtrInst *GEP =
            dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
      // Ok, the GEP indices were covered by constant-offset and scaled-index
      // addressing. Update the address state and move on to examining the base.
      V = GEP;
      goto redo_gep;
    } else if (X86SelectAddress(U->getOperand(0), AM)) {
      return true;
    }

    // If we couldn't merge the gep value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;

    for (const Value *I : reverse(GEPs))
      if (handleConstantAddresses(I, AM))
        return true;

    return false;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  return handleConstantAddresses(V, AM);
}

/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  const Instruction *I = dyn_cast<Instruction>(V);
  // Record if the value is defined in the same basic block.
  //
  // This information is crucial to know whether or not folding an
  // operand is valid.
  // Indeed, FastISel generates or reuses a virtual register for all
  // operands of all instructions it selects. Obviously, the definition and
  // its uses must use the same virtual register, otherwise the produced
  // code is incorrect.
  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
  // registers for values that are alive across basic blocks. This ensures
  // that the values are consistently set across basic blocks, even if
  // different instruction selection mechanisms are used (e.g., a mix of
  // SDISel and FastISel).
  // For values local to a basic block, the instruction selection process
  // generates these virtual registers with whatever method is appropriate
  // for its needs. In particular, FastISel and SDISel do not share the way
  // local virtual registers are set.
  // Therefore, it is impossible (or at least unsafe) to share values
  // between basic blocks unless they use the same instruction selection
  // method, which is not guaranteed for X86.
  // Moreover, things like hasOneUse could not be used accurately if we
  // allowed referencing values across basic blocks when they are not
  // alive across basic blocks initially.
  bool InMBB = true;
  if (I) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // Return a direct reference to the global. Fastisel can handle calls to
    // functions that require loads, such as dllimport and nonlazybind
    // functions.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can. Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else {
      AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    auto GetCallRegForValue = [this](const Value *V) {
      Register Reg = getRegForValue(V);

      // In 64-bit mode, we need a 64-bit register even if pointers are 32 bits.
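      // (For example, on the x32 ABI a pointer lives in a GR32, but an
      // indirect CALL64r still needs a GR64, so the value is zero-extended
      // into one via SUBREG_TO_REG below.)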
      if (Reg && Subtarget->isTarget64BitILP32()) {
        Register CopyReg = createResultReg(&X86::GR32RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV32rr),
                CopyReg)
            .addReg(Reg);

        Register ExtReg = createResultReg(&X86::GR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(TargetOpcode::SUBREG_TO_REG), ExtReg)
            .addImm(0)
            .addReg(CopyReg)
            .addImm(X86::sub_32bit);
        Reg = ExtReg;
      }

      return Reg;
    };

    if (AM.Base.Reg == 0) {
      AM.Base.Reg = GetCallRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = GetCallRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
  // Atomic stores need special handling.
  const StoreInst *S = cast<StoreInst>(I);

  if (S->isAtomic())
    return false;

  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  const Value *Val = S->getValueOperand();
  const Value *Ptr = S->getPointerOperand();

  MVT VT;
  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
    return false;

  Align Alignment = S->getAlign();
  Align ABIAlignment = DL.getABITypeAlign(Val->getType());
  bool Aligned = Alignment >= ABIAlignment;

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
}

/// X86SelectRet - Select and emit code to implement ret instructions.
bool X86FastISel::X86SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();
  const X86MachineFunctionInfo *X86MFInfo =
      FuncInfo.MF->getInfo<X86MachineFunctionInfo>();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::Tail &&
      CC != CallingConv::SwiftTail &&
      CC != CallingConv::X86_FastCall &&
      CC != CallingConv::X86_StdCall &&
      CC != CallingConv::X86_ThisCall &&
      CC != CallingConv::X86_64_SysV &&
      CC != CallingConv::Win64)
    return false;

  // Don't handle popping bytes if they don't fit the ret's immediate.
  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
      CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
    return false;

  // Let SDISel handle vararg functions.
  if (F.isVarArg())
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_X86);

    const Value *RV = Ret->getOperand(0);
    Register Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    // The calling-convention tables for x87 returns don't tell
    // the whole story.
    if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT SrcVT = TLI.getValueType(DL, RV->getType());
    EVT DstVT = VA.getValVT();
    // Special handling for extended integers.
    if (SrcVT != DstVT) {
      if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      assert(DstVT == MVT::i32 && "X86 should always ext to i32");

      if (SrcVT == MVT::i1) {
        if (Outs[0].Flags.isSExt())
          return false;
        // TODO
        SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg);
        SrcVT = MVT::i8;
      }
      unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
                                             ISD::SIGN_EXTEND;
      // TODO
      SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op, SrcReg);
    }

    // Make the copy.
    Register DstReg = VA.getLocReg();
    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  // Swift calling convention does not require we copy the sret argument
  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.

  // All x86 ABIs require that for returning structs by value we copy
  // the sret argument into %rax/%eax (depending on ABI) for the return.
  // We saved the argument into a virtual register in the entry block,
  // so now we copy the value out and into %rax/%eax.
  if (F.hasStructRetAttr() && CC != CallingConv::Swift &&
      CC != CallingConv::SwiftTail) {
    Register Reg = X86MFInfo->getSRetReturnReg();
    assert(Reg &&
           "SRetReturnReg should have been set in LowerFormalArguments()!");
    unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
    RetRegs.push_back(RetReg);
  }

  // Now emit the RET.
  MachineInstrBuilder MIB;
  if (X86MFInfo->getBytesToPopOnReturn()) {
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                  TII.get(Subtarget->is64Bit() ? X86::RETI64 : X86::RETI32))
              .addImm(X86MFInfo->getBytesToPopOnReturn());
  } else {
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                  TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32));
  }
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);
  return true;
}

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
  const LoadInst *LI = cast<LoadInst>(I);

  // Atomic loads need special handling.
  if (LI->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  MVT VT;
  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
    return false;

  const Value *Ptr = LI->getPointerOperand();

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  unsigned ResultReg = 0;
  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
                       LI->getAlign().value()))
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
  bool HasAVX512 = Subtarget->hasAVX512();
  bool HasAVX = Subtarget->hasAVX();
  bool HasSSE1 = Subtarget->hasSSE1();
  bool HasSSE2 = Subtarget->hasSSE2();

  switch (VT.getSimpleVT().SimpleTy) {
  default: return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32:
    return HasAVX512 ? X86::VUCOMISSZrr
           : HasAVX  ? X86::VUCOMISSrr
           : HasSSE1 ? X86::UCOMISSrr
                     : 0;
  case MVT::f64:
    return HasAVX512 ? X86::VUCOMISDZrr
           : HasAVX  ? X86::VUCOMISDrr
           : HasSSE2 ? X86::UCOMISDrr
                     : 0;
  }
}

/// If the RHS of the comparison is the constant RHSC, return an opcode that
/// can fold it as an immediate operand (e.g. CMP32ri); otherwise return 0.
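/// For example, comparing an i32 against a constant in [-128, 127] selects
/// the shorter CMP32ri8 encoding, while larger 32-bit immediates use CMP32ri.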
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
  int64_t Val = RHSC->getSExtValue();
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default:
    return 0;
  case MVT::i8:
    return X86::CMP8ri;
  case MVT::i16:
    if (isInt<8>(Val))
      return X86::CMP16ri8;
    return X86::CMP16ri;
  case MVT::i32:
    if (isInt<8>(Val))
      return X86::CMP32ri8;
    return X86::CMP32ri;
  case MVT::i64:
    if (isInt<8>(Val))
      return X86::CMP64ri8;
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if (isInt<32>(Val))
      return X86::CMP64ri32;
    return 0;
  }
}

bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
                                     const DebugLoc &CurMIMD) {
  Register Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));

  // We have two options: compare with register or immediate. If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurMIMD, TII.get(CompareImmOpc))
          .addReg(Op0Reg)
          .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
  if (CompareOpc == 0) return false;

  Register Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurMIMD, TII.get(CompareOpc))
      .addReg(Op0Reg)
      .addReg(Op1Reg);

  return true;
}

bool X86FastISel::X86SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Below code only works for scalars.
  if (VT.isVector())
    return false;

  // Try to optimize or fold the cmp.
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  unsigned ResultReg = 0;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_FALSE: {
    ResultReg = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV32r0),
            ResultReg);
    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, X86::sub_8bit);
    if (!ResultReg)
      return false;
    break;
  }
  case CmpInst::FCMP_TRUE: {
    ResultReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV8ri),
            ResultReg).addImm(1);
    break;
  }
  }

  if (ResultReg) {
    updateValueMap(I, ResultReg);
    return true;
  }

  const Value *LHS = CI->getOperand(0);
  const Value *RHS = CI->getOperand(1);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just
  // use %x again on the RHS.
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *RHSC = dyn_cast<ConstantFP>(RHS);
    if (RHSC && RHSC->isNullValue())
      RHS = LHS;
  }

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
  static const uint16_t SETFOpcTable[2][3] = {
    { X86::COND_E,  X86::COND_NP, X86::AND8rr },
    { X86::COND_NE, X86::COND_P,  X86::OR8rr  }
  };
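  // For example, FCMP_OEQ is true only when ZF is set and PF is clear after
  // UCOMISS (an unordered result sets ZF, PF, and CF), so it is materialized
  // as SETE and SETNP combined with AND8rr.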
  const uint16_t *SETFOpc = nullptr;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
  }

  ResultReg = createResultReg(&X86::GR8RegClass);
  if (SETFOpc) {
    if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
      return false;

    Register FlagReg1 = createResultReg(&X86::GR8RegClass);
    Register FlagReg2 = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
            FlagReg1).addImm(SETFOpc[0]);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
            FlagReg2).addImm(SETFOpc[1]);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(SETFOpc[2]),
            ResultReg).addReg(FlagReg1).addReg(FlagReg2);
    updateValueMap(I, ResultReg);
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

  if (SwapArgs)
    std::swap(LHS, RHS);

  // Emit a compare of LHS/RHS.
  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
    return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
          ResultReg).addImm(CC);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectZExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(DL, I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  Register ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Handle zero-extension from i1 to i8, which is common.
  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
  if (SrcVT == MVT::i1) {
    // Set the high bits to zero.
    ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
    SrcVT = MVT::i8;

    if (ResultReg == 0)
      return false;
  }
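  // (For example, 'zext i1 %b to i64' first zero-extends the i1 into an i8
  // via an AND with 1, then widens to 32 bits with MOVZX32rr8 and finally to
  // 64 bits with SUBREG_TO_REG below.)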

  if (DstVT == MVT::i64) {
    // Handle extension to 64-bits via sub-register shenanigans.
    unsigned MovInst;

    switch (SrcVT.SimpleTy) {
    case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
    case MVT::i16: MovInst = X86::MOVZX32rr16; break;
    case MVT::i32: MovInst = X86::MOV32rr;     break;
    default: llvm_unreachable("Unexpected zext to i64 source type");
    }

    Register Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovInst), Result32)
        .addReg(ResultReg);

    ResultReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
        .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
  } else if (DstVT == MVT::i16) {
    // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
    // extend to 32-bits and then extract down to 16-bits.
    Register Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOVZX32rr8),
            Result32).addReg(ResultReg);

    ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
  } else if (DstVT != MVT::i8) {
    ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
                           ResultReg);
    if (ResultReg == 0)
      return false;
  }

  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectSExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(DL, I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  Register ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Handle sign-extension from i1 to i8.
  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
  if (SrcVT == MVT::i1) {
    // Set the high bits to zero.
    Register ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
    if (ZExtReg == 0)
      return false;

    // Negate the result to make an 8-bit sign extended value.
    ResultReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::NEG8r),
            ResultReg).addReg(ZExtReg);

    SrcVT = MVT::i8;
  }

  if (DstVT == MVT::i16) {
    // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
    // extend to 32-bits and then extract down to 16-bits.
    Register Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOVSX32rr8),
            Result32).addReg(ResultReg);

    ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
  } else if (DstVT != MVT::i8) {
    ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
                           ResultReg);
    if (ResultReg == 0)
      return false;
  }

  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectBranch(const Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison
  // in the same block (values defined on other blocks may not have
  // initialized registers).
  X86::CondCode CC;
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
      EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());

      // Try to optimize or fold the cmp.
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
      switch (Predicate) {
      default: break;
      case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, MIMD.getDL()); return true;
      case CmpInst::FCMP_TRUE:  fastEmitBranch(TrueMBB, MIMD.getDL());  return true;
      }

      const Value *CmpLHS = CI->getOperand(0);
      const Value *CmpRHS = CI->getOperand(1);

      // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
      // 0.0.
      // We don't have to materialize a zero constant for this case and can
      // just use %x again on the RHS.
1667  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1668  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1669  if (CmpRHSC && CmpRHSC->isNullValue())
1670  CmpRHS = CmpLHS;
1671  }
1672 
1673  // Try to take advantage of fallthrough opportunities.
1674  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1675  std::swap(TrueMBB, FalseMBB);
1676  Predicate = CmpInst::getInversePredicate(Predicate);
1677  }
1678 
1679  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1680  // code check. Instead two branch instructions are required to check all
1681  // the flags. First we change the predicate to a supported condition code,
1682  // which will be the first branch. Later on we will emit the second
1683  // branch.
1684  bool NeedExtraBranch = false;
1685  switch (Predicate) {
1686  default: break;
1687  case CmpInst::FCMP_OEQ:
1688  std::swap(TrueMBB, FalseMBB);
1689  [[fallthrough]];
1690  case CmpInst::FCMP_UNE:
1691  NeedExtraBranch = true;
1692  Predicate = CmpInst::FCMP_UNE;
1693  break;
1694  }
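  // E.g. (illustrative) "br (fcmp oeq float %a, %b)" becomes, after the swap
  // to FCMP_UNE, roughly:
  //   ucomiss %xmm1, %xmm0          ; sets ZF/PF/CF
  //   jne <original false block>    ; not equal fails OEQ
  //   jp  <original false block>    ; unordered operands also fail OEQ
  //   (fallthrough / jump to the original true block)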
1695 
1696  bool SwapArgs;
1697  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1698  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1699 
1700  if (SwapArgs)
1701  std::swap(CmpLHS, CmpRHS);
1702 
1703  // Emit a compare of the LHS and RHS, setting the flags.
1704  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1705  return false;
1706 
1707  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
1708  .addMBB(TrueMBB).addImm(CC);
1709 
1710  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1711  // to UNE above).
1712  if (NeedExtraBranch) {
1713  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
1714  .addMBB(TrueMBB).addImm(X86::COND_P);
1715  }
1716 
1717  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1718  return true;
1719  }
1720  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1721  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1722  // typically happen for _Bool and C++ bools.
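  // E.g. (illustrative) for "%cond = trunc i32 %x to i1; br i1 %cond" this
  // emits "TEST32ri %x, 1" followed by "JCC_1 <true>, COND_NE" (or COND_E
  // with swapped targets when the true block is the layout successor).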
1723  MVT SourceVT;
1724  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1725  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1726  unsigned TestOpc = 0;
1727  switch (SourceVT.SimpleTy) {
1728  default: break;
1729  case MVT::i8: TestOpc = X86::TEST8ri; break;
1730  case MVT::i16: TestOpc = X86::TEST16ri; break;
1731  case MVT::i32: TestOpc = X86::TEST32ri; break;
1732  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1733  }
1734  if (TestOpc) {
1735  Register OpReg = getRegForValue(TI->getOperand(0));
1736  if (OpReg == 0) return false;
1737 
1738  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TestOpc))
1739  .addReg(OpReg).addImm(1);
1740 
1741  unsigned JmpCond = X86::COND_NE;
1742  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1743  std::swap(TrueMBB, FalseMBB);
1744  JmpCond = X86::COND_E;
1745  }
1746 
1747  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
1748  .addMBB(TrueMBB).addImm(JmpCond);
1749 
1750  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1751  return true;
1752  }
1753  }
1754  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1755  // Fake request the condition, otherwise the intrinsic might be completely
1756  // optimized away.
1757  Register TmpReg = getRegForValue(BI->getCondition());
1758  if (TmpReg == 0)
1759  return false;
1760 
1761  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
1762  .addMBB(TrueMBB).addImm(CC);
1763  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1764  return true;
1765  }
1766 
1767  // Otherwise do a clumsy setcc and re-test it.
1768  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1769  // in an explicit cast, so make sure to handle that correctly.
1770  Register OpReg = getRegForValue(BI->getCondition());
1771  if (OpReg == 0) return false;
1772 
1773  // In case OpReg is a K register, COPY to a GPR
1774  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1775  unsigned KOpReg = OpReg;
1776  OpReg = createResultReg(&X86::GR32RegClass);
1777  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1778  TII.get(TargetOpcode::COPY), OpReg)
1779  .addReg(KOpReg);
1780  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, X86::sub_8bit);
1781  }
1782  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
1783  .addReg(OpReg)
1784  .addImm(1);
1785  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
1786  .addMBB(TrueMBB).addImm(X86::COND_NE);
1787  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1788  return true;
1789 }
1790 
1791 bool X86FastISel::X86SelectShift(const Instruction *I) {
1792  unsigned CReg = 0, OpReg = 0;
1793  const TargetRegisterClass *RC = nullptr;
1794  if (I->getType()->isIntegerTy(8)) {
1795  CReg = X86::CL;
1796  RC = &X86::GR8RegClass;
1797  switch (I->getOpcode()) {
1798  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1799  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1800  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1801  default: return false;
1802  }
1803  } else if (I->getType()->isIntegerTy(16)) {
1804  CReg = X86::CX;
1805  RC = &X86::GR16RegClass;
1806  switch (I->getOpcode()) {
1807  default: llvm_unreachable("Unexpected shift opcode");
1808  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1809  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1810  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1811  }
1812  } else if (I->getType()->isIntegerTy(32)) {
1813  CReg = X86::ECX;
1814  RC = &X86::GR32RegClass;
1815  switch (I->getOpcode()) {
1816  default: llvm_unreachable("Unexpected shift opcode");
1817  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1818  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1819  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1820  }
1821  } else if (I->getType()->isIntegerTy(64)) {
1822  CReg = X86::RCX;
1823  RC = &X86::GR64RegClass;
1824  switch (I->getOpcode()) {
1825  default: llvm_unreachable("Unexpected shift opcode");
1826  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1827  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1828  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1829  }
1830  } else {
1831  return false;
1832  }
1833 
1834  MVT VT;
1835  if (!isTypeLegal(I->getType(), VT))
1836  return false;
1837 
1838  Register Op0Reg = getRegForValue(I->getOperand(0));
1839  if (Op0Reg == 0) return false;
1840 
1841  Register Op1Reg = getRegForValue(I->getOperand(1));
1842  if (Op1Reg == 0) return false;
1843  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
1844  CReg).addReg(Op1Reg);
1845 
1846  // The shift instruction uses X86::CL. If we defined a super-register
1847  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1848  if (CReg != X86::CL)
1849  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1850  TII.get(TargetOpcode::KILL), X86::CL)
1851  .addReg(CReg, RegState::Kill);
1852 
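  // E.g. (illustrative vreg names) "shl i32 %x, %n" becomes roughly:
  //   COPY %ecx = %n         ; the shift amount must live in ECX
  //   KILL %cl (ECX killed)  ; note that only CL is actually read
  //   %res = SHL32rCL %x     ; shifts by CL implicitly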
1853  Register ResultReg = createResultReg(RC);
1854  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(OpReg), ResultReg)
1855  .addReg(Op0Reg);
1856  updateValueMap(I, ResultReg);
1857  return true;
1858 }
1859 
1860 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1861  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1862  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1863  const static bool S = true; // IsSigned
1864  const static bool U = false; // !IsSigned
1865  const static unsigned Copy = TargetOpcode::COPY;
1866  // For the X86 DIV/IDIV instruction, in most cases the dividend
1867  // (numerator) must be in a specific register pair highreg:lowreg,
1868  // producing the quotient in lowreg and the remainder in highreg.
1869  // For most data types, to set up the instruction, the dividend is
1870  // copied into lowreg, and lowreg is sign-extended or zero-extended
1871  // into highreg. The exception is i8, where the dividend is defined
1872  // as a single register rather than a register pair, and we
1873  // therefore directly sign-extend or zero-extend the dividend into
1874  // lowreg, instead of copying, and ignore the highreg.
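  // E.g. (illustrative) "sdiv i32 %a, %b" expands to:
  //   COPY %eax = %a   ; dividend into the low register
  //   CDQ              ; sign-extend EAX into EDX
  //   IDIV32r %b       ; quotient -> EAX, remainder -> EDX
  //   COPY %res = %eax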
1875  const static struct DivRemEntry {
1876  // The following portion depends only on the data type.
1877  const TargetRegisterClass *RC;
1878  unsigned LowInReg; // low part of the register pair
1879  unsigned HighInReg; // high part of the register pair
1880  // The following portion depends on both the data type and the operation.
1881  struct DivRemResult {
1882  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1883  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1884  // highreg, or copying a zero into highreg.
1885  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1886  // zero/sign-extending into lowreg for i8.
1887  unsigned DivRemResultReg; // Register containing the desired result.
1888  bool IsOpSigned; // Whether to use signed or unsigned form.
1889  } ResultTable[NumOps];
1890  } OpTable[NumTypes] = {
1891  { &X86::GR8RegClass, X86::AX, 0, {
1892  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1893  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1894  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1895  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1896  }
1897  }, // i8
1898  { &X86::GR16RegClass, X86::AX, X86::DX, {
1899  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1900  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1901  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1902  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1903  }
1904  }, // i16
1905  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1906  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1907  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1908  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1909  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1910  }
1911  }, // i32
1912  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1913  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1914  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1915  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1916  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1917  }
1918  }, // i64
1919  };
1920 
1921  MVT VT;
1922  if (!isTypeLegal(I->getType(), VT))
1923  return false;
1924 
1925  unsigned TypeIndex, OpIndex;
1926  switch (VT.SimpleTy) {
1927  default: return false;
1928  case MVT::i8: TypeIndex = 0; break;
1929  case MVT::i16: TypeIndex = 1; break;
1930  case MVT::i32: TypeIndex = 2; break;
1931  case MVT::i64: TypeIndex = 3;
1932  if (!Subtarget->is64Bit())
1933  return false;
1934  break;
1935  }
1936 
1937  switch (I->getOpcode()) {
1938  default: llvm_unreachable("Unexpected div/rem opcode");
1939  case Instruction::SDiv: OpIndex = 0; break;
1940  case Instruction::SRem: OpIndex = 1; break;
1941  case Instruction::UDiv: OpIndex = 2; break;
1942  case Instruction::URem: OpIndex = 3; break;
1943  }
1944 
1945  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1946  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1947  Register Op0Reg = getRegForValue(I->getOperand(0));
1948  if (Op0Reg == 0)
1949  return false;
1950  Register Op1Reg = getRegForValue(I->getOperand(1));
1951  if (Op1Reg == 0)
1952  return false;
1953 
1954  // Move op0 into low-order input register.
1955  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1956  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1957  // Zero-extend or sign-extend into high-order input register.
1958  if (OpEntry.OpSignExtend) {
1959  if (OpEntry.IsOpSigned)
1960  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1961  TII.get(OpEntry.OpSignExtend));
1962  else {
1963  Register Zero32 = createResultReg(&X86::GR32RegClass);
1964  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1965  TII.get(X86::MOV32r0), Zero32);
1966 
1967  // Copy the zero into the appropriate sub/super/identical physical
1968  // register. Unfortunately the operations needed are not uniform enough
1969  // to fit neatly into the table above.
1970  if (VT == MVT::i16) {
1971  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1972  TII.get(Copy), TypeEntry.HighInReg)
1973  .addReg(Zero32, 0, X86::sub_16bit);
1974  } else if (VT == MVT::i32) {
1975  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1976  TII.get(Copy), TypeEntry.HighInReg)
1977  .addReg(Zero32);
1978  } else if (VT == MVT::i64) {
1979  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1980  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1981  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1982  }
1983  }
1984  }
1985  // Generate the DIV/IDIV instruction.
1986  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1987  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1988  // For i8 remainder, we can't reference ah directly, as we'll end
1989  // up with bogus copies like %r9b = COPY %ah. Reference ax
1990  // instead to prevent ah references in a rex instruction.
1991  //
1992  // The current assumption of the fast register allocator is that isel
1993  // won't generate explicit references to the GR8_NOREX registers. If
1994  // the allocator and/or the backend get enhanced to be more robust in
1995  // that regard, this can be, and should be, removed.
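  // E.g. (illustrative vreg names) "urem i8" on x86-64 therefore becomes:
  //   COPY %tmp16 = %ax
  //   SHR16ri %tmp16, 8          ; move the remainder out of AH
  //   %res8 = extract sub_8bit   ; instead of copying AH directly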
1996  unsigned ResultReg = 0;
1997  if ((I->getOpcode() == Instruction::SRem ||
1998  I->getOpcode() == Instruction::URem) &&
1999  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
2000  Register SourceSuperReg = createResultReg(&X86::GR16RegClass);
2001  Register ResultSuperReg = createResultReg(&X86::GR16RegClass);
2002  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2003  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
2004 
2005  // Shift AX right by 8 bits instead of using AH.
2006  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SHR16ri),
2007  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
2008 
2009  // Now reference the 8-bit subreg of the result.
2010  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2011  X86::sub_8bit);
2012  }
2013  // Copy the result out of the physreg if we haven't already.
2014  if (!ResultReg) {
2015  ResultReg = createResultReg(TypeEntry.RC);
2016  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Copy), ResultReg)
2017  .addReg(OpEntry.DivRemResultReg);
2018  }
2019  updateValueMap(I, ResultReg);
2020 
2021  return true;
2022 }
2023 
2024 /// Emit a conditional move instruction (if they are supported) to lower
2025 /// the select.
2026 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2027  // Check if the subtarget supports these instructions.
2028  if (!Subtarget->canUseCMOV())
2029  return false;
2030 
2031  // FIXME: Add support for i8.
2032  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2033  return false;
2034 
2035  const Value *Cond = I->getOperand(0);
2036  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2037  bool NeedTest = true;
2038  X86::CondCode CC = X86::COND_NE;
2039 
2040  // Optimize conditions coming from a compare if both instructions are in the
2041  // same basic block (values defined in other basic blocks may not have
2042  // initialized registers).
2043  const auto *CI = dyn_cast<CmpInst>(Cond);
2044  if (CI && (CI->getParent() == I->getParent())) {
2045  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2046 
2047  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2048  static const uint16_t SETFOpcTable[2][3] = {
2049  { X86::COND_NP, X86::COND_E, X86::TEST8rr },
2050  { X86::COND_P, X86::COND_NE, X86::OR8rr }
2051  };
2052  const uint16_t *SETFOpc = nullptr;
2053  switch (Predicate) {
2054  default: break;
2055  case CmpInst::FCMP_OEQ:
2056  SETFOpc = &SETFOpcTable[0][0];
2057  Predicate = CmpInst::ICMP_NE;
2058  break;
2059  case CmpInst::FCMP_UNE:
2060  SETFOpc = &SETFOpcTable[1][0];
2061  Predicate = CmpInst::ICMP_NE;
2062  break;
2063  }
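    // E.g. (illustrative) for "select (fcmp oeq ...), %t, %f" this path emits
    // SETNP and SETE after the compare, then TEST8rr on the two flag
    // registers; the final CMOVNE picks %t only when the operands were
    // ordered *and* equal (the TEST clears ZF iff both SETs produced 1).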
2064 
2065  bool NeedSwap;
2066  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2067  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2068 
2069  const Value *CmpLHS = CI->getOperand(0);
2070  const Value *CmpRHS = CI->getOperand(1);
2071  if (NeedSwap)
2072  std::swap(CmpLHS, CmpRHS);
2073 
2074  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2075  // Emit a compare of the LHS and RHS, setting the flags.
2076  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2077  return false;
2078 
2079  if (SETFOpc) {
2080  Register FlagReg1 = createResultReg(&X86::GR8RegClass);
2081  Register FlagReg2 = createResultReg(&X86::GR8RegClass);
2082  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
2083  FlagReg1).addImm(SETFOpc[0]);
2084  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
2085  FlagReg2).addImm(SETFOpc[1]);
2086  auto const &II = TII.get(SETFOpc[2]);
2087  if (II.getNumDefs()) {
2088  Register TmpReg = createResultReg(&X86::GR8RegClass);
2089  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, TmpReg)
2090  .addReg(FlagReg2).addReg(FlagReg1);
2091  } else {
2092  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2093  .addReg(FlagReg2).addReg(FlagReg1);
2094  }
2095  }
2096  NeedTest = false;
2097  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2098  // Fake request the condition, otherwise the intrinsic might be completely
2099  // optimized away.
2100  Register TmpReg = getRegForValue(Cond);
2101  if (TmpReg == 0)
2102  return false;
2103 
2104  NeedTest = false;
2105  }
2106 
2107  if (NeedTest) {
2108  // Selects operate on i1; however, CondReg is 8 bits wide and may contain
2109  // garbage. Only the least significant bit is supposed to be accurate, so
2110  // reading more than the LSB may yield a non-zero value even though the
2111  // LSB is zero. Therefore, we have to truncate CondReg to i1 for the
2112  // select. This is achieved by performing a TEST against 1.
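    // E.g. (illustrative) a CondReg holding 0xFE must act as "false": only
    // bit 0 is meaningful, and "TEST8ri %cond, 1" inspects exactly that bit.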
2113  Register CondReg = getRegForValue(Cond);
2114  if (CondReg == 0)
2115  return false;
2116 
2117  // In case OpReg is a K register, COPY to a GPR
2118  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2119  unsigned KCondReg = CondReg;
2120  CondReg = createResultReg(&X86::GR32RegClass);
2121  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2122  TII.get(TargetOpcode::COPY), CondReg)
2123  .addReg(KCondReg);
2124  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
2125  }
2126  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
2127  .addReg(CondReg)
2128  .addImm(1);
2129  }
2130 
2131  const Value *LHS = I->getOperand(1);
2132  const Value *RHS = I->getOperand(2);
2133 
2134  Register RHSReg = getRegForValue(RHS);
2135  Register LHSReg = getRegForValue(LHS);
2136  if (!LHSReg || !RHSReg)
2137  return false;
2138 
2139  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2140  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8);
2141  Register ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
2142  updateValueMap(I, ResultReg);
2143  return true;
2144 }
2145 
2146 /// Emit SSE or AVX instructions to lower the select.
2147 ///
2148 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2149 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2150 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
2151 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2152  // Optimize conditions coming from a compare if both instructions are in the
2153  // same basic block (values defined in other basic blocks may not have
2154  // initialized registers).
2155  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2156  if (!CI || (CI->getParent() != I->getParent()))
2157  return false;
2158 
2159  if (I->getType() != CI->getOperand(0)->getType() ||
2160  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2161  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2162  return false;
2163 
2164  const Value *CmpLHS = CI->getOperand(0);
2165  const Value *CmpRHS = CI->getOperand(1);
2166  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2167 
2168  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2169  // We don't have to materialize a zero constant for this case and can just use
2170  // %x again on the RHS.
2171  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2172  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2173  if (CmpRHSC && CmpRHSC->isNullValue())
2174  CmpRHS = CmpLHS;
2175  }
2176 
2177  unsigned CC;
2178  bool NeedSwap;
2179  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2180  if (CC > 7 && !Subtarget->hasAVX())
2181  return false;
2182 
2183  if (NeedSwap)
2184  std::swap(CmpLHS, CmpRHS);
2185 
2186  const Value *LHS = I->getOperand(1);
2187  const Value *RHS = I->getOperand(2);
2188 
2189  Register LHSReg = getRegForValue(LHS);
2190  Register RHSReg = getRegForValue(RHS);
2191  Register CmpLHSReg = getRegForValue(CmpLHS);
2192  Register CmpRHSReg = getRegForValue(CmpRHS);
2193  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2194  return false;
2195 
2196  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2197  unsigned ResultReg;
2198 
2199  if (Subtarget->hasAVX512()) {
2200  // If we have AVX512 we can use a mask compare and masked movss/sd.
2201  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2202  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2203 
2204  unsigned CmpOpcode =
2205  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2206  Register CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpRHSReg,
2207  CC);
2208 
2209  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2210  // bits of the result register since it's not based on any of the inputs.
2211  Register ImplicitDefReg = createResultReg(VR128X);
2212  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2213  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2214 
2215  // Place RHSReg as the passthru of the masked movss/sd operation and put
2216  // LHSReg in the input. The mask input comes from the compare.
2217  unsigned MovOpcode =
2218  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2219  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, CmpReg,
2220  ImplicitDefReg, LHSReg);
2221 
2222  ResultReg = createResultReg(RC);
2223  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2224  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2225 
2226  } else if (Subtarget->hasAVX()) {
2227  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2228 
2229  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2230  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2231  // uses XMM0 as the selection register. That may need just as many
2232  // instructions as the AND/ANDN/OR sequence due to register moves, so
2233  // don't bother.
2234  unsigned CmpOpcode =
2235  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2236  unsigned BlendOpcode =
2237  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2238 
2239  Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg,
2240  CC);
2241  Register VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, LHSReg,
2242  CmpReg);
2243  ResultReg = createResultReg(RC);
2244  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2245  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2246  } else {
2247  // Choose the SSE instruction sequence based on data type (float or double).
2248  static const uint16_t OpcTable[2][4] = {
2249  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2250  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2251  };
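    // E.g. (illustrative) for f32 this computes, using the all-ones or
    // all-zeros mask that the scalar compare produces:
    //   mask = cmpss CC, %cmplhs, %cmprhs
    //   res  = (mask & %lhs) | (~mask & %rhs)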
2252 
2253  const uint16_t *Opc = nullptr;
2254  switch (RetVT.SimpleTy) {
2255  default: return false;
2256  case MVT::f32: Opc = &OpcTable[0][0]; break;
2257  case MVT::f64: Opc = &OpcTable[1][0]; break;
2258  }
2259 
2260  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2261  Register CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpRHSReg, CC);
2262  Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, LHSReg);
2263  Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, RHSReg);
2264  Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, AndReg);
2265  ResultReg = createResultReg(RC);
2266  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2267  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2268  }
2269  updateValueMap(I, ResultReg);
2270  return true;
2271 }
2272 
2273 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2274  // These are pseudo CMOV instructions and will later be expanded into
2275  // control-flow.
2276  unsigned Opc;
2277  switch (RetVT.SimpleTy) {
2278  default: return false;
2279  case MVT::i8: Opc = X86::CMOV_GR8; break;
2280  case MVT::i16: Opc = X86::CMOV_GR16; break;
2281  case MVT::i32: Opc = X86::CMOV_GR32; break;
2282  case MVT::f16:
2283  Opc = Subtarget->hasAVX512() ? X86::CMOV_FR16X : X86::CMOV_FR16; break;
2284  case MVT::f32:
2285  Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X : X86::CMOV_FR32; break;
2286  case MVT::f64:
2287  Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X : X86::CMOV_FR64; break;
2288  }
2289 
2290  const Value *Cond = I->getOperand(0);
2291  X86::CondCode CC = X86::COND_NE;
2292 
2293  // Optimize conditions coming from a compare if both instructions are in the
2294  // same basic block (values defined in other basic blocks may not have
2295  // initialized registers).
2296  const auto *CI = dyn_cast<CmpInst>(Cond);
2297  if (CI && (CI->getParent() == I->getParent())) {
2298  bool NeedSwap;
2299  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2300  if (CC > X86::LAST_VALID_COND)
2301  return false;
2302 
2303  const Value *CmpLHS = CI->getOperand(0);
2304  const Value *CmpRHS = CI->getOperand(1);
2305 
2306  if (NeedSwap)
2307  std::swap(CmpLHS, CmpRHS);
2308 
2309  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2310  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2311  return false;
2312  } else {
2313  Register CondReg = getRegForValue(Cond);
2314  if (CondReg == 0)
2315  return false;
2316 
2317  // In case OpReg is a K register, COPY to a GPR
2318  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2319  unsigned KCondReg = CondReg;
2320  CondReg = createResultReg(&X86::GR32RegClass);
2321  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2322  TII.get(TargetOpcode::COPY), CondReg)
2323  .addReg(KCondReg);
2324  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
2325  }
2326  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
2327  .addReg(CondReg)
2328  .addImm(1);
2329  }
2330 
2331  const Value *LHS = I->getOperand(1);
2332  const Value *RHS = I->getOperand(2);
2333 
2334  Register LHSReg = getRegForValue(LHS);
2335  Register RHSReg = getRegForValue(RHS);
2336  if (!LHSReg || !RHSReg)
2337  return false;
2338 
2339  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2340 
2341  Register ResultReg =
2342  fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
2343  updateValueMap(I, ResultReg);
2344  return true;
2345 }
2346 
2347 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2348  MVT RetVT;
2349  if (!isTypeLegal(I->getType(), RetVT))
2350  return false;
2351 
2352  // Check if we can fold the select.
2353  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2354  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2355  const Value *Opnd = nullptr;
2356  switch (Predicate) {
2357  default: break;
2358  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2359  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2360  }
2361  // No need for a select anymore - this is an unconditional move.
2362  if (Opnd) {
2363  Register OpReg = getRegForValue(Opnd);
2364  if (OpReg == 0)
2365  return false;
2366  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2367  Register ResultReg = createResultReg(RC);
2368  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2369  TII.get(TargetOpcode::COPY), ResultReg)
2370  .addReg(OpReg);
2371  updateValueMap(I, ResultReg);
2372  return true;
2373  }
2374  }
2375 
2376  // First try to use real conditional move instructions.
2377  if (X86FastEmitCMoveSelect(RetVT, I))
2378  return true;
2379 
2380  // Try to use a sequence of SSE instructions to simulate a conditional move.
2381  if (X86FastEmitSSESelect(RetVT, I))
2382  return true;
2383 
2384  // Fall back to pseudo conditional move instructions, which will later be
2385  // converted to control-flow.
2386  if (X86FastEmitPseudoSelect(RetVT, I))
2387  return true;
2388 
2389  return false;
2390 }
2391 
2392 // Common code for X86SelectSIToFP and X86SelectUIToFP.
2393 bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2394  // The target-independent selection algorithm in FastISel already knows how
2395  // to select a SINT_TO_FP if the target is SSE but not AVX.
2396  // Early exit if the subtarget doesn't have AVX.
2397  // Unsigned conversion requires avx512.
2398  bool HasAVX512 = Subtarget->hasAVX512();
2399  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2400  return false;
2401 
2402  // TODO: We could sign extend narrower types.
2403  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2404  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
2405  return false;
2406 
2407  // Select integer to float/double conversion.
2408  Register OpReg = getRegForValue(I->getOperand(0));
2409  if (OpReg == 0)
2410  return false;
2411 
2412  unsigned Opcode;
2413 
2414  static const uint16_t SCvtOpc[2][2][2] = {
2415  { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2416  { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2417  { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2418  { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2419  };
2420  static const uint16_t UCvtOpc[2][2] = {
2421  { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2422  { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2423  };
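  // E.g. (illustrative) "sitofp i64 %x to double" with AVX512 selects
  // SCvtOpc[1][1][1] == X86::VCVTSI642SDZrr, while plain AVX selects
  // SCvtOpc[0][1][1] == X86::VCVTSI642SDrr.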
2424  bool Is64Bit = SrcVT == MVT::i64;
2425 
2426  if (I->getType()->isDoubleTy()) {
2427  // s/uitofp int -> double
2428  Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2429  } else if (I->getType()->isFloatTy()) {
2430  // s/uitofp int -> float
2431  Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2432  } else
2433  return false;
2434 
2435  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
2436  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
2437  Register ImplicitDefReg = createResultReg(RC);
2438  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2439  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2440  Register ResultReg = fastEmitInst_rr(Opcode, RC, ImplicitDefReg, OpReg);
2441  updateValueMap(I, ResultReg);
2442  return true;
2443 }
2444 
2445 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2446  return X86SelectIntToFP(I, /*IsSigned*/true);
2447 }
2448 
2449 bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2450  return X86SelectIntToFP(I, /*IsSigned*/false);
2451 }
2452 
2453 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2454 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2455  unsigned TargetOpc,
2456  const TargetRegisterClass *RC) {
2457  assert((I->getOpcode() == Instruction::FPExt ||
2458  I->getOpcode() == Instruction::FPTrunc) &&
2459  "Instruction must be an FPExt or FPTrunc!");
2460  bool HasAVX = Subtarget->hasAVX();
2461 
2462  Register OpReg = getRegForValue(I->getOperand(0));
2463  if (OpReg == 0)
2464  return false;
2465 
2466  unsigned ImplicitDefReg;
2467  if (HasAVX) {
2468  ImplicitDefReg = createResultReg(RC);
2469  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2470  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2471 
2472  }
2473 
2474  Register ResultReg = createResultReg(RC);
2475  MachineInstrBuilder MIB;
2476  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpc),
2477  ResultReg);
2478 
2479  if (HasAVX)
2480  MIB.addReg(ImplicitDefReg);
2481 
2482  MIB.addReg(OpReg);
2483  updateValueMap(I, ResultReg);
2484  return true;
2485 }
2486 
2487 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2488  if (Subtarget->hasSSE2() && I->getType()->isDoubleTy() &&
2489  I->getOperand(0)->getType()->isFloatTy()) {
2490  bool HasAVX512 = Subtarget->hasAVX512();
2491  // fpext from float to double.
2492  unsigned Opc =
2493  HasAVX512 ? X86::VCVTSS2SDZrr
2494  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2495  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
2496  }
2497 
2498  return false;
2499 }
2500 
2501 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2502  if (Subtarget->hasSSE2() && I->getType()->isFloatTy() &&
2503  I->getOperand(0)->getType()->isDoubleTy()) {
2504  bool HasAVX512 = Subtarget->hasAVX512();
2505  // fptrunc from double to float.
2506  unsigned Opc =
2507  HasAVX512 ? X86::VCVTSD2SSZrr
2508  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2509  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
2510  }
2511 
2512  return false;
2513 }
2514 
2515 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2516  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2517  EVT DstVT = TLI.getValueType(DL, I->getType());
2518 
2519  // This code only handles truncation to byte.
2520  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2521  return false;
2522  if (!TLI.isTypeLegal(SrcVT))
2523  return false;
2524 
2525  Register InputReg = getRegForValue(I->getOperand(0));
2526  if (!InputReg)
2527  // Unhandled operand. Halt "fast" selection and bail.
2528  return false;
2529 
2530  if (SrcVT == MVT::i8) {
2531  // Truncate from i8 to i1; no code needed.
2532  updateValueMap(I, InputReg);
2533  return true;
2534  }
2535 
2536  // Issue an extract_subreg.
2537  Register ResultReg = fastEmitInst_extractsubreg(MVT::i8, InputReg,
2538  X86::sub_8bit);
2539  if (!ResultReg)
2540  return false;
2541 
2542  updateValueMap(I, ResultReg);
2543  return true;
2544 }
2545 
2546 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2547  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2548 }
2549 
2550 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2551  X86AddressMode SrcAM, uint64_t Len) {
2552 
2553  // Make sure we don't bloat code by inlining very large memcpy's.
2554  if (!IsMemcpySmall(Len))
2555  return false;
2556 
2557  bool i64Legal = Subtarget->is64Bit();
2558 
2559  // We don't care about alignment here since we just emit integer accesses.
2560  while (Len) {
2561  MVT VT;
2562  if (Len >= 8 && i64Legal)
2563  VT = MVT::i64;
2564  else if (Len >= 4)
2565  VT = MVT::i32;
2566  else if (Len >= 2)
2567  VT = MVT::i16;
2568  else
2569  VT = MVT::i8;
2570 
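    // E.g. (illustrative) Len == 13 on x86-64 is copied as one i64, one i32,
    // and one i8 access, stepping both address modes forward after each pair.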
2571  unsigned Reg;
2572  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2573  RV &= X86FastEmitStore(VT, Reg, DestAM);
2574  assert(RV && "Failed to emit load or store??");
2575  (void)RV;
2576 
2577  unsigned Size = VT.getSizeInBits()/8;
2578  Len -= Size;
2579  DestAM.Disp += Size;
2580  SrcAM.Disp += Size;
2581  }
2582 
2583  return true;
2584 }
2585 
2586 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2587  // FIXME: Handle more intrinsics.
2588  switch (II->getIntrinsicID()) {
2589  default: return false;
2590  case Intrinsic::convert_from_fp16:
2591  case Intrinsic::convert_to_fp16: {
2592  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2593  return false;
2594 
2595  const Value *Op = II->getArgOperand(0);
2596  Register InputReg = getRegForValue(Op);
2597  if (InputReg == 0)
2598  return false;
2599 
2600  // F16C only allows converting from float to half and from half to float.
2601  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2602  if (IsFloatToHalf) {
2603  if (!Op->getType()->isFloatTy())
2604  return false;
2605  } else {
2606  if (!II->getType()->isFloatTy())
2607  return false;
2608  }
2609 
2610  unsigned ResultReg = 0;
2611  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2612  if (IsFloatToHalf) {
2613  // 'InputReg' is implicitly promoted from register class FR32 to
2614  // register class VR128 by method 'constrainOperandRegClass' which is
2615  // directly called by 'fastEmitInst_ri'.
2616  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2617  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2618  // It's consistent with the other FP instructions, which are usually
2619  // controlled by MXCSR.
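      // E.g. (illustrative) the float-to-half path is roughly:
      //   vcvtps2ph $4, %xmm0, %xmm0   ; imm 0b100 = use MXCSR.RC
      //   vmovd %xmm0, %eax            ; lower 32 bits into a GPR
      //   (extract sub_16bit)          ; the half value is the low 16 bits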
2620  unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr
2621  : X86::VCVTPS2PHrr;
2622  InputReg = fastEmitInst_ri(Opc, RC, InputReg, 4);
2623 
2624  // Move the lower 32 bits of InputReg into a register of class GR32.
2625  Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr
2626  : X86::VMOVPDI2DIrr;
2627  ResultReg = createResultReg(&X86::GR32RegClass);
2628  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2629  .addReg(InputReg, RegState::Kill);
2630 
2631  // The result value is in the lower 16-bits of ResultReg.
2632  unsigned RegIdx = X86::sub_16bit;
2633  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, RegIdx);
2634  } else {
2635  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2636  // Explicitly zero-extend the input to 32-bit.
2637  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::ZERO_EXTEND, InputReg);
2638 
2639  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2640  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2641  InputReg);
2642 
2643  unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
2644  : X86::VCVTPH2PSrr;
2645  InputReg = fastEmitInst_r(Opc, RC, InputReg);
2646 
2647  // The result value is in the lower 32-bits of ResultReg.
2648  // Emit an explicit copy from register class VR128 to register class FR32.
2649  ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
2650  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2651  TII.get(TargetOpcode::COPY), ResultReg)
2652  .addReg(InputReg, RegState::Kill);
2653  }
2654 
2655  updateValueMap(II, ResultReg);
2656  return true;
2657  }
2658  case Intrinsic::frameaddress: {
2659  MachineFunction *MF = FuncInfo.MF;
2660  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2661  return false;
2662 
2663  Type *RetTy = II->getCalledFunction()->getReturnType();
2664 
2665  MVT VT;
2666  if (!isTypeLegal(RetTy, VT))
2667  return false;
2668 
2669  unsigned Opc;
2670  const TargetRegisterClass *RC = nullptr;
2671 
2672  switch (VT.SimpleTy) {
2673  default: llvm_unreachable("Invalid result type for frameaddress.");
2674  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2675  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2676  }
2677 
2678  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2679  // we get the wrong frame register.
2680  MachineFrameInfo &MFI = MF->getFrameInfo();
2681  MFI.setFrameAddressIsTaken(true);
2682 
2683  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2684  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2685  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2686  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2687  "Invalid Frame Register!");
2688 
2689  // Always make a copy of the frame register to a vreg first, so that we
2690  // never directly reference the frame register (the TwoAddressInstruction-
2691  // Pass doesn't like that).
2692  Register SrcReg = createResultReg(RC);
2693  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2694  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2695 
2696  // Now recursively load from the frame address.
2697  // movq (%rbp), %rax
2698  // movq (%rax), %rax
2699  // movq (%rax), %rax
2700  // ...
2701  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2702  while (Depth--) {
2703  Register DestReg = createResultReg(RC);
2704  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2705  TII.get(Opc), DestReg), SrcReg);
2706  SrcReg = DestReg;
2707  }
2708 
2709  updateValueMap(II, SrcReg);
2710  return true;
2711  }
2712  case Intrinsic::memcpy: {
2713  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2714  // Don't handle volatile or variable length memcpys.
2715  if (MCI->isVolatile())
2716  return false;
2717 
2718  if (isa<ConstantInt>(MCI->getLength())) {
2719  // Small memcpy's are common enough that we want to do them
2720  // without a call if possible.
2721  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2722  if (IsMemcpySmall(Len)) {
2723  X86AddressMode DestAM, SrcAM;
2724  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2725  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2726  return false;
2727  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2728  return true;
2729  }
2730  }
2731 
2732  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2733  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2734  return false;
2735 
2736  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2737  return false;
2738 
2739  return lowerCallTo(II, "memcpy", II->arg_size() - 1);
2740  }
2741  case Intrinsic::memset: {
2742  const MemSetInst *MSI = cast<MemSetInst>(II);
2743 
2744  if (MSI->isVolatile())
2745  return false;
2746 
2747  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2748  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2749  return false;
2750 
2751  if (MSI->getDestAddressSpace() > 255)
2752  return false;
2753 
2754  return lowerCallTo(II, "memset", II->arg_size() - 1);
2755  }
2756  case Intrinsic::stackprotector: {
2757  // Emit code to store the stack guard onto the stack.
2758  EVT PtrTy = TLI.getPointerTy(DL);
2759 
2760  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2761  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2762 
2763  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2764 
2765  // Grab the frame index.
2766  X86AddressMode AM;
2767  if (!X86SelectAddress(Slot, AM)) return false;
2768  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2769  return true;
2770  }
2771  case Intrinsic::dbg_declare: {
2772  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2773  X86AddressMode AM;
2774  assert(DI->getAddress() && "Null address should be checked earlier!");
2775  if (!X86SelectAddress(DI->getAddress(), AM))
2776  return false;
2777  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2778  assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) &&
2779  "Expected inlined-at fields to agree");
2780  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II), AM)
2781  .addImm(0)
2782  .addMetadata(DI->getVariable())
2783  .addMetadata(DI->getExpression());
2784  return true;
2785  }
2786  case Intrinsic::trap: {
2787  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TRAP));
2788  return true;
2789  }
2790  case Intrinsic::sqrt: {
2791  if (!Subtarget->hasSSE1())
2792  return false;
2793 
2794  Type *RetTy = II->getCalledFunction()->getReturnType();
2795 
2796  MVT VT;
2797  if (!isTypeLegal(RetTy, VT))
2798  return false;
2799 
2800  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2801  // is not generated by FastISel yet.
2802  // FIXME: Update this code once tablegen can handle it.
2803  static const uint16_t SqrtOpc[3][2] = {
2804  { X86::SQRTSSr, X86::SQRTSDr },
2805  { X86::VSQRTSSr, X86::VSQRTSDr },
2806  { X86::VSQRTSSZr, X86::VSQRTSDZr },
2807  };
2808  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2809  Subtarget->hasAVX() ? 1 :
2810  0;
2811  unsigned Opc;
2812  switch (VT.SimpleTy) {
2813  default: return false;
2814  case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2815  case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2816  }
2817 
2818  const Value *SrcVal = II->getArgOperand(0);
2819  Register SrcReg = getRegForValue(SrcVal);
2820 
2821  if (SrcReg == 0)
2822  return false;
2823 
2824  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2825  unsigned ImplicitDefReg = 0;
2826  if (AVXLevel > 0) {
2827  ImplicitDefReg = createResultReg(RC);
2828  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2829  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2830  }
2831 
2832  Register ResultReg = createResultReg(RC);
2833  MachineInstrBuilder MIB;
2834  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
2835  ResultReg);
2836 
2837  if (ImplicitDefReg)
2838  MIB.addReg(ImplicitDefReg);
2839 
2840  MIB.addReg(SrcReg);
2841 
2842  updateValueMap(II, ResultReg);
2843  return true;
2844  }
2845  case Intrinsic::sadd_with_overflow:
2846  case Intrinsic::uadd_with_overflow:
2847  case Intrinsic::ssub_with_overflow:
2848  case Intrinsic::usub_with_overflow:
2849  case Intrinsic::smul_with_overflow:
2850  case Intrinsic::umul_with_overflow: {
2851  // This implements the basic lowering of the xalu with overflow intrinsics
2852  // into add/sub/mul followed by either seto or setb.
2853  const Function *Callee = II->getCalledFunction();
2854  auto *Ty = cast<StructType>(Callee->getReturnType());
2855  Type *RetTy = Ty->getTypeAtIndex(0U);
2856  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2857  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2858  "Overflow value expected to be an i1");
2859 
2860  MVT VT;
2861  if (!isTypeLegal(RetTy, VT))
2862  return false;
2863 
2864  if (VT < MVT::i8 || VT > MVT::i64)
2865  return false;
2866 
2867  const Value *LHS = II->getArgOperand(0);
2868  const Value *RHS = II->getArgOperand(1);
2869 
2870  // Canonicalize immediate to the RHS.
2871  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
2872  std::swap(LHS, RHS);
2873 
2874  unsigned BaseOpc, CondCode;
2875  switch (II->getIntrinsicID()) {
2876  default: llvm_unreachable("Unexpected intrinsic!");
2877  case Intrinsic::sadd_with_overflow:
2878  BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
2879  case Intrinsic::uadd_with_overflow:
2880  BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
2881  case Intrinsic::ssub_with_overflow:
2882  BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
2883  case Intrinsic::usub_with_overflow:
2884  BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
2885  case Intrinsic::smul_with_overflow:
2886  BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
2887  case Intrinsic::umul_with_overflow:
2888  BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
2889  }
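    // E.g. (illustrative) "llvm.uadd.with.overflow.i32" lowers to ADD32rr
    // plus SETB, since CF signals an unsigned carry; the signed forms use
    // SETO instead, since OF signals signed overflow.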
2890 
2891  Register LHSReg = getRegForValue(LHS);
2892  if (LHSReg == 0)
2893  return false;
2894 
2895  unsigned ResultReg = 0;
2896  // Check if we have an immediate version.
2897  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2898  static const uint16_t Opc[2][4] = {
2899  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2900  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2901  };
2902 
2903  if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
2904  CondCode == X86::COND_O) {
2905  // We can use INC/DEC.
2906  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2907  bool IsDec = BaseOpc == ISD::SUB;
2908  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2909  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2910  .addReg(LHSReg);
2911  } else
2912  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, CI->getZExtValue());
2913  }
2914 
2915  unsigned RHSReg;
2916  if (!ResultReg) {
2917  RHSReg = getRegForValue(RHS);
2918  if (RHSReg == 0)
2919  return false;
2920  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, RHSReg);
2921  }
2922 
2923  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2924  // it manually.
2925  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2926  static const uint16_t MULOpc[] =
2927  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2928  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2929  // First copy the first operand into RAX, which is an implicit input to
2930  // the X86::MUL*r instruction.
2931  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2932  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2933  .addReg(LHSReg);
2934  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2935  TLI.getRegClassFor(VT), RHSReg);
2936  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2937  static const uint16_t MULOpc[] =
2938  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2939  if (VT == MVT::i8) {
2940  // Copy the first operand into AL, which is an implicit input to the
2941  // X86::IMUL8r instruction.
2942  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2943  TII.get(TargetOpcode::COPY), X86::AL)
2944  .addReg(LHSReg);
2945  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg);
2946  } else
2947  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2948  TLI.getRegClassFor(VT), LHSReg, RHSReg);
2949  }
2950 
2951  if (!ResultReg)
2952  return false;
2953 
2954  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2955  Register ResultReg2 = createResultReg(&X86::GR8RegClass);
2956  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2957  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
2958  ResultReg2).addImm(CondCode);
2959 
2960  updateValueMap(II, ResultReg, 2);
2961  return true;
2962  }
2963  case Intrinsic::x86_sse_cvttss2si:
2964  case Intrinsic::x86_sse_cvttss2si64:
2965  case Intrinsic::x86_sse2_cvttsd2si:
2966  case Intrinsic::x86_sse2_cvttsd2si64: {
2967  bool IsInputDouble;
2968  switch (II->getIntrinsicID()) {
2969  default: llvm_unreachable("Unexpected intrinsic.");
2970  case Intrinsic::x86_sse_cvttss2si:
2971  case Intrinsic::x86_sse_cvttss2si64:
2972  if (!Subtarget->hasSSE1())
2973  return false;
2974  IsInputDouble = false;
2975  break;
2976  case Intrinsic::x86_sse2_cvttsd2si:
2977  case Intrinsic::x86_sse2_cvttsd2si64:
2978  if (!Subtarget->hasSSE2())
2979  return false;
2980  IsInputDouble = true;
2981  break;
2982  }
2983 
2984  Type *RetTy = II->getCalledFunction()->getReturnType();
2985  MVT VT;
2986  if (!isTypeLegal(RetTy, VT))
2987  return false;
2988 
2989  static const uint16_t CvtOpc[3][2][2] = {
2990  { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
2991  { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
2992  { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
2993  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
2994  { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
2995  { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
2996  };
2997  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2998  Subtarget->hasAVX() ? 1 :
2999  0;
3000  unsigned Opc;
3001  switch (VT.SimpleTy) {
3002  default: llvm_unreachable("Unexpected result type.");
3003  case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3004  case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3005  }
3006 
3007  // Check if we can fold insertelement instructions into the convert.
3008  const Value *Op = II->getArgOperand(0);
3009  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3010  const Value *Index = IE->getOperand(2);
3011  if (!isa<ConstantInt>(Index))
3012  break;
3013  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3014 
3015  if (Idx == 0) {
3016  Op = IE->getOperand(1);
3017  break;
3018  }
3019  Op = IE->getOperand(0);
3020  }
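    // E.g. (illustrative) for cvttss2si(insertelement undef, float %f, 0)
    // the loop walks down to %f itself, so the scalar is converted directly
    // without materializing a vector.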
3021 
3022  Register Reg = getRegForValue(Op);
3023  if (Reg == 0)
3024  return false;
3025 
3026  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3027  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3028  .addReg(Reg);
3029 
3030  updateValueMap(II, ResultReg);
3031  return true;
3032  }
3033  }
3034 }
3035 
3036 bool X86FastISel::fastLowerArguments() {
3037  if (!FuncInfo.CanLowerReturn)
3038  return false;
3039 
3040  const Function *F = FuncInfo.Fn;
3041  if (F->isVarArg())
3042  return false;
3043 
3044  CallingConv::ID CC = F->getCallingConv();
3045  if (CC != CallingConv::C)
3046  return false;
3047 
3048  if (Subtarget->isCallingConvWin64(CC))
3049  return false;
3050 
3051  if (!Subtarget->is64Bit())
3052  return false;
3053 
3054  if (Subtarget->useSoftFloat())
3055  return false;
3056 
3057  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
3058  unsigned GPRCnt = 0;
3059  unsigned FPRCnt = 0;
3060  for (auto const &Arg : F->args()) {
3061  if (Arg.hasAttribute(Attribute::ByVal) ||
3062  Arg.hasAttribute(Attribute::InReg) ||
3063  Arg.hasAttribute(Attribute::StructRet) ||
3064  Arg.hasAttribute(Attribute::SwiftSelf) ||
3065  Arg.hasAttribute(Attribute::SwiftAsync) ||
3066  Arg.hasAttribute(Attribute::SwiftError) ||
3067  Arg.hasAttribute(Attribute::Nest))
3068  return false;
3069 
3070  Type *ArgTy = Arg.getType();
3071  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3072  return false;
3073 
3074  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3075  if (!ArgVT.isSimple()) return false;
3076  switch (ArgVT.getSimpleVT().SimpleTy) {
3077  default: return false;
3078  case MVT::i32:
3079  case MVT::i64:
3080  ++GPRCnt;
3081  break;
3082  case MVT::f32:
3083  case MVT::f64:
3084  if (!Subtarget->hasSSE1())
3085  return false;
3086  ++FPRCnt;
3087  break;
3088  }
3089 
3090  if (GPRCnt > 6)
3091  return false;
3092 
3093  if (FPRCnt > 8)
3094  return false;
3095  }
3096 
3097  static const MCPhysReg GPR32ArgRegs[] = {
3098  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3099  };
3100  static const MCPhysReg GPR64ArgRegs[] = {
3101  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3102  };
3103  static const MCPhysReg XMMArgRegs[] = {
3104  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3105  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3106  };
3107 
3108  unsigned GPRIdx = 0;
3109  unsigned FPRIdx = 0;
3110  for (auto const &Arg : F->args()) {
3111  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3112  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3113  unsigned SrcReg;
3114  switch (VT.SimpleTy) {
3115  default: llvm_unreachable("Unexpected value type.");
3116  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3117  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3118  case MVT::f32: [[fallthrough]];
3119  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3120  }
3121  Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3122  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3123  // Without this, EmitLiveInCopies may eliminate the livein if its only
3124  // use is a bitcast (which isn't turned into an instruction).
3125  Register ResultReg = createResultReg(RC);
3126  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3127  TII.get(TargetOpcode::COPY), ResultReg)
3128  .addReg(DstReg, getKillRegState(true));
3129  updateValueMap(&Arg, ResultReg);
3130  }
3131  return true;
3132 }
3133 
3134 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3135  CallingConv::ID CC,
3136  const CallBase *CB) {
3137  if (Subtarget->is64Bit())
3138  return 0;
3139  if (Subtarget->getTargetTriple().isOSMSVCRT())
3140  return 0;
3141  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3142  CC == CallingConv::HiPE || CC == CallingConv::Tail ||
3143  CC == CallingConv::SwiftTail)
3144  return 0;
3145 
3146  if (CB)
3147  if (CB->arg_empty() || !CB->paramHasAttr(0, Attribute::StructRet) ||
3148  CB->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3149  return 0;
3150 
3151  return 4;
3152 }
3153 
3154 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3155  auto &OutVals = CLI.OutVals;
3156  auto &OutFlags = CLI.OutFlags;
3157  auto &OutRegs = CLI.OutRegs;
3158  auto &Ins = CLI.Ins;
3159  auto &InRegs = CLI.InRegs;
3160  CallingConv::ID CC = CLI.CallConv;
3161  bool &IsTailCall = CLI.IsTailCall;
3162  bool IsVarArg = CLI.IsVarArg;
3163  const Value *Callee = CLI.Callee;
3164  MCSymbol *Symbol = CLI.Symbol;
3165  const auto *CB = CLI.CB;
3166 
3167  bool Is64Bit = Subtarget->is64Bit();
3168  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3169 
3170  // Call / invoke instructions with NoCfCheck attribute require special
3171  // handling.
3172  if (CB && CB->doesNoCfCheck())
3173  return false;
3174 
3175  // Functions with no_caller_saved_registers need special handling.
3176  if ((CB && isa<CallInst>(CB) && CB->hasFnAttr("no_caller_saved_registers")))
3177  return false;
3178 
3179  // Functions with no_callee_saved_registers need special handling.
3180  if ((CB && CB->hasFnAttr("no_callee_saved_registers")))
3181  return false;
3182 
3183  // Indirect calls with CFI checks need special handling.
3184  if (CB && CB->isIndirectCall() && CB->getOperandBundle(LLVMContext::OB_kcfi))
3185  return false;
3186 
3187  // Functions using thunks for indirect calls need to use SDISel.
3188  if (Subtarget->useIndirectThunkCalls())
3189  return false;
3190 
3191  // Handle only the calling conventions listed below for now.
3192  switch (CC) {
3193  default: return false;
3194  case CallingConv::C:
3195  case CallingConv::Fast:
3196  case CallingConv::Tail:
3197  case CallingConv::WebKit_JS:
3198  case CallingConv::Swift:
3199  case CallingConv::SwiftTail:
3200  case CallingConv::X86_FastCall:
3201  case CallingConv::X86_StdCall:
3202  case CallingConv::X86_ThisCall:
3203  case CallingConv::Win64:
3204  case CallingConv::X86_64_SysV:
3205  case CallingConv::CFGuard_Check:
3206  break;
3207  }
3208 
3209  // Allow SelectionDAG isel to handle tail calls.
3210  if (IsTailCall)
3211  return false;
3212 
3213  // fastcc with -tailcallopt is intended to provide a guaranteed
3214  // tail call optimization. Fastisel doesn't know how to do that.
3215  if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
3216  CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
3217  return false;
3218 
3219  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3220  // x86-32. Special handling for x86-64 is implemented.
3221  if (IsVarArg && IsWin64)
3222  return false;
3223 
3224  // Don't know about inalloca yet.
3225  if (CLI.CB && CLI.CB->hasInAllocaArgument())
3226  return false;
3227 
3228  for (auto Flag : CLI.OutFlags)
3229  if (Flag.isSwiftError() || Flag.isPreallocated())
3230  return false;
3231 
3232  SmallVector<MVT, 16> OutVTs;
3233  SmallVector<unsigned, 16> ArgRegs;
3234 
3235  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3236  // instruction. This is safe because it is common to all FastISel supported
3237  // calling conventions on x86.
3238  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3239  Value *&Val = OutVals[i];
3240  ISD::ArgFlagsTy Flags = OutFlags[i];
3241  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3242  if (CI->getBitWidth() < 32) {
3243  if (Flags.isSExt())
3244  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
3245  else
3246  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
3247  }
3248  }
3249 
3250  // Passing bools around ends up doing a trunc to i1 and passing it.
3251  // Codegen this as an argument + "and 1".
3252  MVT VT;
3253  auto *TI = dyn_cast<TruncInst>(Val);
3254  unsigned ResultReg;
3255  if (TI && TI->getType()->isIntegerTy(1) && CLI.CB &&
3256  (TI->getParent() == CLI.CB->getParent()) && TI->hasOneUse()) {
3257  Value *PrevVal = TI->getOperand(0);
3258  ResultReg = getRegForValue(PrevVal);
3259 
3260  if (!ResultReg)
3261  return false;
3262 
3263  if (!isTypeLegal(PrevVal->getType(), VT))
3264  return false;
3265 
3266  ResultReg = fastEmit_ri(VT, VT, ISD::AND, ResultReg, 1);
3267  } else {
3268  if (!isTypeLegal(Val->getType(), VT) ||
3269  (VT.isVector() && VT.getVectorElementType() == MVT::i1))
3270  return false;
3271  ResultReg = getRegForValue(Val);
3272  }
3273 
3274  if (!ResultReg)
3275  return false;
3276 
3277  ArgRegs.push_back(ResultReg);
3278  OutVTs.push_back(VT);
3279  }
3280 
3281  // Analyze operands of the call, assigning locations to each operand.
3282  SmallVector<CCValAssign, 16> ArgLocs;
3283  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3284 
3285  // Allocate shadow area for Win64
3286  if (IsWin64)
3287  CCInfo.AllocateStack(32, Align(8));
3288 
3289  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3290 
3291  // Get a count of how many bytes are to be pushed on the stack.
3292  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3293 
3294  // Issue CALLSEQ_START
3295  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3296  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3297  .addImm(NumBytes).addImm(0).addImm(0);
3298 
3299  // Walk the register/memloc assignments, inserting copies/loads.
3300  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3301  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3302  CCValAssign const &VA = ArgLocs[i];
3303  const Value *ArgVal = OutVals[VA.getValNo()];
3304  MVT ArgVT = OutVTs[VA.getValNo()];
3305 
3306  if (ArgVT == MVT::x86mmx)
3307  return false;
3308 
3309  unsigned ArgReg = ArgRegs[VA.getValNo()];
3310 
3311  // Promote the value if needed.
3312  switch (VA.getLocInfo()) {
3313  case CCValAssign::Full: break;
3314  case CCValAssign::SExt: {
3315  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3316  "Unexpected extend");
3317 
3318  if (ArgVT == MVT::i1)
3319  return false;
3320 
3321  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3322  ArgVT, ArgReg);
3323  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3324  ArgVT = VA.getLocVT();
3325  break;
3326  }
3327  case CCValAssign::ZExt: {
3328  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3329  "Unexpected extend");
3330 
3331  // Handle zero-extension from i1 to i8, which is common.
3332  if (ArgVT == MVT::i1) {
3333  // Set the high bits to zero.
3334  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg);
3335  ArgVT = MVT::i8;
3336 
3337  if (ArgReg == 0)
3338  return false;
3339  }
3340 
3341  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3342  ArgVT, ArgReg);
3343  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3344  ArgVT = VA.getLocVT();
3345  break;
3346  }
3347  case CCValAssign::AExt: {
3348  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3349  "Unexpected extend");
3350  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3351  ArgVT, ArgReg);
3352  if (!Emitted)
3353  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3354  ArgVT, ArgReg);
3355  if (!Emitted)
3356  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3357  ArgVT, ArgReg);
3358 
3359  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3360  ArgVT = VA.getLocVT();
3361  break;
3362  }
3363  case CCValAssign::BCvt: {
3364  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg);
3365  assert(ArgReg && "Failed to emit a bitcast!");
3366  ArgVT = VA.getLocVT();
3367  break;
3368  }
3369  case CCValAssign::VExt:
3370  // VExt has not been implemented, so this should be impossible to reach
3371  // for now. However, fall back to SelectionDAG isel once it is implemented.
3372  return false;
3373  case CCValAssign::AExtUpper:
3374  case CCValAssign::SExtUpper:
3375  case CCValAssign::ZExtUpper:
3376  case CCValAssign::FPExt:
3377  case CCValAssign::Trunc:
3378  llvm_unreachable("Unexpected loc info!");
3379  case CCValAssign::Indirect:
3380  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3381  // support this.
3382  return false;
3383  }
3384 
3385  if (VA.isRegLoc()) {
3386  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3387  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3388  OutRegs.push_back(VA.getLocReg());
3389  } else {
3390  assert(VA.isMemLoc() && "Unknown value location!");
3391 
3392  // Don't emit stores for undef values.
3393  if (isa<UndefValue>(ArgVal))
3394  continue;
3395 
3396  unsigned LocMemOffset = VA.getLocMemOffset();
3397  X86AddressMode AM;
3398  AM.Base.Reg = RegInfo->getStackRegister();
3399  AM.Disp = LocMemOffset;
3400  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3401  Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3402  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3403  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3404  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3405  if (Flags.isByVal()) {
3406  X86AddressMode SrcAM;
3407  SrcAM.Base.Reg = ArgReg;
3408  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3409  return false;
3410  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3411  // If this is a really simple value, emit this with the Value* version
3412  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3413  // as it can cause us to reevaluate the argument.
3414  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3415  return false;
3416  } else {
3417  if (!X86FastEmitStore(ArgVT, ArgReg, AM, MMO))
3418  return false;
3419  }
3420  }
3421  }
3422 
3423  // ELF / PIC requires the GOT base address in the EBX register before
3424  // function calls made via the PLT.
3425  if (Subtarget->isPICStyleGOT()) {
3426  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3427  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3428  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3429  }
3430 
3431  if (Is64Bit && IsVarArg && !IsWin64) {
3432  // From AMD64 ABI document:
3433  // For calls that may call functions that use varargs or stdargs
3434  // (prototype-less calls or calls to functions containing ellipsis (...) in
3435  // the declaration) %al is used as hidden argument to specify the number
3436  // of SSE registers used. The contents of %al do not need to match exactly
3437  // the number of registers, but must be an upper bound on the number of SSE
3438  // registers used, and be in the range 0 - 8 inclusive.
3439 
3440  // Count the number of XMM registers allocated.
3441  static const MCPhysReg XMMArgRegs[] = {
3442  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3443  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3444  };
3445  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3446  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3447  && "SSE registers cannot be used when SSE is disabled");
3448  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV8ri),
3449  X86::AL).addImm(NumXMMRegs);
3450  }
3451 
3452  // Materialize callee address in a register. FIXME: GV address can be
3453  // handled with a CALLpcrel32 instead.
3454  X86AddressMode CalleeAM;
3455  if (!X86SelectCallAddress(Callee, CalleeAM))
3456  return false;
3457 
3458  unsigned CalleeOp = 0;
3459  const GlobalValue *GV = nullptr;
3460  if (CalleeAM.GV != nullptr) {
3461  GV = CalleeAM.GV;
3462  } else if (CalleeAM.Base.Reg != 0) {
3463  CalleeOp = CalleeAM.Base.Reg;
3464  } else
3465  return false;
3466 
3467  // Issue the call.
3468  MachineInstrBuilder MIB;
3469  if (CalleeOp) {
3470  // Register-indirect call.
3471  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3472  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CallOpc))
3473  .addReg(CalleeOp);
3474  } else {
3475  // Direct call.
3476  assert(GV && "Not a direct call");
3477  // See if we need any target-specific flags on the GV operand.
3478  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3479 
3480  // This will be a direct call, or an indirect call through memory for
3481  // NonLazyBind calls or dllimport calls.
3482  bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
3483  OpFlags == X86II::MO_GOTPCREL ||
3484  OpFlags == X86II::MO_GOTPCREL_NORELAX ||
3485  OpFlags == X86II::MO_COFFSTUB;
3486  unsigned CallOpc = NeedLoad
3487  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3488  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3489 
3490  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CallOpc));
3491  if (NeedLoad)
3492  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3493  if (Symbol)
3494  MIB.addSym(Symbol, OpFlags);
3495  else
3496  MIB.addGlobalAddress(GV, 0, OpFlags);
3497  if (NeedLoad)
3498  MIB.addReg(0);
3499  }
3500 
3501  // Add a register mask operand representing the call-preserved registers.
3502  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3503  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3504 
3505  // Add an implicit use of the GOT pointer in EBX.
3506  if (Subtarget->isPICStyleGOT())
3507  MIB.addReg(X86::EBX, RegState::Implicit);
3508 
3509  if (Is64Bit && IsVarArg && !IsWin64)
3510  MIB.addReg(X86::AL, RegState::Implicit);
3511 
3512  // Add implicit physical register uses to the call.
3513  for (auto Reg : OutRegs)
3514  MIB.addReg(Reg, RegState::Implicit);
3515 
3516  // Issue CALLSEQ_END
3517  unsigned NumBytesForCalleeToPop =
3518  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3519  TM.Options.GuaranteedTailCallOpt)
3520  ? NumBytes // Callee pops everything.
3521  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CB);
3522  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3523  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3524  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3525 
3526  // Now handle call return values.
3527  SmallVector<CCValAssign, 16> RVLocs;
3528  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3529  CLI.RetTy->getContext());
3530  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3531 
3532  // Copy all of the result registers out of their specified physreg.
3533  Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3534  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3535  CCValAssign &VA = RVLocs[i];
3536  EVT CopyVT = VA.getValVT();
3537  unsigned CopyReg = ResultReg + i;
3538  Register SrcReg = VA.getLocReg();
3539 
3540  // If this is x86-64, and we disabled SSE, we can't return FP values
3541  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3542  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3543  report_fatal_error("SSE register return with SSE disabled");
3544  }
3545 
3546  // If we prefer to use the value in xmm registers, copy it out as f80 and
3547  // use a truncate to move it from fp stack reg to xmm reg.
3548  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3549  isScalarFPTypeInSSEReg(VA.getValVT())) {
3550  CopyVT = MVT::f80;
3551  CopyReg = createResultReg(&X86::RFP80RegClass);
3552  }
3553 
3554  // Copy out the result.
3555  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3556  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3557  InRegs.push_back(VA.getLocReg());
3558 
3559  // Round the f80 to the right size, which also moves it to the appropriate
3560  // xmm register. This is accomplished by storing the f80 value in memory
3561  // and then loading it back.
3562  if (CopyVT != VA.getValVT()) {
3563  EVT ResVT = VA.getValVT();
3564  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3565  unsigned MemSize = ResVT.getSizeInBits()/8;
3566  int FI = MFI.CreateStackObject(MemSize, Align(MemSize), false);
3567  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3568  TII.get(Opc)), FI)
3569  .addReg(CopyReg);
3570  Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
3571  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3572  TII.get(Opc), ResultReg + i), FI);
3573  }
3574  }
3575 
3576  CLI.ResultReg = ResultReg;
3577  CLI.NumResultRegs = RVLocs.size();
3578  CLI.Call = MIB;
3579 
3580  return true;
3581 }
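The MOV8ri to X86::AL emitted above implements the AMD64 varargs rule quoted in the comment near line 3432. A tiny self-contained example of a call that needs it; the register assignments described in the comments are typical SysV x86-64 codegen, not output taken from this file:

#include <cstdio>

int main() {
  double D = 0.5;
  // For this variadic call, SysV x86-64 codegen typically passes D in %xmm0
  // and materializes AL = 1 before the call -- an upper bound on the number
  // of XMM registers carrying arguments, matching the MOV8ri emitted above.
  std::printf("%f\n", D);
  return 0;
}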
3582 
3583 bool
3584 X86FastISel::fastSelectInstruction(const Instruction *I) {
3585  switch (I->getOpcode()) {
3586  default: break;
3587  case Instruction::Load:
3588  return X86SelectLoad(I);
3589  case Instruction::Store:
3590  return X86SelectStore(I);
3591  case Instruction::Ret:
3592  return X86SelectRet(I);
3593  case Instruction::ICmp:
3594  case Instruction::FCmp:
3595  return X86SelectCmp(I);
3596  case Instruction::ZExt:
3597  return X86SelectZExt(I);
3598  case Instruction::SExt:
3599  return X86SelectSExt(I);
3600  case Instruction::Br:
3601  return X86SelectBranch(I);
3602  case Instruction::LShr:
3603  case Instruction::AShr:
3604  case Instruction::Shl:
3605  return X86SelectShift(I);
3606  case Instruction::SDiv:
3607  case Instruction::UDiv:
3608  case Instruction::SRem:
3609  case Instruction::URem:
3610  return X86SelectDivRem(I);
3611  case Instruction::Select:
3612  return X86SelectSelect(I);
3613  case Instruction::Trunc:
3614  return X86SelectTrunc(I);
3615  case Instruction::FPExt:
3616  return X86SelectFPExt(I);
3617  case Instruction::FPTrunc:
3618  return X86SelectFPTrunc(I);
3619  case Instruction::SIToFP:
3620  return X86SelectSIToFP(I);
3621  case Instruction::UIToFP:
3622  return X86SelectUIToFP(I);
3623  case Instruction::IntToPtr: // Deliberate fall-through.
3624  case Instruction::PtrToInt: {
3625  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3626  EVT DstVT = TLI.getValueType(DL, I->getType());
3627  if (DstVT.bitsGT(SrcVT))
3628  return X86SelectZExt(I);
3629  if (DstVT.bitsLT(SrcVT))
3630  return X86SelectTrunc(I);
3631  Register Reg = getRegForValue(I->getOperand(0));
3632  if (Reg == 0) return false;
3633  updateValueMap(I, Reg);
3634  return true;
3635  }
3636  case Instruction::BitCast: {
3637  // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
3638  if (!Subtarget->hasSSE2())
3639  return false;
3640 
3641  MVT SrcVT, DstVT;
3642  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
3643  !isTypeLegal(I->getType(), DstVT))
3644  return false;
3645 
3646  // Only allow vectors that use xmm/ymm/zmm.
3647  if (!SrcVT.isVector() || !DstVT.isVector() ||
3648  SrcVT.getVectorElementType() == MVT::i1 ||
3649  DstVT.getVectorElementType() == MVT::i1)
3650  return false;
3651 
3652  Register Reg = getRegForValue(I->getOperand(0));
3653  if (!Reg)
3654  return false;
3655 
3656  // Emit a reg-reg copy so we don't propagate cached known bits information
3657  // with the wrong VT if we fall out of fast isel after selecting this.
3658  const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
3659  Register ResultReg = createResultReg(DstClass);
3660  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3661  TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
3662 
3663  updateValueMap(I, ResultReg);
3664  return true;
3665  }
3666  }
3667 
3668  return false;
3669 }
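For the IntToPtr/PtrToInt arm above, the cast degenerates to a zext, a trunc, or plain register reuse depending on how the bit widths compare. A standalone sketch of that three-way choice (illustrative only, not LLVM code):

#include <cstdio>

static const char *selectIntPtrCast(unsigned SrcBits, unsigned DstBits) {
  if (DstBits > SrcBits)
    return "zext";         // handled by X86SelectZExt
  if (DstBits < SrcBits)
    return "trunc";        // handled by X86SelectTrunc
  return "reuse register"; // updateValueMap with the source register
}

int main() {
  std::printf("%s\n", selectIntPtrCast(64, 64)); // ptrtoint ptr -> i64 on x86-64
  std::printf("%s\n", selectIntPtrCast(32, 64)); // inttoptr i32 -> ptr
  return 0;
}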
3670 
3671 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3672  if (VT > MVT::i64)
3673  return 0;
3674 
3675  uint64_t Imm = CI->getZExtValue();
3676  if (Imm == 0) {
3677  Register SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3678  switch (VT.SimpleTy) {
3679  default: llvm_unreachable("Unexpected value type");
3680  case MVT::i1:
3681  case MVT::i8:
3682  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, X86::sub_8bit);
3683  case MVT::i16:
3684  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, X86::sub_16bit);
3685  case MVT::i32:
3686  return SrcReg;
3687  case MVT::i64: {
3688  Register ResultReg = createResultReg(&X86::GR64RegClass);
3689  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3690  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3691  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3692  return ResultReg;
3693  }
3694  }
3695  }
3696 
3697  unsigned Opc = 0;
3698  switch (VT.SimpleTy) {
3699  default: llvm_unreachable("Unexpected value type");
3700  case MVT::i1:
3701  VT = MVT::i8;
3702  [[fallthrough]];
3703  case MVT::i8: Opc = X86::MOV8ri; break;
3704  case MVT::i16: Opc = X86::MOV16ri; break;
3705  case MVT::i32: Opc = X86::MOV32ri; break;
3706  case MVT::i64: {
3707  if (isUInt<32>(Imm))
3708  Opc = X86::MOV32ri64;
3709  else if (isInt<32>(Imm))
3710  Opc = X86::MOV64ri32;
3711  else
3712  Opc = X86::MOV64ri;
3713  break;
3714  }
3715  }
3716  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3717 }
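For nonzero 64-bit immediates, the switch above picks the shortest mov encoding. A standalone restatement of that choice (editor's sketch; the range checks are stand-ins for the isUInt<32>/isInt<32> templates used above):

#include <cstdint>
#include <cstdio>

static const char *chooseMov64(uint64_t Imm) {
  if (Imm <= UINT32_MAX)
    return "MOV32ri64"; // mov r32, imm32 implicitly zero-extends to 64 bits
  int64_t S = static_cast<int64_t>(Imm);
  if (S >= INT32_MIN && S <= INT32_MAX)
    return "MOV64ri32"; // sign-extended imm32 form
  return "MOV64ri";     // full 64-bit immediate (movabs)
}

int main() {
  std::printf("%s\n", chooseMov64(0xFFFFFFFFull));         // MOV32ri64
  std::printf("%s\n", chooseMov64(0xFFFFFFFFFFFFFFF0ull)); // MOV64ri32 (-16)
  std::printf("%s\n", chooseMov64(0x0123456789ABCDEFull)); // MOV64ri
  return 0;
}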
3718 
3719 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3720  if (CFP->isNullValue())
3721  return fastMaterializeFloatZero(CFP);
3722 
3723  // Can't handle alternate code models yet.
3724  CodeModel::Model CM = TM.getCodeModel();
3725  if (CM != CodeModel::Small && CM != CodeModel::Large)
3726  return 0;
3727 
3728  // Get opcode and regclass of the output for the given load instruction.
3729  unsigned Opc = 0;
3730  bool HasSSE1 = Subtarget->hasSSE1();
3731  bool HasSSE2 = Subtarget->hasSSE2();
3732  bool HasAVX = Subtarget->hasAVX();
3733  bool HasAVX512 = Subtarget->hasAVX512();
3734  switch (VT.SimpleTy) {
3735  default: return 0;
3736  case MVT::f32:
3737  Opc = HasAVX512 ? X86::VMOVSSZrm_alt
3738  : HasAVX ? X86::VMOVSSrm_alt
3739  : HasSSE1 ? X86::MOVSSrm_alt
3740  : X86::LD_Fp32m;
3741  break;
3742  case MVT::f64:
3743  Opc = HasAVX512 ? X86::VMOVSDZrm_alt
3744  : HasAVX ? X86::VMOVSDrm_alt
3745  : HasSSE2 ? X86::MOVSDrm_alt
3746  : X86::LD_Fp64m;
3747  break;
3748  case MVT::f80:
3749  // No f80 support yet.
3750  return 0;
3751  }
3752 
3753  // MachineConstantPool wants an explicit alignment.
3754  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
3755 
3756  // x86-32 PIC requires a PIC base register for constant pools.
3757  unsigned PICBase = 0;
3758  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3759  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3760  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3761  else if (OpFlag == X86II::MO_GOTOFF)
3762  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3763  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3764  PICBase = X86::RIP;
3765 
3766  // Create the load from the constant pool.
3767  unsigned CPI = MCP.getConstantPoolIndex(CFP, Alignment);
3768  Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
3769 
3770  // Large code model only applies to 64-bit mode.
3771  if (Subtarget->is64Bit() && CM == CodeModel::Large) {
3772  Register AddrReg = createResultReg(&X86::GR64RegClass);
3773  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV64ri),
3774  AddrReg)
3775  .addConstantPoolIndex(CPI, 0, OpFlag);
3776  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3777  TII.get(Opc), ResultReg);
3778  addRegReg(MIB, AddrReg, false, PICBase, false);
3779  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3780  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3781  MachineMemOperand::MOLoad, DL.getPointerSize(), Alignment);
3782  MIB->addMemOperand(*FuncInfo.MF, MMO);
3783  return ResultReg;
3784  }
3785 
3786  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3787  TII.get(Opc), ResultReg),
3788  CPI, PICBase, OpFlag);
3789  return ResultReg;
3790 }
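The opcode ladders above prefer the newest available vector extension and fall back to an x87 load when SSE is missing. The f64 case restated standalone (illustrative only):

#include <cstdio>

static const char *fpLoadOpcF64(bool HasAVX512, bool HasAVX, bool HasSSE2) {
  return HasAVX512 ? "VMOVSDZrm_alt"
         : HasAVX  ? "VMOVSDrm_alt"
         : HasSSE2 ? "MOVSDrm_alt"
                   : "LD_Fp64m"; // x87 constant-pool load when SSE2 is absent
}

int main() {
  std::printf("%s\n", fpLoadOpcF64(false, true, true)); // VMOVSDrm_alt
  return 0;
}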
3791 
3792 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3793  // Can't handle alternate code models yet.
3794  if (TM.getCodeModel() != CodeModel::Small)
3795  return 0;
3796 
3797  // Materialize addresses with LEA/MOV instructions.
3798  X86AddressMode AM;
3799  if (X86SelectAddress(GV, AM)) {
3800  // If the expression is just a basereg, then we're done, otherwise we need
3801  // to emit an LEA.
3802  if (AM.BaseType == X86AddressMode::RegBase &&
3803  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3804  return AM.Base.Reg;
3805 
3806  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3807  if (TM.getRelocationModel() == Reloc::Static &&
3808  TLI.getPointerTy(DL) == MVT::i64) {
3809  // The displaced address could be more than 32 bits away, so we need to
3810  // use an instruction with a 64-bit immediate.
3811  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV64ri),
3812  ResultReg)
3813  .addGlobalAddress(GV);
3814  } else {
3815  unsigned Opc =
3816  TLI.getPointerTy(DL) == MVT::i32
3817  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3818  : X86::LEA64r;
3819  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3820  TII.get(Opc), ResultReg), AM);
3821  }
3822  return ResultReg;
3823  }
3824  return 0;
3825 }
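A sketch of the choice made above between an absolute 64-bit move and an LEA. The assembly in the comments is the rough shape of each form, not output from this file, and gv is a hypothetical symbol:

#include <cstdio>

static const char *gvMaterializeOpc(bool StaticReloc, bool Ptr64) {
  if (StaticReloc && Ptr64)
    return "MOV64ri";      // movabsq $gv, %rax: target may be >32 bits away
  return Ptr64 ? "LEA64r"  // leaq gv(%rip), %rax
               : "LEA32r"; // leal gv, %eax (or LEA64_32r under ILP32)
}

int main() {
  std::printf("%s\n", gvMaterializeOpc(false, true)); // LEA64r
  return 0;
}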
3826 
3827 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3828  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3829 
3830  // Only handle simple types.
3831  if (!CEVT.isSimple())
3832  return 0;
3833  MVT VT = CEVT.getSimpleVT();
3834 
3835  if (const auto *CI = dyn_cast<ConstantInt>(C))
3836  return X86MaterializeInt(CI, VT);
3837  if (const auto *CFP = dyn_cast<ConstantFP>(C))
3838  return X86MaterializeFP(CFP, VT);
3839  if (const auto *GV = dyn_cast<GlobalValue>(C))
3840  return X86MaterializeGV(GV, VT);
3841  if (isa<UndefValue>(C)) {
3842  unsigned Opc = 0;
3843  switch (VT.SimpleTy) {
3844  default:
3845  break;
3846  case MVT::f32:
3847  if (!Subtarget->hasSSE1())
3848  Opc = X86::LD_Fp032;
3849  break;
3850  case MVT::f64:
3851  if (!Subtarget->hasSSE2())
3852  Opc = X86::LD_Fp064;
3853  break;
3854  case MVT::f80:
3855  Opc = X86::LD_Fp080;
3856  break;
3857  }
3858 
3859  if (Opc) {
3860  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3861  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
3862  ResultReg);
3863  return ResultReg;
3864  }
3865  }
3866 
3867  return 0;
3868 }
3869 
3870 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3871  // Fail on dynamic allocas. At this point, getRegForValue has already
3872  // checked its CSE maps, so if we're here trying to handle a dynamic
3873  // alloca, we're not going to succeed. X86SelectAddress has a
3874  // check for dynamic allocas, because it's called directly from
3875  // various places, but targetMaterializeAlloca also needs a check
3876  // in order to avoid recursion between getRegForValue,
3877  // X86SelectAddress, and targetMaterializeAlloca.
3878  if (!FuncInfo.StaticAllocaMap.count(C))
3879  return 0;
3880  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3881 
3882  X86AddressMode AM;
3883  if (!X86SelectAddress(C, AM))
3884  return 0;
3885  unsigned Opc =
3886  TLI.getPointerTy(DL) == MVT::i32
3887  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3888  : X86::LEA64r;
3889  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3890  Register ResultReg = createResultReg(RC);
3891  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3892  TII.get(Opc), ResultReg), AM);
3893  return ResultReg;
3894 }
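At the source level, this handles taking the address of a fixed-size stack object. A minimal example of the pattern, with the rough lowering noted in a comment (illustrative):

#include <cstdio>

int main() {
  int Slot[4];   // a static alloca: fixed size, allocated in the entry frame
  int *P = Slot; // address taken -> roughly: leaq <frame-slot>(%rsp), %reg
  std::printf("%p\n", static_cast<void *>(P));
  return 0;
}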
3895 
3896 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3897  MVT VT;
3898  if (!isTypeLegal(CF->getType(), VT))
3899  return 0;
3900 
3901  // Get opcode and regclass for the given zero.
3902  bool HasSSE1 = Subtarget->hasSSE1();
3903  bool HasSSE2 = Subtarget->hasSSE2();
3904  bool HasAVX512 = Subtarget->hasAVX512();
3905  unsigned Opc = 0;
3906  switch (VT.SimpleTy) {
3907  default: return 0;
3908  case MVT::f16:
3909  Opc = HasAVX512 ? X86::AVX512_FsFLD0SH : X86::FsFLD0SH;
3910  break;
3911  case MVT::f32:
3912  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS
3913  : HasSSE1 ? X86::FsFLD0SS
3914  : X86::LD_Fp032;
3915  break;
3916  case MVT::f64:
3917  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD
3918  : HasSSE2 ? X86::FsFLD0SD
3919  : X86::LD_Fp064;
3920  break;
3921  case MVT::f80:
3922  // No f80 support yet.
3923  return 0;
3924  }
3925 
3926  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3927  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg);
3928  return ResultReg;
3929 }
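The FsFLD0* pseudos selected above are normally expanded later into register-clearing idioms (e.g. an xorps of the register with itself) rather than actual loads, while LD_Fp032/LD_Fp064 push +0.0 on the x87 stack. The f32 ladder restated standalone (illustrative):

#include <cstdio>

static const char *fpZeroOpcF32(bool HasAVX512, bool HasSSE1) {
  return HasAVX512 ? "AVX512_FsFLD0SS"
         : HasSSE1 ? "FsFLD0SS"  // typically becomes an xorps zero idiom
                   : "LD_Fp032"; // x87 load of +0.0
}

int main() {
  std::printf("%s\n", fpZeroOpcF32(false, true)); // FsFLD0SS
  return 0;
}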
3930 
3931 
3932 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3933  const LoadInst *LI) {
3934  const Value *Ptr = LI->getPointerOperand();
3935  X86AddressMode AM;
3936  if (!X86SelectAddress(Ptr, AM))
3937  return false;
3938 
3939  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3940 
3941  unsigned Size = DL.getTypeAllocSize(LI->getType());
3942 
3943  SmallVector<MachineOperand, 8> AddrOps;
3944  AM.getFullAddress(AddrOps);
3945 
3946  MachineInstr *Result = XII.foldMemoryOperandImpl(
3947  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, LI->getAlign(),
3948  /*AllowCommute=*/true);
3949  if (!Result)
3950  return false;
3951 
3952  // The index register could be in the wrong register class. Unfortunately,
3953  // foldMemoryOperandImpl could have commuted the instruction, so it's not
3954  // enough to just look at OpNo + the offset to the index reg. We actually
3955  // need to scan the instruction to find the index reg and see if it's in
3956  // the correct register class.
3957  unsigned OperandNo = 0;
3958  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3959  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3960  MachineOperand &MO = *I;
3961  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3962  continue;
3963  // Found the index reg, now try to rewrite it.
3964  Register IndexReg = constrainOperandRegClass(Result->getDesc(),
3965  MO.getReg(), OperandNo);
3966  if (IndexReg == MO.getReg())
3967  continue;
3968  MO.setReg(IndexReg);
3969  }
3970 
3971  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3972  Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
3973  MachineBasicBlock::iterator I(MI);
3974  removeDeadCode(I, std::next(I));
3975  return true;
3976 }
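A self-contained example of the pattern this folding targets; the assembly named in the comments is the typical shape of the transformation, not verified output:

#include <cstdio>

// With FastISel at -O0, the load of *P and the add below start as separate
// instructions; tryToFoldLoadIntoMI can merge them into a single
// memory-operand instruction (roughly "addl (%rdi), %esi") when the load
// has a single use, after which the dead load is removed by removeDeadCode.
static int addThrough(const int *P, int X) { return X + *P; }

int main() {
  int V = 41;
  std::printf("%d\n", addThrough(&V, 1));
  return 0;
}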
3977 
3978 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3979  const TargetRegisterClass *RC,
3980  unsigned Op0, unsigned Op1,
3981  unsigned Op2, unsigned Op3) {
3982  const MCInstrDesc &II = TII.get(MachineInstOpcode);
3983 
3984  Register ResultReg = createResultReg(RC);
3985  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3986  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3987  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3988  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3989 
3990  if (II.getNumDefs() >= 1)
3991  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
3992  .addReg(Op0)
3993  .addReg(Op1)
3994  .addReg(Op2)
3995  .addReg(Op3);
3996  else {
3997  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
3998  .addReg(Op0)
3999  .addReg(Op1)
4000  .addReg(Op2)
4001  .addReg(Op3);
4002  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4003  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
4004  }
4005  return ResultReg;
4006 }
4007 
4008 
4009 namespace llvm {
4010  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
4011  const TargetLibraryInfo *libInfo) {
4012  return new X86FastISel(funcInfo, libInfo);
4013  }
4014 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::addRegReg
static const MachineInstrBuilder & addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2)
addRegReg - This function is used to add a memory reference of the form: [Reg + Reg].
Definition: X86InstrBuilder.h:164
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:151
i
i
Definition: README.txt:29
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:245
llvm::CmpInst::FCMP_ULE
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:734
llvm::CCValAssign::ZExt
@ ZExt
Definition: CallingConvLower.h:36
llvm::X86AddressMode::RegBase
@ RegBase
Definition: X86InstrBuilder.h:44
llvm::DbgVariableIntrinsic::getExpression
DIExpression * getExpression() const
Definition: IntrinsicInst.h:291
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition: MachineValueType.h:533
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4715
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1115
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::DbgDeclareInst::getAddress
Value * getAddress() const
Definition: IntrinsicInst.h:350
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
llvm::AArch64CC::AL
@ AL
Definition: AArch64BaseInfo.h:269
llvm::ReturnInst
Return a value (possibly void), from a function.
Definition: Instructions.h:3051
llvm::CCValAssign::Full
@ Full
Definition: CallingConvLower.h:34
llvm::MVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: MachineValueType.h:361
llvm::X86Subtarget::hasSSE2
bool hasSSE2() const
Definition: X86Subtarget.h:200
llvm::CCValAssign::SExtUpper
@ SExtUpper
Definition: CallingConvLower.h:38
llvm::N86::EDI
@ EDI
Definition: X86MCTargetDesc.h:51
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:886
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::X86AddressMode
X86AddressMode - This struct holds a generalized full x86 address mode.
Definition: X86InstrBuilder.h:42
IntrinsicInst.h
X86Subtarget.h
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:189
llvm::N86::EDX
@ EDX
Definition: X86MCTargetDesc.h:51
llvm::ConstantExpr::getZExt
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2092
llvm::Function
Definition: Function.h:60
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
X86InstrBuilder.h
llvm::MachinePointerInfo::getConstantPool
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
Definition: MachineOperand.cpp:1013
X86SelectAddress
static void X86SelectAddress(const MachineInstr &I, const MachineRegisterInfo &MRI, X86AddressMode &AM)
Definition: X86InstructionSelector.cpp:512
GetElementPtrTypeIterator.h
llvm::ConstantExpr::getSExt
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2078
llvm::MemIntrinsicBase::getDestAddressSpace
unsigned getDestAddressSpace() const
Definition: IntrinsicInst.h:826
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::X86AddressMode::GV
const GlobalValue * GV
Definition: X86InstrBuilder.h:56
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:377
llvm::CmpInst::FCMP_ONE
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:727
llvm::X86Subtarget
Definition: X86Subtarget.h:52
ErrorHandling.h
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::X86::COND_P
@ COND_P
Definition: X86BaseInfo.h:91
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:833
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:766
llvm::CCValAssign::VExt
@ VExt
Definition: CallingConvLower.h:46
llvm::CCValAssign::Indirect
@ Indirect
Definition: CallingConvLower.h:50
llvm::X86Subtarget::isTargetMCU
bool isTargetMCU() const
Definition: X86Subtarget.h:288
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:237
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:513
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::N86::ESI
@ ESI
Definition: X86MCTargetDesc.h:51
llvm::DbgVariableIntrinsic::getVariable
DILocalVariable * getVariable() const
Definition: IntrinsicInst.h:287
llvm::CCValAssign::BCvt
@ BCvt
Definition: CallingConvLower.h:44
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::CCValAssign::AExtUpper
@ AExtUpper
Definition: CallingConvLower.h:42
Operator.h
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::CCValAssign::ZExtUpper
@ ZExtUpper
Definition: CallingConvLower.h:40
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::X86::CondCode
CondCode
Definition: X86BaseInfo.h:80
llvm::X86ISD::SMUL
@ SMUL
Definition: X86ISelLowering.h:399
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:260
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:123
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:181
llvm::CCValAssign::Trunc
@ Trunc
Definition: CallingConvLower.h:45
llvm::X86II::MO_GOTOFF
@ MO_GOTOFF
MO_GOTOFF - On a symbol operand this indicates that the immediate is the offset to the location of th...
Definition: X86BaseInfo.h:434
llvm::LoadInst::getAlign
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:216
llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:723
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
OpIndex
unsigned OpIndex
Definition: SPIRVModuleAnalysis.cpp:46
llvm::MachineInstr::addMemOperand
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
Definition: MachineInstr.cpp:368
llvm::X86::COND_O
@ COND_O
Definition: X86BaseInfo.h:81
F
#define F(x, y, z)
Definition: MD5.cpp:55
MachineRegisterInfo.h
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
llvm::X86AddressMode::FrameIndexBase
@ FrameIndexBase
Definition: X86InstrBuilder.h:45
llvm::X86AddressMode::GVOpFlags
unsigned GVOpFlags
Definition: X86InstrBuilder.h:57
llvm::CmpInst::FCMP_ULT
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:733
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Type::isArrayTy
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:234
X86MachineFunctionInfo.h
llvm::TargetRegisterClass::contains
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
Definition: TargetRegisterInfo.h:97
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::CCValAssign::AExt
@ AExt
Definition: CallingConvLower.h:37
llvm::MemTransferBase::getSourceAddressSpace
unsigned getSourceAddressSpace() const
Definition: IntrinsicInst.h:886
llvm::X86II::MO_COFFSTUB
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: X86BaseInfo.h:575
X86.h
llvm::MVT::v8f64
@ v8f64
Definition: MachineValueType.h:184
llvm::addConstantPoolReference
static const MachineInstrBuilder & addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI, unsigned GlobalBaseReg, unsigned char OpFlags)
addConstantPoolReference - This function is used to add a reference to the base of a constant value s...
Definition: X86InstrBuilder.h:223
llvm::addFullAddress
static const MachineInstrBuilder & addFullAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM)
Definition: X86InstrBuilder.h:172
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:31
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
FunctionLoweringInfo.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:148
llvm::User
Definition: User.h:44
llvm::addDirectMem
static const MachineInstrBuilder & addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg)
addDirectMem - This function is used to add a direct memory reference to the current instruction – th...
Definition: X86InstrBuilder.h:124
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::Instruction::isAtomic
bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
Definition: Instruction.cpp:646
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::CallingConv::Swift
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:58
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::X86II::MO_GOTPCREL
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:442
SI
@ SI
Definition: SIInstrInfo.cpp:7882
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::X86AddressMode::Scale
unsigned Scale
Definition: X86InstrBuilder.h:53
llvm::MVT::x86mmx
@ x86mmx
Definition: MachineValueType.h:271
MCSymbol.h
llvm::MemTransferBase::getRawSource
Value * getRawSource() const
Return the arguments to the instruction.
Definition: IntrinsicInst.h:873
llvm::CmpInst::FCMP_UGE
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:732
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:332
llvm::RetCC_X86
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:246
llvm::addFrameReference
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
Definition: PPCInstrBuilder.h:32
llvm::Mips::GPRIdx
@ GPRIdx
Definition: MipsRegisterBankInfo.cpp:44
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::X86Subtarget::hasSSE1
bool hasSSE1() const
Definition: X86Subtarget.h:199
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
llvm::MVT::v4f64
@ v4f64
Definition: MachineValueType.h:183
llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:729
llvm::Instruction
Definition: Instruction.h:42
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:147
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::X86II::MO_GOTPCREL_NORELAX
@ MO_GOTPCREL_NORELAX
MO_GOTPCREL_NORELAX - Same as MO_GOTPCREL except that R_X86_64_GOTPCREL relocations are guaranteed to...
Definition: X86BaseInfo.h:447
llvm::DILocalVariable::isValidLocationForIntrinsic
bool isValidLocationForIntrinsic(const DILocation *DL) const
Check that a location is valid for this variable.
Definition: DebugInfoMetadata.h:3175
llvm::CmpInst::FCMP_OEQ
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:722
llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:725
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition: CallingConvLower.h:153
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::X86AddressMode::Reg
unsigned Reg
Definition: X86InstrBuilder.h:49
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:149
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:272
llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition: CallingConvLower.h:143
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::Triple::isOSMSVCRT
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:625
llvm::omp::RTLDependInfoFields::Len
@ Len
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::MemSetInst
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Definition: IntrinsicInst.h:1073
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3214
llvm::CmpInst::FCMP_FALSE
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:721
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::CodeModel::Model
Model
Definition: CodeGen.h:28
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:709
llvm::MVT::f80
@ f80
Definition: MachineValueType.h:59
X86ChooseCmpOpcode
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget)
Definition: X86FastISel.cpp:1351
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::CallingConv::Win64
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:156
llvm::MVT::v4i64
@ v4i64
Definition: MachineValueType.h:129
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
llvm::MVT::v32i16
@ v32i16
Definition: MachineValueType.h:103
llvm::X86AddressMode::IndexReg
unsigned IndexReg
Definition: X86InstrBuilder.h:54
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:297
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:89
BranchProbabilityInfo.h
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:180
llvm::CCValAssign::SExt
@ SExt
Definition: CallingConvLower.h:35
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:102
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:127
llvm::isGlobalStubReference
static bool isGlobalStubReference(unsigned char TargetFlag)
isGlobalStubReference - Return true if the specified TargetFlag operand is a reference to a stub for ...
Definition: X86InstrInfo.h:82
llvm::StructLayout
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
uint64_t
llvm::DbgDeclareInst
This represents the llvm.dbg.declare instruction.
Definition: IntrinsicInst.h:348
llvm::X86II::MO_PIC_BASE_OFFSET
@ MO_PIC_BASE_OFFSET
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
Definition: X86BaseInfo.h:420
llvm::MVT::v16f32
@ v16f32
Definition: MachineValueType.h:171
llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4810
AH
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference AH
Definition: README-X86-64.txt:44
llvm::ISD::TRAP
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1133
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::DenseMap
Definition: DenseMap.h:714
llvm::codeview::FrameCookieKind::Copy
@ Copy
DebugInfo.h
llvm::CallingConv::X86_64_SysV
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
Definition: CallingConv.h:148
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::X86RegisterInfo::getPtrSizedFrameRegister
unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const
Definition: X86RegisterInfo.cpp:921
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:928
MachineConstantPool.h
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::N86::ECX
@ ECX
Definition: X86MCTargetDesc.h:51
llvm::X86ISD::UMUL
@ UMUL
Definition: X86ISelLowering.h:400
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:166
llvm::AllocFnKind::Aligned
@ Aligned
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:46
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition: FunctionLoweringInfo.h:52
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:894
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:673
llvm::X86::COND_B
@ COND_B
Definition: X86BaseInfo.h:83
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
llvm::CmpInst::FCMP_OGE
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:724
llvm::X86AddressMode::Disp
int Disp
Definition: X86InstrBuilder.h:55
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1424
llvm::X86TargetMachine
Definition: X86TargetMachine.h:28
llvm::X86MachineFunctionInfo
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
Definition: X86MachineFunctionInfo.h:25
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::FastISel
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::N86::EAX
@ EAX
Definition: X86MCTargetDesc.h:51
llvm::CallingConv::SwiftTail
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
llvm::X86InstrInfo::foldMemoryOperandImpl
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
foldMemoryOperand - If this target supports it, fold a load or store of the specified stack slot into...
Definition: X86InstrInfo.cpp:6282
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::X86InstrInfo
Definition: X86InstrInfo.h:138
TargetOptions.h
llvm::CCValAssign::getValNo
unsigned getValNo() const
Definition: CallingConvLower.h:140
llvm::MachineInstrBuilder::addRegMask
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Definition: MachineInstrBuilder.h:197
llvm::TargetMachine::getMCAsmInfo
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
Definition: TargetMachine.h:205
llvm::X86II::MO_DLLIMPORT
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the reference is actually to the "__imp...
Definition: X86BaseInfo.h:537
llvm::X86::isCalleePop
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
Definition: X86ISelLowering.cpp:5440
MCAsmInfo.h
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1716
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:49
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
llvm::isGlobalRelativeToPICBase
static bool isGlobalRelativeToPICBase(unsigned char TargetFlag)
isGlobalRelativeToPICBase - Return true if the specified global value reference is relative to a 32-b...
Definition: X86InstrInfo.h:100
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::X86MachineFunctionInfo::getBytesToPopOnReturn
unsigned getBytesToPopOnReturn() const
Definition: X86MachineFunctionInfo.h:161
llvm::RegState::Implicit
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Definition: MachineInstrBuilder.h:46
llvm::Reloc::Static
@ Static
Definition: CodeGen.h:22
uint32_t
llvm::LLVMContext::OB_kcfi
@ OB_kcfi
Definition: LLVMContext.h:97
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27
DL
DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MVT::v64i8
@ v64i8
Definition: MachineValueType.h:91
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:374
S
Definition: README.txt:210
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::X86::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
Definition: X86FastISel.cpp:4010
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the type of this constant.
Definition: Constants.h:148
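A common immediate-selection check, sketched with a hypothetical ConstantInt *CI (isInt<> is from llvm/Support/MathExtras.h):

int64_t Imm = CI->getSExtValue();
// True iff the value can be encoded in a signed 32-bit immediate field.
bool Fits = isInt<32>(Imm);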
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:112
llvm::X86AddressMode::BaseType
enum llvm::X86AddressMode::@600 BaseType
llvm::CCValAssign::FPExt
@ FPExt
Definition: CallingConvLower.h:49
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:173
llvm::CallBase::paramHasAttr
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Definition: Instructions.cpp:343
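A sketch of scanning call-site arguments for an attribute; CB is a hypothetical const CallBase * (arg_size and getArgOperand are documented below):

for (unsigned I = 0, E = CB->arg_size(); I != E; ++I)
  if (CB->paramHasAttr(I, Attribute::StructRet))
    return CB->getArgOperand(I); // found the sret argument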
llvm::MVT::v8i64
@ v8i64
Definition: MachineValueType.h:130
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:134
MRI
const MachineRegisterInfo *MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
Definition: Constants.h:142
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:117
Callee
FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:187
llvm::X86AddressMode::Base
union llvm::X86AddressMode::@601 Base
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:164
CallingConv.h
llvm::CallingConv::CFGuard_Check
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
llvm::CmpInst::FCMP_UGT
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:731
llvm::X86Subtarget::hasAVX512
bool hasAVX512() const
Definition: X86Subtarget.h:207
llvm::CCValAssign::isMemLoc
bool isMemLoc() const
Definition: CallingConvLower.h:144
X86ChooseCmpImmediateOpcode
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC)
If we have a comparison with RHS as the RHS of the comparison, return an opcode that works for the compare (e.g. CMP32ri) based on the immediate.
Definition: X86FastISel.cpp:1378
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:972
X86CallingConv.h
llvm::CallingConv::GHC
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
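A one-line sketch; Ctx is a hypothetical LLVMContext reference:

// A typed zero: i32 0 here, but works for any first-class type.
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(Ctx));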
llvm::N86::EBX
@ EBX
Definition: X86MCTargetDesc.h:51
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
llvm::TargetRegisterInfo::getRegSizeInBits
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Definition: TargetRegisterInfo.h:280
llvm::MemIntrinsicBase::getLength
Value * getLength() const
Definition: IntrinsicInst.h:815
uint16_t
llvm::CallingConv::X86_StdCall
@ X86_StdCall
stdcall is mostly used by the Win32 API.
Definition: CallingConv.h:96
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:653
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
X86TargetMachine.h
llvm::StructLayout::getElementOffset
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:652
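A sketch of querying a field offset; DL and STy are a hypothetical DataLayout and StructType *:

const StructLayout *SL = DL.getStructLayout(STy);
// Byte offset of the second field, honoring padding and alignment.
uint64_t Off = SL->getElementOffset(1);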
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::CC_X86
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1339
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
GlobalVariable.h
llvm::MachineInstrBuilder::addConstantPoolIndex
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:158
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:101
llvm::pdb::PDB_LocType::Slot
@ Slot
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:48
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:226
llvm::CallBase::arg_empty
bool arg_empty() const
Definition: InstrTypes.h:1338
llvm::X86Subtarget::hasAVX
bool hasAVX() const
Definition: X86Subtarget.h:205
llvm::X86::getCMovOpcode
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false)
Return a cmov opcode for the given register size in bytes, and operand type.
Definition: X86InstrInfo.cpp:2849
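A minimal sketch; the opcode fixes only the operand width, the condition itself is carried as a separate operand on the instruction:

// 32-bit register form of CMOV (4-byte operands, no memory operand).
unsigned Opc = X86::getCMovOpcode(/*RegBytes=*/4, /*HasMemoryOperand=*/false);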
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::MachineFrameInfo::setStackProtectorIndex
void setStackProtectorIndex(int I)
Definition: MachineFrameInfo.h:359
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:116
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:136
GlobalAlias.h
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
X86RegisterInfo.h
llvm::CallingConv::X86_FastCall
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:100
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::ARMBuildAttrs::Symbol
@ Symbol
Definition: ARMBuildAttributes.h:83
llvm::X86AddressMode::FrameIndex
int FrameIndex
Definition: X86InstrBuilder.h:50
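A sketch of addressing a stack slot through X86AddressMode (from X86InstrBuilder.h); FI is a hypothetical frame index:

X86AddressMode AM;
AM.BaseType = X86AddressMode::FrameIndexBase; // base is a frame index, not a register
AM.Base.FrameIndex = FI;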
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::MachineInstrBuilder::addGlobalAddress
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:177
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
llvm::getKillRegState
unsigned getKillRegState(bool B)
Definition: MachineInstrBuilder.h:546
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:106
FastISel.h
llvm::X86Subtarget::getTargetTriple
const Triple & getTargetTriple() const
Definition: X86Subtarget.h:269
Instructions.h
llvm::MemCpyInst
This class wraps the llvm.memcpy intrinsic.
Definition: IntrinsicInst.h:1125
llvm::MachineInstrBuilder::addSym
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
Definition: MachineInstrBuilder.h:267
llvm::FunctionLoweringInfo::MF
MachineFunction * MF
Definition: FunctionLoweringInfo.h:55
llvm::Type::isStructTy
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:231
llvm::GetReturnInfo
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags, and optionally also the offsets, if the return value is being lowered to memory.
Definition: TargetLoweringBase.cpp:1669
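A sketch of computing the return-value split for a function F, with hypothetical TLI (TargetLowering) and DL (DataLayout) in scope:

SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(F.getCallingConv(), F.getReturnType(), F.getAttributes(),
              Outs, TLI, DL); // Outs now holds one entry per legal return part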
llvm::MemIntrinsic::isVolatile
bool isVolatile() const
Definition: IntrinsicInst.h:1050
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
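A sketch of emitting one instruction; MBB, MIMD, TII, DstReg and SrcReg are hypothetical values from a selection context:

// Reg-to-reg 32-bit move; the source register dies here.
BuildMI(MBB, MBB.end(), MIMD, TII.get(X86::MOV32rr), DstReg)
    .addReg(SrcReg, getKillRegState(/*IsKill=*/true));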
llvm::X86AddressMode::getFullAddress
void getFullAddress(SmallVectorImpl< MachineOperand > &MO)
Definition: X86InstrBuilder.h:65
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:354
llvm::X86::getX86ConditionCode
std::pair< CondCode, bool > getX86ConditionCode(CmpInst::Predicate Predicate)
Return a pair of condition code for the given predicate and whether the instruction operands should be swapped.
Definition: X86InstrInfo.cpp:2811
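A minimal sketch with a hypothetical CmpInst::Predicate Pred:

auto [CC, SwapArgs] = X86::getX86ConditionCode(Pred);
// SwapArgs asks the caller to swap the compare's operands before use;
// a CC above LAST_VALID_COND means the predicate has no direct mapping.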
llvm::X86::AddrIndexReg
@ AddrIndexReg
Definition: X86BaseInfo.h:34
llvm::User::op_begin
op_iterator op_begin()
Definition: User.h:234
getX86SSEConditionCode
static std::pair< unsigned, bool > getX86SSEConditionCode(CmpInst::Predicate Predicate)
Definition: X86FastISel.cpp:175
llvm::CCValAssign::getValVT
MVT getValVT() const
Definition: CallingConvLower.h:141
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:56
llvm::CmpInst::FCMP_UNE
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:735
computeBytesPoppedByCalleeForSRet
static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget, CallingConv::ID CC, const CallBase *CB)
Definition: X86FastISel.cpp:3134
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:56
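A sketch of the classic operand-rewrite loop; MI, OldReg and NewReg are hypothetical:

for (MachineOperand &MO : MI.operands())
  if (MO.isReg() && MO.getReg() == OldReg)
    MO.setReg(NewReg); // uses and defs alike now refer to NewReg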
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:519
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:809
llvm::constrainOperandRegClass
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed as an argument (RegClass).
Definition: Utils.cpp:54
llvm::CallingConv::Tail
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always be performed.
Definition: CallingConv.h:76
llvm::MemIntrinsicBase::getRawDest
Value * getRawDest() const
Definition: IntrinsicInst.h:809
llvm::orc::SymbolState::Emitted
@ Emitted
Assigned address, still materializing.
llvm::MachineInstrBuilder::addMetadata
const MachineInstrBuilder & addMetadata(const MDNode *MD) const
Definition: MachineInstrBuilder.h:236
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:160
llvm::CmpInst::FCMP_OLE
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:726
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:485
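A sketch of a backward scan; MBB is a hypothetical MachineBasicBlock reference:

// Visit instructions last-to-first without manual reverse iterators.
for (MachineInstr &MI : llvm::reverse(MBB))
  if (MI.isCall())
    break;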
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
Definition: InstrTypes.h:1174
DerivedTypes.h
TM
LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:90
llvm::CallingConv::HiPE
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:256
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:47
llvm::X86RegisterInfo::getStackRegister
Register getStackRegister() const
Definition: X86RegisterInfo.h:150
llvm::omp::RTLDependInfoFields::Flags
@ Flags
GEP
Definition: HexagonCommonGEP.cpp:171
llvm::CallingConv::X86_ThisCall
@ X86_ThisCall
Similar to X86_StdCall.
Definition: CallingConv.h:119
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition: MachineFrameInfo.h:372
llvm::AllocaInst
An instruction to allocate memory on the stack.
Definition: Instructions.h:58
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::MachineMemOperand::isNonTemporal
bool isNonTemporal() const
Definition: MachineMemOperand.h:290
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3133
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:760
llvm::X86MachineFunctionInfo::getSRetReturnReg
Register getSRetReturnReg() const
Definition: X86MachineFunctionInfo.h:173
llvm::MVT::v8f32
@ v8f32
Definition: MachineValueType.h:170
X86InstrInfo.h
llvm::CmpInst::FCMP_TRUE
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:736
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::X86::COND_E
@ COND_E
Definition: X86BaseInfo.h:85
llvm::MCAsmInfo::usesWindowsCFI
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:793
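A sketch tying this to getTarget/getMCAsmInfo above; MF is a hypothetical MachineFunction reference:

bool HasWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();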
llvm::CallingConv::WebKit_JS
@ WebKit_JS
Used for stack based JavaScript calls.
Definition: CallingConv.h:56
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:57
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:230
llvm::X86::LAST_VALID_COND
@ LAST_VALID_COND
Definition: X86BaseInfo.h:97
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition: MachineOperand.cpp:1032
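A sketch of describing an outgoing-argument store with a memory operand (see MOStore above); MF and Offset are hypothetical:

MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo::getStack(MF, Offset),
    MachineMemOperand::MOStore, /*Size=*/4, Align(4));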
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::X86::COND_NP
@ COND_NP
Definition: X86BaseInfo.h:92
llvm::TargetRegisterInfo::getCallPreservedMask
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
Definition: TargetRegisterInfo.h:483
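A sketch from a call-lowering context; MIB, TRI, MF and CC are hypothetical (addRegMask is documented above):

// Tell the register allocator which registers survive this call.
MIB.addRegMask(TRI.getCallPreservedMask(MF, CC));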
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the specified vector type. The top elements 1 to N-1 of the N-element vector are undefined.
Definition: ISDOpcodes.h:606
llvm::CmpInst::FCMP_ORD
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:728
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3226
llvm::BasicBlock::const_iterator
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:88
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::CmpInst::FCMP_UEQ
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:730
llvm::X86::COND_NE
@ COND_NE
Definition: X86BaseInfo.h:86
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::MCInstrDesc::ImplicitDefs
const MCPhysReg * ImplicitDefs
Definition: MCInstrDesc.h:207
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::X86RegisterInfo
Definition: X86RegisterInfo.h:24
llvm::User::op_end
op_iterator op_end()
Definition: User.h:236