1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a DAG pattern matching instruction selector for X86,
11 // converting from a legalized DAG to an X86 DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86MachineFunctionInfo.h"
17 #include "X86RegisterInfo.h"
18 #include "X86Subtarget.h"
19 #include "X86TargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/SelectionDAGISel.h"
24 #include "llvm/Config/llvm-config.h"
25 #include "llvm/IR/ConstantRange.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/Instructions.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/Type.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/KnownBits.h"
37 #include <stdint.h>
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "x86-isel"
41 
42 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
43 
44 static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
45  cl::desc("Enable setting constant bits to reduce size of mask immediates"),
46  cl::Hidden);
47 
48 //===----------------------------------------------------------------------===//
49 // Pattern Matcher Implementation
50 //===----------------------------------------------------------------------===//
51 
52 namespace {
53  /// This corresponds to X86AddressMode, but uses SDValues instead of register
54  /// numbers for the leaves of the matched tree.
55  struct X86ISelAddressMode {
56  enum {
57  RegBase,
58  FrameIndexBase
59  } BaseType;
60 
61  // This is really a union, discriminated by BaseType!
62  SDValue Base_Reg;
63  int Base_FrameIndex;
64 
65  unsigned Scale;
66  SDValue IndexReg;
67  int32_t Disp;
68  SDValue Segment;
69  const GlobalValue *GV;
70  const Constant *CP;
71  const BlockAddress *BlockAddr;
72  const char *ES;
73  MCSymbol *MCSym;
74  int JT;
75  unsigned Align; // CP alignment.
76  unsigned char SymbolFlags; // X86II::MO_*
77 
78  X86ISelAddressMode()
79  : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
80  Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
81  MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
82 
83  bool hasSymbolicDisplacement() const {
84  return GV != nullptr || CP != nullptr || ES != nullptr ||
85  MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
86  }
87 
88  bool hasBaseOrIndexReg() const {
89  return BaseType == FrameIndexBase ||
90  IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
91  }
92 
93  /// Return true if this addressing mode is already RIP-relative.
94  bool isRIPRelative() const {
95  if (BaseType != RegBase) return false;
96  if (RegisterSDNode *RegNode =
97  dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
98  return RegNode->getReg() == X86::RIP;
99  return false;
100  }
101 
102  void setBaseReg(SDValue Reg) {
103  BaseType = RegBase;
104  Base_Reg = Reg;
105  }
106 
107 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
108  void dump(SelectionDAG *DAG = nullptr) {
109  dbgs() << "X86ISelAddressMode " << this << '\n';
110  dbgs() << "Base_Reg ";
111  if (Base_Reg.getNode())
112  Base_Reg.getNode()->dump(DAG);
113  else
114  dbgs() << "nul\n";
115  if (BaseType == FrameIndexBase)
116  dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
117  dbgs() << " Scale " << Scale << '\n'
118  << "IndexReg ";
119  if (IndexReg.getNode())
120  IndexReg.getNode()->dump(DAG);
121  else
122  dbgs() << "nul\n";
123  dbgs() << " Disp " << Disp << '\n'
124  << "GV ";
125  if (GV)
126  GV->dump();
127  else
128  dbgs() << "nul";
129  dbgs() << " CP ";
130  if (CP)
131  CP->dump();
132  else
133  dbgs() << "nul";
134  dbgs() << '\n'
135  << "ES ";
136  if (ES)
137  dbgs() << ES;
138  else
139  dbgs() << "nul";
140  dbgs() << " MCSym ";
141  if (MCSym)
142  dbgs() << MCSym;
143  else
144  dbgs() << "nul";
145  dbgs() << " JT" << JT << " Align" << Align << '\n';
146  }
147 #endif
148  };
149 }
150 
151 namespace {
152  //===--------------------------------------------------------------------===//
153  /// ISel - X86-specific code to select X86 machine instructions for
154  /// SelectionDAG operations.
155  ///
156  class X86DAGToDAGISel final : public SelectionDAGISel {
157  /// Keep a pointer to the X86Subtarget around so that we can
158  /// make the right decision when generating code for different targets.
159  const X86Subtarget *Subtarget;
160 
161  /// If true, selector should try to optimize for code size instead of
162  /// performance.
163  bool OptForSize;
164 
165  /// If true, selector should try to optimize for minimum code size.
166  bool OptForMinSize;
167 
168  public:
169  explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
170  : SelectionDAGISel(tm, OptLevel), OptForSize(false),
171  OptForMinSize(false) {}
172 
173  StringRef getPassName() const override {
174  return "X86 DAG->DAG Instruction Selection";
175  }
176 
177  bool runOnMachineFunction(MachineFunction &MF) override {
178  // Reset the subtarget each time through.
179  Subtarget = &MF.getSubtarget<X86Subtarget>();
180  SelectionDAGISel::runOnMachineFunction(MF);
181  return true;
182  }
183 
184  void EmitFunctionEntryCode() override;
185 
186  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
187 
188  void PreprocessISelDAG() override;
189  void PostprocessISelDAG() override;
190 
191 // Include the pieces autogenerated from the target description.
192 #include "X86GenDAGISel.inc"
193 
194  private:
195  void Select(SDNode *N) override;
196 
197  bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
198  bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
199  bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
200  bool matchAddress(SDValue N, X86ISelAddressMode &AM);
201  bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
202  bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
203  bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
204  unsigned Depth);
205  bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
206  bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
207  SDValue &Scale, SDValue &Index, SDValue &Disp,
208  SDValue &Segment);
209  bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
210  SDValue &Scale, SDValue &Index, SDValue &Disp,
211  SDValue &Segment);
212  bool selectMOV64Imm32(SDValue N, SDValue &Imm);
213  bool selectLEAAddr(SDValue N, SDValue &Base,
214  SDValue &Scale, SDValue &Index, SDValue &Disp,
215  SDValue &Segment);
216  bool selectLEA64_32Addr(SDValue N, SDValue &Base,
217  SDValue &Scale, SDValue &Index, SDValue &Disp,
218  SDValue &Segment);
219  bool selectTLSADDRAddr(SDValue N, SDValue &Base,
220  SDValue &Scale, SDValue &Index, SDValue &Disp,
221  SDValue &Segment);
222  bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
223  SDValue &Base, SDValue &Scale,
224  SDValue &Index, SDValue &Disp,
225  SDValue &Segment,
226  SDValue &NodeWithChain);
227  bool selectRelocImm(SDValue N, SDValue &Op);
228 
229  bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
230  SDValue &Base, SDValue &Scale,
231  SDValue &Index, SDValue &Disp,
232  SDValue &Segment);
233 
234  // Convenience method where P is also root.
235  bool tryFoldLoad(SDNode *P, SDValue N,
236  SDValue &Base, SDValue &Scale,
237  SDValue &Index, SDValue &Disp,
238  SDValue &Segment) {
239  return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
240  }
241 
242  // Try to fold a vector load. This makes sure the load isn't non-temporal.
243  bool tryFoldVecLoad(SDNode *Root, SDNode *P, SDValue N,
244  SDValue &Base, SDValue &Scale,
245  SDValue &Index, SDValue &Disp,
246  SDValue &Segment);
247 
248  /// Implement addressing mode selection for inline asm expressions.
249  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
250  unsigned ConstraintID,
251  std::vector<SDValue> &OutOps) override;
252 
253  void emitSpecialCodeForMain();
254 
255  inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
256  SDValue &Base, SDValue &Scale,
257  SDValue &Index, SDValue &Disp,
258  SDValue &Segment) {
259  Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
260  ? CurDAG->getTargetFrameIndex(
261  AM.Base_FrameIndex,
262  TLI->getPointerTy(CurDAG->getDataLayout()))
263  : AM.Base_Reg;
264  Scale = getI8Imm(AM.Scale, DL);
265  Index = AM.IndexReg;
266  // These are 32-bit even in 64-bit mode since RIP-relative offset
267  // is 32-bit.
268  if (AM.GV)
269  Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
270  MVT::i32, AM.Disp,
271  AM.SymbolFlags);
272  else if (AM.CP)
273  Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
274  AM.Align, AM.Disp, AM.SymbolFlags);
275  else if (AM.ES) {
276  assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
277  Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
278  } else if (AM.MCSym) {
279  assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
280  assert(AM.SymbolFlags == 0 && "oo");
281  Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
282  } else if (AM.JT != -1) {
283  assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
284  Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
285  } else if (AM.BlockAddr)
286  Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
287  AM.SymbolFlags);
288  else
289  Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
290 
291  if (AM.Segment.getNode())
292  Segment = AM.Segment;
293  else
294  Segment = CurDAG->getRegister(0, MVT::i32);
295  }
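 // Note (added for clarity, not in the original source): a register value of 0
 // returned for Segment means "no segment override"; the selected instruction
 // then uses the default segment for the memory operand.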
296 
297  // Utility function to determine whether we should avoid selecting
298  // immediate forms of instructions, for better code size.
299  // At a high level, we'd like to avoid such instructions when
300  // we have similar constants used within the same basic block
301  // that can be kept in a register.
302  //
303  bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
304  uint32_t UseCount = 0;
305 
306  // Do not want to hoist if we're not optimizing for size.
307  // TODO: We'd like to remove this restriction.
308  // See the comment in X86InstrInfo.td for more info.
309  if (!OptForSize)
310  return false;
311 
312  // Walk all the users of the immediate.
313  for (SDNode::use_iterator UI = N->use_begin(),
314  UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
315 
316  SDNode *User = *UI;
317 
318  // This user is already selected. Count it as a legitimate use and
319  // move on.
320  if (User->isMachineOpcode()) {
321  UseCount++;
322  continue;
323  }
324 
325  // We want to count stores of immediates as real uses.
326  if (User->getOpcode() == ISD::STORE &&
327  User->getOperand(1).getNode() == N) {
328  UseCount++;
329  continue;
330  }
331 
332  // We don't currently match users that have > 2 operands (except
333  // for stores, which are handled above)
334  // Those instructions won't match in ISel, for now, and would
335  // be counted incorrectly.
336  // This may change in the future as we add additional instruction
337  // types.
338  if (User->getNumOperands() != 2)
339  continue;
340 
341  // Immediates that are used for offsets as part of stack
342  // manipulation should be left alone. These are typically
343  // used to indicate SP offsets for argument passing and
344  // will get pulled into stores/pushes (implicitly).
345  if (User->getOpcode() == X86ISD::ADD ||
346  User->getOpcode() == ISD::ADD ||
347  User->getOpcode() == X86ISD::SUB ||
348  User->getOpcode() == ISD::SUB) {
349 
350  // Find the other operand of the add/sub.
351  SDValue OtherOp = User->getOperand(0);
352  if (OtherOp.getNode() == N)
353  OtherOp = User->getOperand(1);
354 
355  // Don't count if the other operand is SP.
356  RegisterSDNode *RegNode;
357  if (OtherOp->getOpcode() == ISD::CopyFromReg &&
358  (RegNode = dyn_cast_or_null<RegisterSDNode>(
359  OtherOp->getOperand(1).getNode())))
360  if ((RegNode->getReg() == X86::ESP) ||
361  (RegNode->getReg() == X86::RSP))
362  continue;
363  }
364 
365  // ... otherwise, count this and move on.
366  UseCount++;
367  }
368 
369  // If we have more than 1 use, then recommend for hoisting.
370  return (UseCount > 1);
371  }
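 // Illustrative note (added, not in the original source): under -Os, if the
 // same immediate feeds two independent adds in one block, returning true here
 // steers isel away from the immediate instruction forms, so the constant can
 // be materialized once in a register and reused, trading one extra mov for
 // two shorter encodings.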
372 
373  /// Return a target constant with the specified value of type i8.
374  inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
375  return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
376  }
377 
378  /// Return a target constant with the specified value, of type i32.
379  inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
380  return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
381  }
382 
383  /// Return a target constant with the specified value, of type i64.
384  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
385  return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
386  }
387 
388  SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
389  const SDLoc &DL) {
390  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
391  uint64_t Index = N->getConstantOperandVal(1);
392  MVT VecVT = N->getOperand(0).getSimpleValueType();
393  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
394  }
395 
396  SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
397  const SDLoc &DL) {
398  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
399  uint64_t Index = N->getConstantOperandVal(2);
400  MVT VecVT = N->getSimpleValueType(0);
401  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
402  }
403 
404  /// Return an SDNode that returns the value of the global base register.
405  /// Output instructions required to initialize the global base register,
406  /// if necessary.
407  SDNode *getGlobalBaseReg();
408 
409  /// Return a reference to the TargetMachine, casted to the target-specific
410  /// type.
411  const X86TargetMachine &getTargetMachine() const {
412  return static_cast<const X86TargetMachine &>(TM);
413  }
414 
415  /// Return a reference to the TargetInstrInfo, casted to the target-specific
416  /// type.
417  const X86InstrInfo *getInstrInfo() const {
418  return Subtarget->getInstrInfo();
419  }
420 
421  /// Address-mode matching performs shift-of-and to and-of-shift
422  /// reassociation in order to expose more scaled addressing
423  /// opportunities.
424  bool ComplexPatternFuncMutatesDAG() const override {
425  return true;
426  }
427 
428  bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
429 
430  /// Returns whether this is a relocatable immediate in the range
431  /// [-2^Width .. 2^Width-1].
432  template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
433  if (auto *CN = dyn_cast<ConstantSDNode>(N))
434  return isInt<Width>(CN->getSExtValue());
435  return isSExtAbsoluteSymbolRef(Width, N);
436  }
437 
438  // Indicates we should prefer to use a non-temporal load for this load.
439  bool useNonTemporalLoad(LoadSDNode *N) const {
440  if (!N->isNonTemporal())
441  return false;
442 
443  unsigned StoreSize = N->getMemoryVT().getStoreSize();
444 
445  if (N->getAlignment() < StoreSize)
446  return false;
447 
448  switch (StoreSize) {
449  default: llvm_unreachable("Unsupported store size");
450  case 16:
451  return Subtarget->hasSSE41();
452  case 32:
453  return Subtarget->hasAVX2();
454  case 64:
455  return Subtarget->hasAVX512();
456  }
457  }
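 // Note (added for clarity): the switch above maps the load width to the ISA
 // extension that provides the matching non-temporal load: 16-byte MOVNTDQA
 // needs SSE4.1, the 32-byte VMOVNTDQA form needs AVX2, and the 64-byte form
 // needs AVX-512.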
458 
459  bool foldLoadStoreIntoMemOperand(SDNode *Node);
460  bool matchBEXTRFromAnd(SDNode *Node);
461  bool shrinkAndImmediate(SDNode *N);
462  bool isMaskZeroExtended(SDNode *N) const;
463  bool tryShiftAmountMod(SDNode *N);
464 
465  MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
466  const SDLoc &dl, MVT VT, SDNode *Node);
467  MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
468  const SDLoc &dl, MVT VT, SDNode *Node,
469  SDValue &InFlag);
470  };
471 }
472 
473 
474 // Returns true if this masked compare can be implemented legally with this
475 // type.
476 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
477  unsigned Opcode = N->getOpcode();
478  if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
479  Opcode == X86ISD::CMPM_RND || Opcode == X86ISD::VFPCLASS) {
480  // We can get 256-bit 8 element types here without VLX being enabled. When
481  // this happens we will use 512-bit operations and the mask will not be
482  // zero extended.
483  EVT OpVT = N->getOperand(0).getValueType();
484  if (OpVT.is256BitVector() || OpVT.is128BitVector())
485  return Subtarget->hasVLX();
486 
487  return true;
488  }
489  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
490  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
491  Opcode == X86ISD::FSETCCM_RND)
492  return true;
493 
494  return false;
495 }
496 
497 // Returns true if we can assume the writer of the mask has zero extended it
498 // for us.
499 bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
500  // If this is an AND, check if we have a compare on either side. As long as
501  // one side guarantees the mask is zero extended, the AND will preserve those
502  // zeros.
503  if (N->getOpcode() == ISD::AND)
504  return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
505  isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
506 
507  return isLegalMaskCompare(N, Subtarget);
508 }
509 
510 bool
511 X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
512  if (OptLevel == CodeGenOpt::None) return false;
513 
514  if (!N.hasOneUse())
515  return false;
516 
517  if (N.getOpcode() != ISD::LOAD)
518  return true;
519 
520  // If N is a load, do additional profitability checks.
521  if (U == Root) {
522  switch (U->getOpcode()) {
523  default: break;
524  case X86ISD::ADD:
525  case X86ISD::ADC:
526  case X86ISD::SUB:
527  case X86ISD::SBB:
528  case X86ISD::AND:
529  case X86ISD::XOR:
530  case X86ISD::OR:
531  case ISD::ADD:
532  case ISD::ADDCARRY:
533  case ISD::AND:
534  case ISD::OR:
535  case ISD::XOR: {
536  SDValue Op1 = U->getOperand(1);
537 
538  // If the other operand is an 8-bit immediate we should fold the immediate
539  // instead. This reduces code size.
540  // e.g.
541  // movl 4(%esp), %eax
542  // addl $4, %eax
543  // vs.
544  // movl $4, %eax
545  // addl 4(%esp), %eax
546  // The former is 2 bytes shorter. In the case where the increment is 1,
547  // the saving can be 4 bytes (by using incl %eax).
548  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
549  if (Imm->getAPIntValue().isSignedIntN(8))
550  return false;
551 
552  // If this is a 64-bit AND with an immediate that fits in 32-bits,
553  // prefer using the smaller and over folding the load. This is needed to
554  // make sure immediates created by shrinkAndImmediate are always folded.
555  // Ideally we would narrow the load during DAG combine and get the
556  // best of both worlds.
557  if (U->getOpcode() == ISD::AND &&
558  Imm->getAPIntValue().getBitWidth() == 64 &&
559  Imm->getAPIntValue().isIntN(32))
560  return false;
561  }
562 
563  // If the other operand is a TLS address, we should fold it instead.
564  // This produces
565  // movl %gs:0, %eax
566  // leal i@NTPOFF(%eax), %eax
567  // instead of
568  // movl $i@NTPOFF, %eax
569  // addl %gs:0, %eax
570  // if the block also has an access to a second TLS address this will save
571  // a load.
572  // FIXME: This is probably also true for non-TLS addresses.
573  if (Op1.getOpcode() == X86ISD::Wrapper) {
574  SDValue Val = Op1.getOperand(0);
575  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
576  return false;
577  }
578 
579  // Don't fold load if this matches the BTS/BTR/BTC patterns.
580  // BTS: (or X, (shl 1, n))
581  // BTR: (and X, (rotl -2, n))
582  // BTC: (xor X, (shl 1, n))
583  if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
584  if (U->getOperand(0).getOpcode() == ISD::SHL &&
585  isOneConstant(U->getOperand(0).getOperand(0)))
586  return false;
587 
588  if (U->getOperand(1).getOpcode() == ISD::SHL &&
589  isOneConstant(U->getOperand(1).getOperand(0)))
590  return false;
591  }
592  if (U->getOpcode() == ISD::AND) {
593  SDValue U0 = U->getOperand(0);
594  SDValue U1 = U->getOperand(1);
595  if (U0.getOpcode() == ISD::ROTL) {
596  auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
597  if (C && C->getSExtValue() == -2)
598  return false;
599  }
600 
601  if (U1.getOpcode() == ISD::ROTL) {
602  auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
603  if (C && C->getSExtValue() == -2)
604  return false;
605  }
606  }
607 
608  break;
609  }
610  case ISD::SHL:
611  case ISD::SRA:
612  case ISD::SRL:
613  // Don't fold a load into a shift by immediate. The BMI2 instructions
614  // support folding a load, but not an immediate. The legacy instructions
615  // support folding an immediate, but can't fold a load. Folding an
616  // immediate is preferable to folding a load.
617  if (isa<ConstantSDNode>(U->getOperand(1)))
618  return false;
619 
620  break;
621  }
622  }
623 
624  // Prevent folding a load if this can be implemented with an insert_subreg or
625  // a move that implicitly zeroes.
626  if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
627  isNullConstant(Root->getOperand(2)) &&
628  (Root->getOperand(0).isUndef() ||
629  ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
630  return false;
631 
632  return true;
633 }
634 
635 /// Replace the original chain operand of the call with
636 /// load's chain operand and move load below the call's chain operand.
637 static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
638  SDValue Call, SDValue OrigChain) {
639  SmallVector<SDValue, 8> Ops;
640  SDValue Chain = OrigChain.getOperand(0);
641  if (Chain.getNode() == Load.getNode())
642  Ops.push_back(Load.getOperand(0));
643  else {
644  assert(Chain.getOpcode() == ISD::TokenFactor &&
645  "Unexpected chain operand");
646  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
647  if (Chain.getOperand(i).getNode() == Load.getNode())
648  Ops.push_back(Load.getOperand(0));
649  else
650  Ops.push_back(Chain.getOperand(i));
651  SDValue NewChain =
652  CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
653  Ops.clear();
654  Ops.push_back(NewChain);
655  }
656  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
657  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
658  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
659  Load.getOperand(1), Load.getOperand(2));
660 
661  Ops.clear();
662  Ops.push_back(SDValue(Load.getNode(), 1));
663  Ops.append(Call->op_begin() + 1, Call->op_end());
664  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
665 }
666 
667 /// Return true if call address is a load and it can be
668 /// moved below CALLSEQ_START and the chains leading up to the call.
669 /// Return the CALLSEQ_START by reference as a second output.
670 /// In the case of a tail call, there isn't a callseq node between the call
671 /// chain and the load.
672 static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
673  // The transformation is somewhat dangerous if the call's chain was glued to
674  // the call. After MoveBelowOrigChain the load is moved between the call and
675 // the chain; this can create a cycle if the load is not folded. So it is
676  // *really* important that we are sure the load will be folded.
677  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
678  return false;
679  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
680  if (!LD ||
681  LD->isVolatile() ||
682  LD->getAddressingMode() != ISD::UNINDEXED ||
683  LD->getExtensionType() != ISD::NON_EXTLOAD)
684  return false;
685 
686  // Now let's find the callseq_start.
687  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
688  if (!Chain.hasOneUse())
689  return false;
690  Chain = Chain.getOperand(0);
691  }
692 
693  if (!Chain.getNumOperands())
694  return false;
695  // Since we are not checking for AA here, conservatively abort if the chain
696  // writes to memory. It's not safe to move the callee (a load) across a store.
697  if (isa<MemSDNode>(Chain.getNode()) &&
698  cast<MemSDNode>(Chain.getNode())->writeMem())
699  return false;
700  if (Chain.getOperand(0).getNode() == Callee.getNode())
701  return true;
702  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
703  Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
704  Callee.getValue(1).hasOneUse())
705  return true;
706  return false;
707 }
708 
709 void X86DAGToDAGISel::PreprocessISelDAG() {
710  // OptFor[Min]Size are used in pattern predicates that isel is matching.
711  OptForSize = MF->getFunction().optForSize();
712  OptForMinSize = MF->getFunction().optForMinSize();
713  assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
714 
715  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
716  E = CurDAG->allnodes_end(); I != E; ) {
717  SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
718 
719  // If this is a target specific AND node with no flag usages, turn it back
720  // into ISD::AND to enable test instruction matching.
721  if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
722  SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
723  N->getOperand(0), N->getOperand(1));
724  --I;
725  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
726  ++I;
727  CurDAG->DeleteNode(N);
728  continue;
729  }
730 
731  if (OptLevel != CodeGenOpt::None &&
732  // Only do this when the target can fold the load into the call or
733  // jmp.
734  !Subtarget->useRetpolineIndirectCalls() &&
735  ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
736  (N->getOpcode() == X86ISD::TC_RETURN &&
737  (Subtarget->is64Bit() ||
738  !getTargetMachine().isPositionIndependent())))) {
739  /// Also try moving call address load from outside callseq_start to just
740  /// before the call to allow it to be folded.
741  ///
742  /// [Load chain]
743  /// ^
744  /// |
745  /// [Load]
746  /// ^ ^
747  /// | |
748  /// / \--
749  /// / |
750  ///[CALLSEQ_START] |
751  /// ^ |
752  /// | |
753  /// [LOAD/C2Reg] |
754  /// | |
755  /// \ /
756  /// \ /
757  /// [CALL]
758  bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
759  SDValue Chain = N->getOperand(0);
760  SDValue Load = N->getOperand(1);
761  if (!isCalleeLoad(Load, Chain, HasCallSeq))
762  continue;
763  moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
764  ++NumLoadMoved;
765  continue;
766  }
767 
768  // Lower fpround and fpextend nodes that target the FP stack to be a store and
769  // load to the stack. This is a gross hack. We would like to simply mark
770  // these as being illegal, but when we do that, legalize produces these when
771  // it expands calls, then expands these in the same legalize pass. We would
772  // like dag combine to be able to hack on these between the call expansion
773  // and the node legalization. As such this pass basically does "really
774  // late" legalization of these inline with the X86 isel pass.
775  // FIXME: This should only happen when not compiled with -O0.
776  if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
777  continue;
778 
779  MVT SrcVT = N->getOperand(0).getSimpleValueType();
780  MVT DstVT = N->getSimpleValueType(0);
781 
782  // If any of the sources are vectors, no fp stack involved.
783  if (SrcVT.isVector() || DstVT.isVector())
784  continue;
785 
786  // If the source and destination are SSE registers, then this is a legal
787  // conversion that should not be lowered.
788  const X86TargetLowering *X86Lowering =
789  static_cast<const X86TargetLowering *>(TLI);
790  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
791  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
792  if (SrcIsSSE && DstIsSSE)
793  continue;
794 
795  if (!SrcIsSSE && !DstIsSSE) {
796  // If this is an FPStack extension, it is a noop.
797  if (N->getOpcode() == ISD::FP_EXTEND)
798  continue;
799  // If this is a value-preserving FPStack truncation, it is a noop.
800  if (N->getConstantOperandVal(1))
801  continue;
802  }
803 
804  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
805  // FPStack has extload and truncstore. SSE can fold direct loads into other
806  // operations. Based on this, decide what we want to do.
807  MVT MemVT;
808  if (N->getOpcode() == ISD::FP_ROUND)
809  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
810  else
811  MemVT = SrcIsSSE ? SrcVT : DstVT;
812 
813  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
814  SDLoc dl(N);
815 
816  // FIXME: optimize the case where the src/dest is a load or store?
817  SDValue Store =
818  CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
819  MemTmp, MachinePointerInfo(), MemVT);
820  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
821  MachinePointerInfo(), MemVT);
822 
823  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
824  // extload we created. This will cause general havoc on the DAG because
825  // anything below the conversion could be folded into other existing nodes.
826  // To avoid invalidating 'I', back it up to the convert node.
827  --I;
828  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
829 
830  // Now that we did that, the node is dead. Increment the iterator to the
831  // next node to process, then delete N.
832  ++I;
833  CurDAG->DeleteNode(N);
834  }
835 }
836 
837 
838 void X86DAGToDAGISel::PostprocessISelDAG() {
839  // Skip peepholes at -O0.
840  if (TM.getOptLevel() == CodeGenOpt::None)
841  return;
842 
843  // Attempt to remove vector moves that were inserted to zero upper bits.
844 
845  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
846  ++Position;
847 
848  while (Position != CurDAG->allnodes_begin()) {
849  SDNode *N = &*--Position;
850  // Skip dead nodes and any non-machine opcodes.
851  if (N->use_empty() || !N->isMachineOpcode())
852  continue;
853 
854  if (N->getMachineOpcode() != TargetOpcode::SUBREG_TO_REG)
855  continue;
856 
857  unsigned SubRegIdx = N->getConstantOperandVal(2);
858  if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
859  continue;
860 
861  SDValue Move = N->getOperand(1);
862  if (!Move.isMachineOpcode())
863  continue;
864 
865  // Make sure it's one of the move opcodes we recognize.
866  switch (Move.getMachineOpcode()) {
867  default:
868  continue;
869  case X86::VMOVAPDrr: case X86::VMOVUPDrr:
870  case X86::VMOVAPSrr: case X86::VMOVUPSrr:
871  case X86::VMOVDQArr: case X86::VMOVDQUrr:
872  case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
873  case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
874  case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
875  case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
876  case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
877  case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
878  case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
879  case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
880  case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
881  case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
882  case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
883  break;
884  }
885 
886  SDValue In = Move.getOperand(0);
887  if (!In.isMachineOpcode() ||
888  In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
889  continue;
890 
891  // Make sure the instruction has a VEX, XOP, or EVEX prefix. This rules out
892  // the SHA instructions, which use a legacy encoding.
893  uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
894  if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
895  (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
896  (TSFlags & X86II::EncodingMask) != X86II::XOP)
897  continue;
898 
899  // The producing instruction is another vector instruction, so we can drop
900  // the move.
901  CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
902 
903  // If the move is now dead, delete it.
904  if (Move.getNode()->use_empty())
905  CurDAG->RemoveDeadNode(Move.getNode());
906  }
907 }
908 
909 
910 /// Emit any code that needs to be executed only in the main function.
911 void X86DAGToDAGISel::emitSpecialCodeForMain() {
912  if (Subtarget->isTargetCygMing()) {
913  TargetLowering::ArgListTy Args;
914  auto &DL = CurDAG->getDataLayout();
915 
916  TargetLowering::CallLoweringInfo CLI(*CurDAG);
917  CLI.setChain(CurDAG->getRoot())
918  .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
919  CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
920  std::move(Args));
921  const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
922  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
923  CurDAG->setRoot(Result.second);
924  }
925 }
926 
927 void X86DAGToDAGISel::EmitFunctionEntryCode() {
928  // If this is main, emit special code for main.
929  const Function &F = MF->getFunction();
930  if (F.hasExternalLinkage() && F.getName() == "main")
931  emitSpecialCodeForMain();
932 }
933 
934 static bool isDispSafeForFrameIndex(int64_t Val) {
935  // On 64-bit platforms, we can run into an issue where a frame index
936  // includes a displacement that, when added to the explicit displacement,
937  // will overflow the displacement field. Assuming that the frame index
938  // displacement fits into a 31-bit integer (which is only slightly more
939  // aggressive than the current fundamental assumption that it fits into
940  // a 32-bit integer), a 31-bit disp should always be safe.
941  return isInt<31>(Val);
942 }
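 // Note (added for clarity): isInt<31> bounds the value to [-2^30, 2^30), so
 // adding it to a frame-object offset that satisfies the same bound stays
 // within the signed 32-bit displacement field and cannot overflow.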
943 
944 bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
945  X86ISelAddressMode &AM) {
946  // If there's no offset to fold, we don't need to do any work.
947  if (Offset == 0)
948  return false;
949 
950  // Cannot combine ExternalSymbol displacements with integer offsets.
951  if (AM.ES || AM.MCSym)
952  return true;
953 
954  int64_t Val = AM.Disp + Offset;
955  CodeModel::Model M = TM.getCodeModel();
956  if (Subtarget->is64Bit()) {
957  if (!X86::isOffsetSuitableForCodeModel(Val, M,
958  AM.hasSymbolicDisplacement()))
959  return true;
960  // In addition to the checks required for a register base, check that
961  // we do not try to use an unsafe Disp with a frame index.
962  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
963  !isDispSafeForFrameIndex(Val))
964  return true;
965  }
966  AM.Disp = Val;
967  return false;
968 
969 }
970 
971 bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
972  SDValue Address = N->getOperand(1);
973 
974  // load gs:0 -> GS segment register.
975  // load fs:0 -> FS segment register.
976  //
977  // This optimization is valid because the GNU TLS model defines that
978  // gs:0 (or fs:0 on X86-64) contains its own address.
979  // For more information see http://people.redhat.com/drepper/tls.pdf
980  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
981  if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
982  (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
983  Subtarget->isTargetFuchsia()))
984  switch (N->getPointerInfo().getAddrSpace()) {
985  case 256:
986  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
987  return false;
988  case 257:
989  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
990  return false;
991  // Address space 258 is not handled here, because it is not used to
992  // address TLS areas.
993  }
994 
995  return true;
996 }
997 
998 /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
999 /// mode. These wrap things that will resolve down into a symbol reference.
1000 /// If no match is possible, this returns true, otherwise it returns false.
1001 bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1002  // If the addressing mode already has a symbol as the displacement, we can
1003  // never match another symbol.
1004  if (AM.hasSymbolicDisplacement())
1005  return true;
1006 
1007  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1008 
1009  // We can't use an addressing mode in the 64-bit large code model. In the
1010 // medium code model, we can use such a mode when RIP wrappers are present.
1011  // That signifies access to globals that are known to be "near", such as the
1012  // GOT itself.
1013  CodeModel::Model M = TM.getCodeModel();
1014  if (Subtarget->is64Bit() &&
1015  (M == CodeModel::Large || (M == CodeModel::Medium && !IsRIPRel)))
1016  return true;
1017 
1018  // Base and index reg must be 0 in order to use %rip as base.
1019  if (IsRIPRel && AM.hasBaseOrIndexReg())
1020  return true;
1021 
1022  // Make a local copy in case we can't do this fold.
1023  X86ISelAddressMode Backup = AM;
1024 
1025  int64_t Offset = 0;
1026  SDValue N0 = N.getOperand(0);
1027  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1028  AM.GV = G->getGlobal();
1029  AM.SymbolFlags = G->getTargetFlags();
1030  Offset = G->getOffset();
1031  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1032  AM.CP = CP->getConstVal();
1033  AM.Align = CP->getAlignment();
1034  AM.SymbolFlags = CP->getTargetFlags();
1035  Offset = CP->getOffset();
1036  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1037  AM.ES = S->getSymbol();
1038  AM.SymbolFlags = S->getTargetFlags();
1039  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1040  AM.MCSym = S->getMCSymbol();
1041  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
1042  AM.JT = J->getIndex();
1043  AM.SymbolFlags = J->getTargetFlags();
1044  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1045  AM.BlockAddr = BA->getBlockAddress();
1046  AM.SymbolFlags = BA->getTargetFlags();
1047  Offset = BA->getOffset();
1048  } else
1049  llvm_unreachable("Unhandled symbol reference node.");
1050 
1051  if (foldOffsetIntoAddress(Offset, AM)) {
1052  AM = Backup;
1053  return true;
1054  }
1055 
1056  if (IsRIPRel)
1057  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1058 
1059  // Commit the changes now that we know this fold is safe.
1060  return false;
1061 }
1062 
1063 /// Add the specified node to the specified addressing mode, returning true if
1064 /// it cannot be done. This just pattern matches for the addressing mode.
1065 bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1066  if (matchAddressRecursively(N, AM, 0))
1067  return true;
1068 
1069  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1070  // a smaller encoding and avoids a scaled-index.
1071  if (AM.Scale == 2 &&
1072  AM.BaseType == X86ISelAddressMode::RegBase &&
1073  AM.Base_Reg.getNode() == nullptr) {
1074  AM.Base_Reg = AM.IndexReg;
1075  AM.Scale = 1;
1076  }
1077 
1078  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1079  // because it has a smaller encoding.
1080  // TODO: Which other code models can use this?
1081  if (TM.getCodeModel() == CodeModel::Small &&
1082  Subtarget->is64Bit() &&
1083  AM.Scale == 1 &&
1084  AM.BaseType == X86ISelAddressMode::RegBase &&
1085  AM.Base_Reg.getNode() == nullptr &&
1086  AM.IndexReg.getNode() == nullptr &&
1087  AM.SymbolFlags == X86II::MO_NO_FLAG &&
1088  AM.hasSymbolicDisplacement())
1089  AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1090 
1091  return false;
1092 }
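// Worked example (added, not in the original source) for the first
// post-processing step above: an index-only match that would emit
// "lea (,%reg,2), %dst" is rewritten to "lea (%reg,%reg), %dst"; without a
// base register the SIB encoding requires a 4-byte displacement, so the
// base-plus-index form is smaller.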
1093 
1094 bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
1095  unsigned Depth) {
1096  // Add an artificial use to this node so that we can keep track of
1097  // it if it gets CSE'd with a different node.
1098  HandleSDNode Handle(N);
1099 
1100  X86ISelAddressMode Backup = AM;
1101  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1102  !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1103  return false;
1104  AM = Backup;
1105 
1106  // Try again after commuting the operands.
1107  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
1108  !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1109  return false;
1110  AM = Backup;
1111 
1112  // If we couldn't fold both operands into the address at the same time,
1113  // see if we can just put each operand into a register and fold at least
1114  // the add.
1115  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1116  !AM.Base_Reg.getNode() &&
1117  !AM.IndexReg.getNode()) {
1118  N = Handle.getValue();
1119  AM.Base_Reg = N.getOperand(0);
1120  AM.IndexReg = N.getOperand(1);
1121  AM.Scale = 1;
1122  return false;
1123  }
1124  N = Handle.getValue();
1125  return true;
1126 }
1127 
1128 // Insert a node into the DAG at least before the Pos node's position. This
1129 // will reposition the node as needed, and will assign it a node ID that is <=
1130 // the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1131 // IDs! The selection DAG must no longer depend on their uniqueness when this
1132 // is used.
1133 static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
1134  if (N->getNodeId() == -1 ||
1135  (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
1136  SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
1137  DAG.RepositionNode(Pos->getIterator(), N.getNode());
1138  // Mark Node as invalid for pruning as after this it may be a successor to a
1139  // selected node but otherwise be in the same position of Pos.
1140  // Conservatively mark it with the same -abs(Id) to assure node id
1141  // invariant is preserved.
1142  N->setNodeId(Pos->getNodeId());
1143  SelectionDAGISel::InvalidateNodeId(N.getNode());
1144  }
1145 }
1146 
1147 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1148 // safe. This allows us to convert the shift and and into an h-register
1149 // extract and a scaled index. Returns false if the simplification is
1150 // performed.
1151 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
1152  uint64_t Mask,
1153  SDValue Shift, SDValue X,
1154  X86ISelAddressMode &AM) {
1155  if (Shift.getOpcode() != ISD::SRL ||
1156  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1157  !Shift.hasOneUse())
1158  return true;
1159 
1160  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1161  if (ScaleLog <= 0 || ScaleLog >= 4 ||
1162  Mask != (0xffu << ScaleLog))
1163  return true;
1164 
1165  MVT VT = N.getSimpleValueType();
1166  SDLoc DL(N);
1167  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1168  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1169  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1170  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1171  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1172  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1173 
1174  // Insert the new nodes into the topological ordering. We must do this in
1175  // a valid topological ordering as nothing is going to go back and re-sort
1176  // these nodes. We continually insert before 'N' in sequence as this is
1177  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1178  // hierarchy left to express.
1179  insertDAGNode(DAG, N, Eight);
1180  insertDAGNode(DAG, N, Srl);
1181  insertDAGNode(DAG, N, NewMask);
1182  insertDAGNode(DAG, N, And);
1183  insertDAGNode(DAG, N, ShlCount);
1184  insertDAGNode(DAG, N, Shl);
1185  DAG.ReplaceAllUsesWith(N, Shl);
1186  AM.IndexReg = And;
1187  AM.Scale = (1 << ScaleLog);
1188  return false;
1189 }
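// Illustration (added, not in the original source): with C1 == 2 the input is
// (X >> 6) & 0x3fc; the rewrite produces And = (X >> 8) & 0xff, which can be
// selected as an h-register extract, and the trailing shl-by-2 is absorbed
// into the addressing mode as Scale = 4.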
1190 
1191 // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1192 // allows us to fold the shift into this addressing mode. Returns false if the
1193 // transform succeeded.
1194 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
1195  uint64_t Mask,
1196  SDValue Shift, SDValue X,
1197  X86ISelAddressMode &AM) {
1198  if (Shift.getOpcode() != ISD::SHL ||
1199  !isa<ConstantSDNode>(Shift.getOperand(1)))
1200  return true;
1201 
1202  // Not likely to be profitable if either the AND or SHIFT node has more
1203  // than one use (unless all uses are for address computation). Besides,
1204  // isel mechanism requires their node ids to be reused.
1205  if (!N.hasOneUse() || !Shift.hasOneUse())
1206  return true;
1207 
1208  // Verify that the shift amount is something we can fold.
1209  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1210  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1211  return true;
1212 
1213  MVT VT = N.getSimpleValueType();
1214  SDLoc DL(N);
1215  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1216  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1217  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1218 
1219  // Insert the new nodes into the topological ordering. We must do this in
1220  // a valid topological ordering as nothing is going to go back and re-sort
1221  // these nodes. We continually insert before 'N' in sequence as this is
1222  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1223  // hierarchy left to express.
1224  insertDAGNode(DAG, N, NewMask);
1225  insertDAGNode(DAG, N, NewAnd);
1226  insertDAGNode(DAG, N, NewShift);
1227  DAG.ReplaceAllUsesWith(N, NewShift);
1228 
1229  AM.Scale = 1 << ShiftAmt;
1230  AM.IndexReg = NewAnd;
1231  return false;
1232 }
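// Illustration (added, not in the original source): for (X << 2) & 0xfc the
// rewrite yields NewAnd = X & 0x3f with Scale = 4, so the shift is folded
// into the scaled-index field and only the cheaper mask remains on the index.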
1233 
1234 // Implement some heroics to detect shifts of masked values where the mask can
1235 // be replaced by extending the shift and undoing that in the addressing mode
1236 // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1237 // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1238 // the addressing mode. This results in code such as:
1239 //
1240 // int f(short *y, int *lookup_table) {
1241 // ...
1242 // return *y + lookup_table[*y >> 11];
1243 // }
1244 //
1245 // Turning into:
1246 // movzwl (%rdi), %eax
1247 // movl %eax, %ecx
1248 // shrl $11, %ecx
1249 // addl (%rsi,%rcx,4), %eax
1250 //
1251 // Instead of:
1252 // movzwl (%rdi), %eax
1253 // movl %eax, %ecx
1254 // shrl $9, %ecx
1255 // andl $124, %rcx
1256 // addl (%rsi,%rcx), %eax
1257 //
1258 // Note that this function assumes the mask is provided as a mask *after* the
1259 // value is shifted. The input chain may or may not match that, but computing
1260 // such a mask is trivial.
1261 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
1262  uint64_t Mask,
1263  SDValue Shift, SDValue X,
1264  X86ISelAddressMode &AM) {
1265  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1266  !isa<ConstantSDNode>(Shift.getOperand(1)))
1267  return true;
1268 
1269  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1270  unsigned MaskLZ = countLeadingZeros(Mask);
1271  unsigned MaskTZ = countTrailingZeros(Mask);
1272 
1273  // The amount of shift we're trying to fit into the addressing mode is taken
1274  // from the trailing zeros of the mask.
1275  unsigned AMShiftAmt = MaskTZ;
1276 
1277  // There is nothing we can do here unless the mask is removing some bits.
1278  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1279  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1280 
1281  // We also need to ensure that the mask is a contiguous run of bits.
1282  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1283 
1284  // Scale the leading zero count down based on the actual size of the value.
1285  // Also scale it down based on the size of the shift.
1286  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1287  if (MaskLZ < ScaleDown)
1288  return true;
1289  MaskLZ -= ScaleDown;
1290 
1291  // The final check is to ensure that any masked out high bits of X are
1292  // already known to be zero. Otherwise, the mask has a semantic impact
1293  // other than masking out a couple of low bits. Unfortunately, because of
1294  // the mask, zero extensions will be removed from operands in some cases.
1295  // This code works extra hard to look through extensions because we can
1296  // replace them with zero extensions cheaply if necessary.
1297  bool ReplacingAnyExtend = false;
1298  if (X.getOpcode() == ISD::ANY_EXTEND) {
1299  unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
1300  X.getOperand(0).getSimpleValueType().getSizeInBits();
1301  // Assume that we'll replace the any-extend with a zero-extend, and
1302  // narrow the search to the extended value.
1303  X = X.getOperand(0);
1304  MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1305  ReplacingAnyExtend = true;
1306  }
1307  APInt MaskedHighBits =
1308  APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
1309  KnownBits Known;
1310  DAG.computeKnownBits(X, Known);
1311  if (MaskedHighBits != Known.Zero) return true;
1312 
1313  // We've identified a pattern that can be transformed into a single shift
1314  // and an addressing mode. Make it so.
1315  MVT VT = N.getSimpleValueType();
1316  if (ReplacingAnyExtend) {
1317  assert(X.getValueType() != VT);
1318  // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1319  SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1320  insertDAGNode(DAG, N, NewX);
1321  X = NewX;
1322  }
1323  SDLoc DL(N);
1324  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1325  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1326  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1327  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1328 
1329  // Insert the new nodes into the topological ordering. We must do this in
1330  // a valid topological ordering as nothing is going to go back and re-sort
1331  // these nodes. We continually insert before 'N' in sequence as this is
1332  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1333  // hierarchy left to express.
1334  insertDAGNode(DAG, N, NewSRLAmt);
1335  insertDAGNode(DAG, N, NewSRL);
1336  insertDAGNode(DAG, N, NewSHLAmt);
1337  insertDAGNode(DAG, N, NewSHL);
1338  DAG.ReplaceAllUsesWith(N, NewSHL);
1339 
1340  AM.Scale = 1 << AMShiftAmt;
1341  AM.IndexReg = NewSRL;
1342  return false;
1343 }
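// Illustration (added, not in the original source): in the lookup_table
// example from the comment above, the canonicalized form is
// (and (srl x, 9), 124). Mask = 124 gives MaskTZ = 2, so AMShiftAmt = 2,
// NewSRL = (x >> 11), and Scale = 4, matching the "shrl $11" plus
// "(%rsi,%rcx,4)" sequence shown.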
1344 
1345 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1346  unsigned Depth) {
1347  SDLoc dl(N);
1348  LLVM_DEBUG({
1349  dbgs() << "MatchAddress: ";
1350  AM.dump(CurDAG);
1351  });
1352  // Limit recursion.
1353  if (Depth > 5)
1354  return matchAddressBase(N, AM);
1355 
1356  // If this is already a %rip relative address, we can only merge immediates
1357  // into it. Instead of handling this in every case, we handle it here.
1358  // RIP relative addressing: %rip + 32-bit displacement!
1359  if (AM.isRIPRelative()) {
1360  // FIXME: JumpTable and ExternalSymbol address currently don't like
1361  // displacements. It isn't very important, but this should be fixed for
1362  // consistency.
1363  if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1364  return true;
1365 
1366  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1367  if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1368  return false;
1369  return true;
1370  }
1371 
1372  switch (N.getOpcode()) {
1373  default: break;
1374  case ISD::LOCAL_RECOVER: {
1375  if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1376  if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1377  // Use the symbol and don't prefix it.
1378  AM.MCSym = ESNode->getMCSymbol();
1379  return false;
1380  }
1381  break;
1382  }
1383  case ISD::Constant: {
1384  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1385  if (!foldOffsetIntoAddress(Val, AM))
1386  return false;
1387  break;
1388  }
1389 
1390  case X86ISD::Wrapper:
1391  case X86ISD::WrapperRIP:
1392  if (!matchWrapper(N, AM))
1393  return false;
1394  break;
1395 
1396  case ISD::LOAD:
1397  if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1398  return false;
1399  break;
1400 
1401  case ISD::FrameIndex:
1402  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1403  AM.Base_Reg.getNode() == nullptr &&
1404  (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1405  AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1406  AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1407  return false;
1408  }
1409  break;
1410 
1411  case ISD::SHL:
1412  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1413  break;
1414 
1415  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1416  unsigned Val = CN->getZExtValue();
1417  // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1418  // that the base operand remains free for further matching. If
1419  // the base doesn't end up getting used, a post-processing step
1420  // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1421  if (Val == 1 || Val == 2 || Val == 3) {
1422  AM.Scale = 1 << Val;
1423  SDValue ShVal = N.getOperand(0);
1424 
1425  // Okay, we know that we have a scale by now. However, if the scaled
1426  // value is an add of something and a constant, we can fold the
1427  // constant into the disp field here.
1428  if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1429  AM.IndexReg = ShVal.getOperand(0);
1430  ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1431  uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1432  if (!foldOffsetIntoAddress(Disp, AM))
1433  return false;
1434  }
1435 
1436  AM.IndexReg = ShVal;
1437  return false;
1438  }
1439  }
1440  break;
1441 
1442  case ISD::SRL: {
1443  // Scale must not be used already.
1444  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1445 
1446  SDValue And = N.getOperand(0);
1447  if (And.getOpcode() != ISD::AND) break;
1448  SDValue X = And.getOperand(0);
1449 
1450  // We only handle up to 64-bit values here as those are what matter for
1451  // addressing mode optimizations.
1452  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1453 
1454  // The mask used for the transform is expected to be post-shift, but we
1455  // found the shift first so just apply the shift to the mask before passing
1456  // it down.
1457  if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1458  !isa<ConstantSDNode>(And.getOperand(1)))
1459  break;
1460  uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1461 
1462  // Try to fold the mask and shift into the scale, and return false if we
1463  // succeed.
1464  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1465  return false;
1466  break;
1467  }
1468 
1469  case ISD::SMUL_LOHI:
1470  case ISD::UMUL_LOHI:
1471  // A mul_lohi where we need the low part can be folded as a plain multiply.
1472  if (N.getResNo() != 0) break;
1473  LLVM_FALLTHROUGH;
1474  case ISD::MUL:
1475  case X86ISD::MUL_IMM:
1476  // X*[3,5,9] -> X+X*[2,4,8]
1477  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1478  AM.Base_Reg.getNode() == nullptr &&
1479  AM.IndexReg.getNode() == nullptr) {
1480  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1481  if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1482  CN->getZExtValue() == 9) {
1483  AM.Scale = unsigned(CN->getZExtValue())-1;
1484 
1485  SDValue MulVal = N.getOperand(0);
1486  SDValue Reg;
1487 
1488  // Okay, we know that we have a scale by now. However, if the scaled
1489  // value is an add of something and a constant, we can fold the
1490  // constant into the disp field here.
1491  if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1492  isa<ConstantSDNode>(MulVal.getOperand(1))) {
1493  Reg = MulVal.getOperand(0);
1494  ConstantSDNode *AddVal =
1495  cast<ConstantSDNode>(MulVal.getOperand(1));
1496  uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1497  if (foldOffsetIntoAddress(Disp, AM))
1498  Reg = N.getOperand(0);
1499  } else {
1500  Reg = N.getOperand(0);
1501  }
1502 
1503  AM.IndexReg = AM.Base_Reg = Reg;
1504  return false;
1505  }
1506  }
1507  break;
1508 
1509  case ISD::SUB: {
1510  // Given A-B, if A can be completely folded into the address (with the
1511  // index field left unused), use -B as the index.
1512  // This is a win if A has multiple parts that can be folded into
1513  // the address. Also, this saves a mov if the base register has
1514  // other uses, since it avoids a two-address sub instruction; however,
1515  // it costs an additional mov if the index register has other uses.
1516 
1517  // Add an artificial use to this node so that we can keep track of
1518  // it if it gets CSE'd with a different node.
1519  HandleSDNode Handle(N);
1520 
1521  // Test if the LHS of the sub can be folded.
1522  X86ISelAddressMode Backup = AM;
1523  if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
1524  AM = Backup;
1525  break;
1526  }
1527  // Test if the index field is free for use.
1528  if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
1529  AM = Backup;
1530  break;
1531  }
1532 
1533  int Cost = 0;
1534  SDValue RHS = Handle.getValue().getOperand(1);
1535  // If the RHS involves a register with multiple uses, this
1536  // transformation incurs an extra mov, due to the neg instruction
1537  // clobbering its operand.
1538  if (!RHS.getNode()->hasOneUse() ||
1539  RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
1540  RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
1541  RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
1542  (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
1543  RHS.getOperand(0).getValueType() == MVT::i32))
1544  ++Cost;
1545  // If the base is a register with multiple uses, this
1546  // transformation may save a mov.
1547  // FIXME: Don't rely on DELETED_NODEs.
1548  if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
1549  AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
1550  !AM.Base_Reg.getNode()->hasOneUse()) ||
1551  AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1552  --Cost;
1553  // If the folded LHS was interesting, this transformation saves
1554  // address arithmetic.
1555  if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
1556  ((AM.Disp != 0) && (Backup.Disp == 0)) +
1557  (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
1558  --Cost;
1559  // If it doesn't look like it may be an overall win, don't do it.
1560  if (Cost >= 0) {
1561  AM = Backup;
1562  break;
1563  }
1564 
1565  // Ok, the transformation is legal and appears profitable. Go for it.
1566  SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
1567  SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
1568  AM.IndexReg = Neg;
1569  AM.Scale = 1;
1570 
1571  // Insert the new nodes into the topological ordering.
1572  insertDAGNode(*CurDAG, Handle.getValue(), Zero);
1573  insertDAGNode(*CurDAG, Handle.getValue(), Neg);
1574  return false;
1575  }
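  // Illustrative note (editorial addition, not part of the original source):
  // for an address like  Sym + Off - B, the LHS folds into the displacement and
  // the negated RHS becomes the index, e.g. (register names purely illustrative):
  //   negq %rbx                       ; Neg = 0 - B
  //   leaq Sym+Off(,%rbx,1), %rax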
1576 
1577  case ISD::ADD:
1578  if (!matchAdd(N, AM, Depth))
1579  return false;
1580  break;
1581 
1582  case ISD::OR:
1583  // We want to look through a transform in InstCombine and DAGCombiner that
1584  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
1585  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
1586  // An 'lea' can then be used to match the shift (multiply) and add:
1587  // and $1, %esi
1588  // lea (%rsi, %rdi, 8), %rax
1589  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
1590  !matchAdd(N, AM, Depth))
1591  return false;
1592  break;
1593 
1594  case ISD::AND: {
1595  // Perform some heroic transforms on an and of a constant-count shift
1596  // with a constant to enable use of the scaled offset field.
1597 
1598  // Scale must not be used already.
1599  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1600 
1601  SDValue Shift = N.getOperand(0);
1602  if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
1603  SDValue X = Shift.getOperand(0);
1604 
1605  // We only handle up to 64-bit values here as those are what matter for
1606  // addressing mode optimizations.
1607  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1608 
1609  if (!isa<ConstantSDNode>(N.getOperand(1)))
1610  break;
1611  uint64_t Mask = N.getConstantOperandVal(1);
1612 
1613  // Try to fold the mask and shift into an extract and scale.
1614  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
1615  return false;
1616 
1617  // Try to fold the mask and shift directly into the scale.
1618  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
1619  return false;
1620 
1621  // Try to swap the mask and shift to place shifts which can be done as
1622  // a scale on the outside of the mask.
1623  if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
1624  return false;
1625  break;
1626  }
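  // Illustrative note (editorial addition, not part of the original source):
  // a typical input handled here is (and (shl x, 2), 0x3fc); swapping the mask
  // and the shift gives (shl (and x, 0xff), 2), so the shl can become the Scale
  // of the addressing mode: base + (x & 0xff)*4.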
1627  }
1628 
1629  return matchAddressBase(N, AM);
1630 }
1631 
1632 /// Helper for MatchAddress. Add the specified node to the
1633 /// specified addressing mode without any further recursion.
1634 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1635  // Is the base register already occupied?
1636  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
1637  // If so, check to see if the scale index register is set.
1638  if (!AM.IndexReg.getNode()) {
1639  AM.IndexReg = N;
1640  AM.Scale = 1;
1641  return false;
1642  }
1643 
1644  // Otherwise, we cannot select it.
1645  return true;
1646  }
1647 
1648  // Default, generate it as a register.
1649  AM.BaseType = X86ISelAddressMode::RegBase;
1650  AM.Base_Reg = N;
1651  return false;
1652 }
1653 
1654 /// Helper for selectVectorAddr. Handles things that can be folded into a
1655 /// gather scatter address. The index register and scale should have already
1656 /// been handled.
1657 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
1658  // TODO: Support other operations.
1659  switch (N.getOpcode()) {
1660  case ISD::Constant: {
1661  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1662  if (!foldOffsetIntoAddress(Val, AM))
1663  return false;
1664  break;
1665  }
1666  case X86ISD::Wrapper:
1667  if (!matchWrapper(N, AM))
1668  return false;
1669  break;
1670  }
1671 
1672  return matchAddressBase(N, AM);
1673 }
1674 
1675 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
1676  SDValue &Scale, SDValue &Index,
1677  SDValue &Disp, SDValue &Segment) {
1678  X86ISelAddressMode AM;
1679  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
1680  AM.IndexReg = Mgs->getIndex();
1681  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
1682 
1683  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1684  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1685  if (AddrSpace == 256)
1686  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1687  if (AddrSpace == 257)
1688  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1689  if (AddrSpace == 258)
1690  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1691 
1692  // Try to match into the base and displacement fields.
1693  if (matchVectorAddress(N, AM))
1694  return false;
1695 
1696  MVT VT = N.getSimpleValueType();
1697  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1698  if (!AM.Base_Reg.getNode())
1699  AM.Base_Reg = CurDAG->getRegister(0, VT);
1700  }
1701 
1702  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1703  return true;
1704 }
1705 
1706 /// Returns true if it is able to pattern match an addressing mode.
1707 /// It returns the operands which make up the maximal addressing mode it can
1708 /// match by reference.
1709 ///
1710 /// Parent is the parent node of the addr operand that is being matched. It
1711 /// is always a load, store, atomic node, or null. It is only null when
1712 /// checking memory operands for inline asm nodes.
1713 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
1714  SDValue &Scale, SDValue &Index,
1715  SDValue &Disp, SDValue &Segment) {
1716  X86ISelAddressMode AM;
1717 
1718  if (Parent &&
1719  // This list of opcodes are all the nodes that have an "addr:$ptr" operand
1720  // that are not a MemSDNode, and thus don't have proper addrspace info.
1721  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
1722  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
1723  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
1724  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
1725  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
1726  unsigned AddrSpace =
1727  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1728  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1729  if (AddrSpace == 256)
1730  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1731  if (AddrSpace == 257)
1732  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1733  if (AddrSpace == 258)
1734  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1735  }
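  // Illustrative note (editorial addition, not part of the original source):
  // these address spaces correspond to IR such as
  //   load i32, i32 addrspace(256)* %p
  // which is then emitted as a %gs-relative memory operand, per the mapping in
  // the comment above.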
1736 
1737  if (matchAddress(N, AM))
1738  return false;
1739 
1740  MVT VT = N.getSimpleValueType();
1741  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1742  if (!AM.Base_Reg.getNode())
1743  AM.Base_Reg = CurDAG->getRegister(0, VT);
1744  }
1745 
1746  if (!AM.IndexReg.getNode())
1747  AM.IndexReg = CurDAG->getRegister(0, VT);
1748 
1749  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1750  return true;
1751 }
1752 
1753 // We can only fold a load if all nodes between it and the root node have a
1754 // single use. If there are additional uses, we could end up duplicating the
1755 // load.
1756 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
1757  while (User != Root) {
1758  if (!User->hasOneUse())
1759  return false;
1760  User = *User->use_begin();
1761  }
1762 
1763  return true;
1764 }
1765 
1766 /// Match a scalar SSE load. In particular, we want to match a load whose top
1767 /// elements are either undef or zeros. The load flavor is derived from the
1768 /// type of N, which is either v4f32 or v2f64.
1769 ///
1770 /// We also return:
1771 /// PatternChainNode: this is the matched node that has a chain input and
1772 /// output.
1773 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
1774  SDValue N, SDValue &Base,
1775  SDValue &Scale, SDValue &Index,
1776  SDValue &Disp, SDValue &Segment,
1777  SDValue &PatternNodeWithChain) {
1778  if (!hasSingleUsesFromRoot(Root, Parent))
1779  return false;
1780 
1781  // We can allow a full vector load here since narrowing a load is ok.
1782  if (ISD::isNON_EXTLoad(N.getNode())) {
1783  PatternNodeWithChain = N;
1784  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1785  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
1786  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1787  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1788  Segment);
1789  }
1790  }
1791 
1792  // We can also match the special zero extended load opcode.
1793  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
1794  PatternNodeWithChain = N;
1795  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1796  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
1797  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
1798  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
1799  Segment);
1800  }
1801  }
1802 
1803  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
1804  // once. Otherwise the load might get duplicated and the chain output of the
1805  // duplicate load will not be observed by all dependencies.
1806  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
1807  PatternNodeWithChain = N.getOperand(0);
1808  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1809  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1810  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
1811  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1812  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1813  Segment);
1814  }
1815  }
1816 
1817  // Also handle the case where we explicitly require zeros in the top
1818  // elements. This is a vector shuffle from the zero vector.
1819  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1820  // Check to see if the top elements are all zeros (or bitcast of zeros).
1821  N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1822  N.getOperand(0).getNode()->hasOneUse()) {
1823  PatternNodeWithChain = N.getOperand(0).getOperand(0);
1824  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1825  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1826  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
1827  // Okay, this is a zero extending load. Fold it.
1828  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1829  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1830  Segment);
1831  }
1832  }
1833 
1834  return false;
1835 }
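// Illustrative note (editorial addition, not part of the original source):
// matching here is what lets a pattern like (fadd x, (scalar_to_vector (load p)))
// fold the memory operand, e.g. addss (%rdi), %xmm0, instead of emitting a
// separate scalar load first.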
1836 
1837 
1838 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
1839  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1840  uint64_t ImmVal = CN->getZExtValue();
1841  if (!isUInt<32>(ImmVal))
1842  return false;
1843 
1844  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
1845  return true;
1846  }
1847 
1848  // In static codegen with small code model, we can get the address of a label
1849  // into a register with 'movl'
1850  if (N->getOpcode() != X86ISD::Wrapper)
1851  return false;
1852 
1853  N = N.getOperand(0);
1854 
1855  // At least GNU as does not accept 'movl' for TPOFF relocations.
1856  // FIXME: We could use 'movl' when we know we are targeting MC.
1857  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
1858  return false;
1859 
1860  Imm = N;
1861  if (N->getOpcode() != ISD::TargetGlobalAddress)
1862  return TM.getCodeModel() == CodeModel::Small;
1863 
1864  Optional<ConstantRange> CR =
1865  cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
1866  if (!CR)
1867  return TM.getCodeModel() == CodeModel::Small;
1868 
1869  return CR->getUnsignedMax().ult(1ull << 32);
1870 }
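// Illustrative note (editorial addition, not part of the original source):
// under the small code model this permits
//   movl $global, %eax     ; 32-bit immediate, upper 32 bits implicitly zeroed
// rather than the longer movabsq $global, %rax.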
1871 
1872 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
1873  SDValue &Scale, SDValue &Index,
1874  SDValue &Disp, SDValue &Segment) {
1875  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
1876  SDLoc DL(N);
1877 
1878  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
1879  return false;
1880 
1881  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
1882  if (RN && RN->getReg() == 0)
1883  Base = CurDAG->getRegister(0, MVT::i64);
1884  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
1885  // Base could already be %rip, particularly in the x32 ABI.
1886  Base = SDValue(CurDAG->getMachineNode(
1887  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1888  CurDAG->getTargetConstant(0, DL, MVT::i64),
1889  Base,
1890  CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
1891  0);
1892  }
1893 
1894  RN = dyn_cast<RegisterSDNode>(Index);
1895  if (RN && RN->getReg() == 0)
1896  Index = CurDAG->getRegister(0, MVT::i64);
1897  else {
1898  assert(Index.getValueType() == MVT::i32 &&
1899  "Expect to be extending 32-bit registers for use in LEA");
1900  Index = SDValue(CurDAG->getMachineNode(
1901  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1902  CurDAG->getTargetConstant(0, DL, MVT::i64),
1903  Index,
1904  CurDAG->getTargetConstant(X86::sub_32bit, DL,
1905  MVT::i32)),
1906  0);
1907  }
1908 
1909  return true;
1910 }
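// Illustrative note (editorial addition, not part of the original source):
// the SUBREG_TO_REG wrappers above let a 32-bit result be computed with 64-bit
// address registers, e.g.
//   leal (%rdx,%rcx,4), %eax
// which is the LEA64_32 form relevant to the x32 ABI mentioned above.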
1911 
1912 /// Calls SelectAddr and determines if the maximal addressing
1913 /// mode it matches can be cost effectively emitted as an LEA instruction.
1914 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
1915  SDValue &Base, SDValue &Scale,
1916  SDValue &Index, SDValue &Disp,
1917  SDValue &Segment) {
1918  X86ISelAddressMode AM;
1919 
1920  // Save the DL and VT before calling matchAddress, it can invalidate N.
1921  SDLoc DL(N);
1922  MVT VT = N.getSimpleValueType();
1923 
1924  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1925  // segments.
1926  SDValue Copy = AM.Segment;
1927  SDValue T = CurDAG->getRegister(0, MVT::i32);
1928  AM.Segment = T;
1929  if (matchAddress(N, AM))
1930  return false;
1931  assert (T == AM.Segment);
1932  AM.Segment = Copy;
1933 
1934  unsigned Complexity = 0;
1935  if (AM.BaseType == X86ISelAddressMode::RegBase)
1936  if (AM.Base_Reg.getNode())
1937  Complexity = 1;
1938  else
1939  AM.Base_Reg = CurDAG->getRegister(0, VT);
1940  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1941  Complexity = 4;
1942 
1943  if (AM.IndexReg.getNode())
1944  Complexity++;
1945  else
1946  AM.IndexReg = CurDAG->getRegister(0, VT);
1947 
1948  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or a
1949  // simple shift.
1950  if (AM.Scale > 1)
1951  Complexity++;
1952 
1953  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1954  // to a LEA. This is determined with some experimentation but is by no means
1955  // optimal (especially for code size consideration). LEA is nice because of
1956  // its three-address nature. Tweak the cost function again when we can run
1957  // convertToThreeAddress() at register allocation time.
1958  if (AM.hasSymbolicDisplacement()) {
1959  // For X86-64, always use LEA to materialize RIP-relative addresses.
1960  if (Subtarget->is64Bit())
1961  Complexity = 4;
1962  else
1963  Complexity += 2;
1964  }
1965 
1966  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
1967  Complexity++;
1968 
1969  // If it isn't worth using an LEA, reject it.
1970  if (Complexity <= 2)
1971  return false;
1972 
1973  getAddressOperands(AM, DL, Base, Scale, Index, Disp, Segment);
1974  return true;
1975 }
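// Illustrative note (editorial addition, not part of the original source):
// once Complexity exceeds 2, an address such as base + index*4 + 8 is emitted
// as a single three-address instruction,
//   leaq 8(%rdi,%rsi,4), %rax
// instead of a shift plus two adds.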
1976 
1977 /// This is only run on TargetGlobalTLSAddress nodes.
1978 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
1979  SDValue &Scale, SDValue &Index,
1980  SDValue &Disp, SDValue &Segment) {
1981  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1982  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1983 
1984  X86ISelAddressMode AM;
1985  AM.GV = GA->getGlobal();
1986  AM.Disp += GA->getOffset();
1987  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
1988  AM.SymbolFlags = GA->getTargetFlags();
1989 
1990  if (N.getValueType() == MVT::i32) {
1991  AM.Scale = 1;
1992  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1993  } else {
1994  AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1995  }
1996 
1997  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1998  return true;
1999 }
2000 
2001 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
2002  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2003  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
2004  N.getValueType());
2005  return true;
2006  }
2007 
2008  // Keep track of the original value type and whether this value was
2009  // truncated. If we see a truncation from pointer type to VT that truncates
2010  // bits that are known to be zero, we can use a narrow reference.
2011  EVT VT = N.getValueType();
2012  bool WasTruncated = false;
2013  if (N.getOpcode() == ISD::TRUNCATE) {
2014  WasTruncated = true;
2015  N = N.getOperand(0);
2016  }
2017 
2018  if (N.getOpcode() != X86ISD::Wrapper)
2019  return false;
2020 
2021  // We can only use non-GlobalValues as immediates if they were not truncated,
2022  // as we do not have any range information. If we have a GlobalValue and the
2023  // address was not truncated, we can select it as an operand directly.
2024  unsigned Opc = N.getOperand(0)->getOpcode();
2025  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2026  Op = N.getOperand(0);
2027  // We can only select the operand directly if we didn't have to look past a
2028  // truncate.
2029  return !WasTruncated;
2030  }
2031 
2032  // Check that the global's range fits into VT.
2033  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2034  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2035  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2036  return false;
2037 
2038  // Okay, we can use a narrow reference.
2039  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2040  GA->getOffset(), GA->getTargetFlags());
2041  return true;
2042 }
2043 
2044 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2045  SDValue &Base, SDValue &Scale,
2046  SDValue &Index, SDValue &Disp,
2047  SDValue &Segment) {
2048  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2049  !IsProfitableToFold(N, P, Root) ||
2050  !IsLegalToFold(N, P, Root, OptLevel))
2051  return false;
2052 
2053  return selectAddr(N.getNode(),
2054  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2055 }
2056 
2057 bool X86DAGToDAGISel::tryFoldVecLoad(SDNode *Root, SDNode *P, SDValue N,
2058  SDValue &Base, SDValue &Scale,
2059  SDValue &Index, SDValue &Disp,
2060  SDValue &Segment) {
2061  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2062  useNonTemporalLoad(cast<LoadSDNode>(N)) ||
2063  !IsProfitableToFold(N, P, Root) ||
2064  !IsLegalToFold(N, P, Root, OptLevel))
2065  return false;
2066 
2067  return selectAddr(N.getNode(),
2068  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2069 }
2070 
2071 /// Return an SDNode that returns the value of the global base register.
2072 /// Output instructions required to initialize the global base register,
2073 /// if necessary.
2074 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2075  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2076  auto &DL = MF->getDataLayout();
2077  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2078 }
2079 
2080 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2081  if (N->getOpcode() == ISD::TRUNCATE)
2082  N = N->getOperand(0).getNode();
2083  if (N->getOpcode() != X86ISD::Wrapper)
2084  return false;
2085 
2086  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2087  if (!GA)
2088  return false;
2089 
2090  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2091  return CR && CR->getSignedMin().sge(-1ull << Width) &&
2092  CR->getSignedMax().slt(1ull << Width);
2093 }
2094 
2095 /// Test whether the given X86ISD::CMP node has any uses which require the SF
2096 /// or OF bits to be accurate.
2097 static bool hasNoSignedComparisonUses(SDNode *N) {
2098  // Examine each user of the node.
2099  for (SDNode::use_iterator UI = N->use_begin(),
2100  UE = N->use_end(); UI != UE; ++UI) {
2101  // Only examine CopyToReg uses.
2102  if (UI->getOpcode() != ISD::CopyToReg)
2103  return false;
2104  // Only examine CopyToReg uses that copy to EFLAGS.
2105  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
2106  X86::EFLAGS)
2107  return false;
2108  // Examine each user of the CopyToReg use.
2109  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2110  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2111  // Only examine the Flag result.
2112  if (FlagUI.getUse().getResNo() != 1) continue;
2113  // Anything unusual: assume conservatively.
2114  if (!FlagUI->isMachineOpcode()) return false;
2115  // Examine the opcode of the user.
2116  switch (FlagUI->getMachineOpcode()) {
2117  // These comparisons don't treat the most significant bit specially.
2118  case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
2119  case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
2120  case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
2121  case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
2122  case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
2123  case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
2124  case X86::CMOVA16rr: case X86::CMOVA16rm:
2125  case X86::CMOVA32rr: case X86::CMOVA32rm:
2126  case X86::CMOVA64rr: case X86::CMOVA64rm:
2127  case X86::CMOVAE16rr: case X86::CMOVAE16rm:
2128  case X86::CMOVAE32rr: case X86::CMOVAE32rm:
2129  case X86::CMOVAE64rr: case X86::CMOVAE64rm:
2130  case X86::CMOVB16rr: case X86::CMOVB16rm:
2131  case X86::CMOVB32rr: case X86::CMOVB32rm:
2132  case X86::CMOVB64rr: case X86::CMOVB64rm:
2133  case X86::CMOVBE16rr: case X86::CMOVBE16rm:
2134  case X86::CMOVBE32rr: case X86::CMOVBE32rm:
2135  case X86::CMOVBE64rr: case X86::CMOVBE64rm:
2136  case X86::CMOVE16rr: case X86::CMOVE16rm:
2137  case X86::CMOVE32rr: case X86::CMOVE32rm:
2138  case X86::CMOVE64rr: case X86::CMOVE64rm:
2139  case X86::CMOVNE16rr: case X86::CMOVNE16rm:
2140  case X86::CMOVNE32rr: case X86::CMOVNE32rm:
2141  case X86::CMOVNE64rr: case X86::CMOVNE64rm:
2142  case X86::CMOVNP16rr: case X86::CMOVNP16rm:
2143  case X86::CMOVNP32rr: case X86::CMOVNP32rm:
2144  case X86::CMOVNP64rr: case X86::CMOVNP64rm:
2145  case X86::CMOVP16rr: case X86::CMOVP16rm:
2146  case X86::CMOVP32rr: case X86::CMOVP32rm:
2147  case X86::CMOVP64rr: case X86::CMOVP64rm:
2148  continue;
2149  // Anything else: assume conservatively.
2150  default: return false;
2151  }
2152  }
2153  }
2154  return true;
2155 }
2156 
2157 /// Test whether the given node which sets flags has any uses which require the
2158 /// CF flag to be accurate.
2159 static bool hasNoCarryFlagUses(SDNode *N) {
2160  // Examine each user of the node.
2161  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
2162  ++UI) {
2163  // Only check things that use the flags.
2164  if (UI.getUse().getResNo() != 1)
2165  continue;
2166  // Only examine CopyToReg uses.
2167  if (UI->getOpcode() != ISD::CopyToReg)
2168  return false;
2169  // Only examine CopyToReg uses that copy to EFLAGS.
2170  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2171  return false;
2172  // Examine each user of the CopyToReg use.
2173  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2174  FlagUI != FlagUE; ++FlagUI) {
2175  // Only examine the Flag result.
2176  if (FlagUI.getUse().getResNo() != 1)
2177  continue;
2178  // Anything unusual: assume conservatively.
2179  if (!FlagUI->isMachineOpcode())
2180  return false;
2181  // Examine the opcode of the user.
2182  switch (FlagUI->getMachineOpcode()) {
2183  // Comparisons which don't examine the CF flag.
2184  case X86::SETOr: case X86::SETNOr: case X86::SETEr: case X86::SETNEr:
2185  case X86::SETSr: case X86::SETNSr: case X86::SETPr: case X86::SETNPr:
2186  case X86::SETLr: case X86::SETGEr: case X86::SETLEr: case X86::SETGr:
2187  case X86::JO_1: case X86::JNO_1: case X86::JE_1: case X86::JNE_1:
2188  case X86::JS_1: case X86::JNS_1: case X86::JP_1: case X86::JNP_1:
2189  case X86::JL_1: case X86::JGE_1: case X86::JLE_1: case X86::JG_1:
2190  case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
2191  case X86::CMOVO16rm: case X86::CMOVO32rm: case X86::CMOVO64rm:
2192  case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr:
2193  case X86::CMOVNO16rm: case X86::CMOVNO32rm: case X86::CMOVNO64rm:
2194  case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
2195  case X86::CMOVE16rm: case X86::CMOVE32rm: case X86::CMOVE64rm:
2196  case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
2197  case X86::CMOVNE16rm: case X86::CMOVNE32rm: case X86::CMOVNE64rm:
2198  case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
2199  case X86::CMOVS16rm: case X86::CMOVS32rm: case X86::CMOVS64rm:
2200  case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
2201  case X86::CMOVNS16rm: case X86::CMOVNS32rm: case X86::CMOVNS64rm:
2202  case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
2203  case X86::CMOVP16rm: case X86::CMOVP32rm: case X86::CMOVP64rm:
2204  case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
2205  case X86::CMOVNP16rm: case X86::CMOVNP32rm: case X86::CMOVNP64rm:
2206  case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
2207  case X86::CMOVL16rm: case X86::CMOVL32rm: case X86::CMOVL64rm:
2208  case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
2209  case X86::CMOVGE16rm: case X86::CMOVGE32rm: case X86::CMOVGE64rm:
2210  case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
2211  case X86::CMOVLE16rm: case X86::CMOVLE32rm: case X86::CMOVLE64rm:
2212  case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
2213  case X86::CMOVG16rm: case X86::CMOVG32rm: case X86::CMOVG64rm:
2214  continue;
2215  // Anything else: assume conservatively.
2216  default:
2217  return false;
2218  }
2219  }
2220  }
2221  return true;
2222 }
2223 
2224 /// Check whether or not the chain ending in StoreNode is suitable for doing
2225 /// the {load; op; store} to modify transformation.
2226 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2227  SDValue StoredVal, SelectionDAG *CurDAG,
2228  unsigned LoadOpNo,
2229  LoadSDNode *&LoadNode,
2230  SDValue &InputChain) {
2231  // Is the stored value result 0 of the operation?
2232  if (StoredVal.getResNo() != 0) return false;
2233 
2234  // Are there other uses of the operation other than the store?
2235  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2236 
2237  // Is the store non-extending and non-indexed?
2238  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2239  return false;
2240 
2241  SDValue Load = StoredVal->getOperand(LoadOpNo);
2242  // Is the stored value a non-extending and non-indexed load?
2243  if (!ISD::isNormalLoad(Load.getNode())) return false;
2244 
2245  // Return LoadNode by reference.
2246  LoadNode = cast<LoadSDNode>(Load);
2247 
2248  // Is store the only read of the loaded value?
2249  if (!Load.hasOneUse())
2250  return false;
2251 
2252  // Is the address of the store the same as the load?
2253  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2254  LoadNode->getOffset() != StoreNode->getOffset())
2255  return false;
2256 
2257  bool FoundLoad = false;
2258  SmallVector<SDValue, 4> ChainOps;
2259  SmallVector<const SDNode *, 4> LoopWorklist;
2260  SmallPtrSet<const SDNode *, 32> Visited;
2261  const unsigned int Max = 1024;
2262 
2263  // Visualization of Load-Op-Store fusion:
2264  // -------------------------
2265  // Legend:
2266  // *-lines = Chain operand dependencies.
2267  // |-lines = Normal operand dependencies.
2268  // Dependencies flow down and right. n-suffix references multiple nodes.
2269  //
2270  // C Xn C
2271  // * * *
2272  // * * *
2273  // Xn A-LD Yn TF Yn
2274  // * * \ | * |
2275  // * * \ | * |
2276  // * * \ | => A--LD_OP_ST
2277  // * * \| \
2278  // TF OP \
2279  // * | \ Zn
2280  // * | \
2281  // A-ST Zn
2282  //
2283 
2284  // This merge induces dependencies from: #1: Xn -> LD, OP, Zn
2285  // #2: Yn -> LD
2286  // #3: ST -> Zn
2287 
2288  // Ensure the transform is safe by checking for the dual
2289  // dependencies to make sure we do not induce a loop.
2290 
2291  // As LD is a predecessor to both OP and ST we can do this by checking:
2292  // a). if LD is a predecessor to a member of Xn or Yn.
2293  // b). if a Zn is a predecessor to ST.
2294 
2295  // However, (b) can only occur through being a chain predecessor to
2296  // ST, which is the same as Zn being a member or predecessor of Xn,
2297  // which is a subset of LD being a predecessor of Xn. So it's
2298  // subsumed by check (a).
2299 
2300  SDValue Chain = StoreNode->getChain();
2301 
2302  // Gather X elements in ChainOps.
2303  if (Chain == Load.getValue(1)) {
2304  FoundLoad = true;
2305  ChainOps.push_back(Load.getOperand(0));
2306  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2307  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2308  SDValue Op = Chain.getOperand(i);
2309  if (Op == Load.getValue(1)) {
2310  FoundLoad = true;
2311  // Drop Load, but keep its chain. No cycle check necessary.
2312  ChainOps.push_back(Load.getOperand(0));
2313  continue;
2314  }
2315  LoopWorklist.push_back(Op.getNode());
2316  ChainOps.push_back(Op);
2317  }
2318  }
2319 
2320  if (!FoundLoad)
2321  return false;
2322 
2323  // Worklist is currently Xn. Add Yn to worklist.
2324  for (SDValue Op : StoredVal->ops())
2325  if (Op.getNode() != LoadNode)
2326  LoopWorklist.push_back(Op.getNode());
2327 
2328  // Check (a) if Load is a predecessor to Xn + Yn
2329  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2330  true))
2331  return false;
2332 
2333  InputChain =
2334  CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2335  return true;
2336 }
2337 
2338 // Change a chain of {load; op; store} of the same value into a simple op
2339 // through memory of that value, if the uses of the modified value and its
2340 // address are suitable.
2341 //
2342 // The tablegen memory-operand pattern is currently not able to match
2343 // the case where the EFLAGS on the original operation are used.
2344 //
2345 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2346 // be transferred from a node in the pattern to the result node, probably with
2347 // a new keyword. For example, we have this
2348 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2349 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2350 // (implicit EFLAGS)]>;
2351 // but maybe need something like this
2352 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2353 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2354 // (transferrable EFLAGS)]>;
2355 //
2356 // Until then, we manually fold these and instruction select the operation
2357 // here.
2358 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2359  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2360  SDValue StoredVal = StoreNode->getOperand(1);
2361  unsigned Opc = StoredVal->getOpcode();
2362 
2363  // Before we try to select anything, make sure this is a memory operand size
2364  // and opcode we can handle. Note that this must match the code below that
2365  // actually lowers the opcodes.
2366  EVT MemVT = StoreNode->getMemoryVT();
2367  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2368  MemVT != MVT::i8)
2369  return false;
2370 
2371  bool IsCommutable = false;
2372  switch (Opc) {
2373  default:
2374  return false;
2375  case X86ISD::INC:
2376  case X86ISD::DEC:
2377  case X86ISD::SUB:
2378  case X86ISD::SBB:
2379  break;
2380  case X86ISD::ADD:
2381  case X86ISD::ADC:
2382  case X86ISD::AND:
2383  case X86ISD::OR:
2384  case X86ISD::XOR:
2385  IsCommutable = true;
2386  break;
2387  }
2388 
2389  unsigned LoadOpNo = 0;
2390  LoadSDNode *LoadNode = nullptr;
2391  SDValue InputChain;
2392  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2393  LoadNode, InputChain)) {
2394  if (!IsCommutable)
2395  return false;
2396 
2397  // This operation is commutable, try the other operand.
2398  LoadOpNo = 1;
2399  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2400  LoadNode, InputChain))
2401  return false;
2402  }
2403 
2404  SDValue Base, Scale, Index, Disp, Segment;
2405  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2406  Segment))
2407  return false;
2408 
2409  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2410  unsigned Opc8) {
2411  switch (MemVT.getSimpleVT().SimpleTy) {
2412  case MVT::i64:
2413  return Opc64;
2414  case MVT::i32:
2415  return Opc32;
2416  case MVT::i16:
2417  return Opc16;
2418  case MVT::i8:
2419  return Opc8;
2420  default:
2421  llvm_unreachable("Invalid size!");
2422  }
2423  };
2424 
2425  MachineSDNode *Result;
2426  switch (Opc) {
2427  case X86ISD::INC:
2428  case X86ISD::DEC: {
2429  unsigned NewOpc =
2430  Opc == X86ISD::INC
2431  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2432  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2433  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2434  Result =
2435  CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
2436  break;
2437  }
2438  case X86ISD::ADD:
2439  case X86ISD::ADC:
2440  case X86ISD::SUB:
2441  case X86ISD::SBB:
2442  case X86ISD::AND:
2443  case X86ISD::OR:
2444  case X86ISD::XOR: {
2445  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
2446  switch (Opc) {
2447  case X86ISD::ADD:
2448  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
2449  X86::ADD8mr);
2450  case X86ISD::ADC:
2451  return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
2452  X86::ADC8mr);
2453  case X86ISD::SUB:
2454  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
2455  X86::SUB8mr);
2456  case X86ISD::SBB:
2457  return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
2458  X86::SBB8mr);
2459  case X86ISD::AND:
2460  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
2461  X86::AND8mr);
2462  case X86ISD::OR:
2463  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
2464  case X86ISD::XOR:
2465  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
2466  X86::XOR8mr);
2467  default:
2468  llvm_unreachable("Invalid opcode!");
2469  }
2470  };
2471  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
2472  switch (Opc) {
2473  case X86ISD::ADD:
2474  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
2475  case X86ISD::ADC:
2476  return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
2477  case X86ISD::SUB:
2478  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
2479  case X86ISD::SBB:
2480  return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
2481  case X86ISD::AND:
2482  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
2483  case X86ISD::OR:
2484  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
2485  case X86ISD::XOR:
2486  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
2487  default:
2488  llvm_unreachable("Invalid opcode!");
2489  }
2490  };
2491  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
2492  switch (Opc) {
2493  case X86ISD::ADD:
2494  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
2495  X86::ADD8mi);
2496  case X86ISD::ADC:
2497  return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
2498  X86::ADC8mi);
2499  case X86ISD::SUB:
2500  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
2501  X86::SUB8mi);
2502  case X86ISD::SBB:
2503  return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
2504  X86::SBB8mi);
2505  case X86ISD::AND:
2506  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
2507  X86::AND8mi);
2508  case X86ISD::OR:
2509  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
2510  X86::OR8mi);
2511  case X86ISD::XOR:
2512  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
2513  X86::XOR8mi);
2514  default:
2515  llvm_unreachable("Invalid opcode!");
2516  }
2517  };
2518 
2519  unsigned NewOpc = SelectRegOpcode(Opc);
2520  SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
2521 
2522  // See if the operand is a constant that we can fold into an immediate
2523  // operand.
2524  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
2525  auto OperandV = OperandC->getAPIntValue();
2526 
2527  // Check if we can shrink the operand enough to fit in an immediate (or
2528  // fit into a smaller immediate) by negating it and switching the
2529  // operation.
2530  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
2531  ((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 &&
2532  (-OperandV).getMinSignedBits() <= 8) ||
2533  (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
2534  (-OperandV).getMinSignedBits() <= 32)) &&
2535  hasNoCarryFlagUses(StoredVal.getNode())) {
2536  OperandV = -OperandV;
2537  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
2538  }
2539 
2540  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
2541  // the larger immediate operand.
2542  if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) {
2543  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2544  NewOpc = SelectImm8Opcode(Opc);
2545  } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() &&
2546  (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) {
2547  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2548  NewOpc = SelectImmOpcode(Opc);
2549  }
2550  }
2551 
2552  if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
2553  SDValue CopyTo =
2554  CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
2555  StoredVal.getOperand(2), SDValue());
2556 
2557  const SDValue Ops[] = {Base, Scale, Index, Disp,
2558  Segment, Operand, CopyTo, CopyTo.getValue(1)};
2559  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2560  Ops);
2561  } else {
2562  const SDValue Ops[] = {Base, Scale, Index, Disp,
2563  Segment, Operand, InputChain};
2564  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2565  Ops);
2566  }
2567  break;
2568  }
2569  default:
2570  llvm_unreachable("Invalid opcode!");
2571  }
2572 
2573  MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
2574  LoadNode->getMemOperand()};
2575  CurDAG->setNodeMemRefs(Result, MemOps);
2576 
2577  // Update Load Chain uses as well.
2578  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
2579  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2580  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2581  CurDAG->RemoveDeadNode(Node);
2582  return true;
2583 }
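// Illustrative note (editorial addition, not part of the original source):
// a matched {load; op; store} chain such as
//   %v = load i32, i32* %p
//   %a = add i32 %v, 1
//   store i32 %a, i32* %p
// is selected to a single read-modify-write instruction, e.g.
//   incl (%rdi)        ; or addl $1, (%rdi)
// with the load's and store's memory operands attached to the new node.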
2584 
2585 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
2586 bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
2587  MVT NVT = Node->getSimpleValueType(0);
2588  SDLoc dl(Node);
2589 
2590  SDValue N0 = Node->getOperand(0);
2591  SDValue N1 = Node->getOperand(1);
2592 
2593  if (!Subtarget->hasBMI() && !Subtarget->hasTBM())
2594  return false;
2595 
2596  // Must have a shift right.
2597  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
2598  return false;
2599 
2600  // Shift can't have additional users.
2601  if (!N0->hasOneUse())
2602  return false;
2603 
2604  // Only supported for 32 and 64 bits.
2605  if (NVT != MVT::i32 && NVT != MVT::i64)
2606  return false;
2607 
2608  // Shift amount and RHS of and must be constant.
2609  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
2610  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2611  if (!MaskCst || !ShiftCst)
2612  return false;
2613 
2614  // And RHS must be a mask.
2615  uint64_t Mask = MaskCst->getZExtValue();
2616  if (!isMask_64(Mask))
2617  return false;
2618 
2619  uint64_t Shift = ShiftCst->getZExtValue();
2620  uint64_t MaskSize = countPopulation(Mask);
2621 
2622  // Don't interfere with something that can be handled by extracting AH.
2623  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
2624  if (Shift == 8 && MaskSize == 8)
2625  return false;
2626 
2627  // Make sure we are only using bits that were in the original value, not
2628  // shifted in.
2629  if (Shift + MaskSize > NVT.getSizeInBits())
2630  return false;
2631 
2632  // Create a BEXTR node and run it through selection.
2633  SDValue C = CurDAG->getConstant(Shift | (MaskSize << 8), dl, NVT);
2634  SDValue New = CurDAG->getNode(X86ISD::BEXTR, dl, NVT,
2635  N0->getOperand(0), C);
2636  ReplaceNode(Node, New.getNode());
2637  SelectCode(New.getNode());
2638  return true;
2639 }
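// Illustrative note (editorial addition, not part of the original source):
// for example (x >> 4) & 0xFFF becomes BEXTR with control 4 | (12 << 8),
// i.e. extract 12 bits starting at bit 4.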
2640 
2641 // Emit a PCMPISTR(I/M) instruction.
2642 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
2643  bool MayFoldLoad, const SDLoc &dl,
2644  MVT VT, SDNode *Node) {
2645  SDValue N0 = Node->getOperand(0);
2646  SDValue N1 = Node->getOperand(1);
2647  SDValue Imm = Node->getOperand(2);
2648  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
2649  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
2650 
2651  // If there is a load, it will be behind a bitcast. We don't need to check
2652  // alignment on this load.
2653  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2654  if (MayFoldLoad && N1->getOpcode() == ISD::BITCAST && N1->hasOneUse() &&
2655  tryFoldVecLoad(Node, N1.getNode(), N1.getOperand(0), Tmp0, Tmp1, Tmp2,
2656  Tmp3, Tmp4)) {
2657  SDValue Load = N1.getOperand(0);
2658  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
2659  Load.getOperand(0) };
2660  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
2661  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2662  // Update the chain.
2663  ReplaceUses(Load.getValue(1), SDValue(CNode, 2));
2664  // Record the mem-refs
2665  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
2666  return CNode;
2667  }
2668 
2669  SDValue Ops[] = { N0, N1, Imm };
2670  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
2671  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
2672  return CNode;
2673 }
2674 
2675 // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
2676 // to emit a second instruction after this one. This is needed since we have two
2677 // copyToReg nodes glued before this and we need to continue that glue through.
2678 MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
2679  bool MayFoldLoad, const SDLoc &dl,
2680  MVT VT, SDNode *Node,
2681  SDValue &InFlag) {
2682  SDValue N0 = Node->getOperand(0);
2683  SDValue N2 = Node->getOperand(2);
2684  SDValue Imm = Node->getOperand(4);
2685  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
2686  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
2687 
2688  // If there is a load, it will be behind a bitcast. We don't need to check
2689  // alignment on this load.
2690  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2691  if (MayFoldLoad && N2->getOpcode() == ISD::BITCAST && N2->hasOneUse() &&
2692  tryFoldVecLoad(Node, N2.getNode(), N2.getOperand(0), Tmp0, Tmp1, Tmp2,
2693  Tmp3, Tmp4)) {
2694  SDValue Load = N2.getOperand(0);
2695  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
2696  Load.getOperand(0), InFlag };
2697  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
2698  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2699  InFlag = SDValue(CNode, 3);
2700  // Update the chain.
2701  ReplaceUses(Load.getValue(1), SDValue(CNode, 2));
2702  // Record the mem-refs
2703  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
2704  return CNode;
2705  }
2706 
2707  SDValue Ops[] = { N0, N2, Imm, InFlag };
2708  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
2709  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
2710  InFlag = SDValue(CNode, 2);
2711  return CNode;
2712 }
2713 
2714 bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2715  EVT VT = N->getValueType(0);
2716 
2717  // Only handle scalar shifts.
2718  if (VT.isVector())
2719  return false;
2720 
2721  // Narrower shifts only mask to 5 bits in hardware.
2722  unsigned Size = VT == MVT::i64 ? 64 : 32;
2723 
2724  SDValue OrigShiftAmt = N->getOperand(1);
2725  SDValue ShiftAmt = OrigShiftAmt;
2726  SDLoc DL(N);
2727 
2728  // Skip over a truncate of the shift amount.
2729  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
2730  ShiftAmt = ShiftAmt->getOperand(0);
2731 
2732  // Special case to avoid messing up a BZHI pattern.
2733  // Look for (srl (shl X, (size - y)), (size - y)
2734  if (Subtarget->hasBMI2() && (VT == MVT::i32 || VT == MVT::i64) &&
2735  N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL &&
2736  // Shift amounts the same?
2737  N->getOperand(1) == N->getOperand(0).getOperand(1) &&
2738  // Shift amounts size - y?
2739  ShiftAmt.getOpcode() == ISD::SUB &&
2740  isa<ConstantSDNode>(ShiftAmt.getOperand(0)) &&
2741  cast<ConstantSDNode>(ShiftAmt.getOperand(0))->getZExtValue() == Size)
2742  return false;
2743 
2744  SDValue NewShiftAmt;
2745  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
2746  SDValue Add0 = ShiftAmt->getOperand(0);
2747  SDValue Add1 = ShiftAmt->getOperand(1);
2748  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
2749  // to avoid the ADD/SUB.
2750  if (isa<ConstantSDNode>(Add1) &&
2751  cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
2752  NewShiftAmt = Add0;
2753  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2754  // generate a NEG instead of a SUB of a constant.
2755  } else if (ShiftAmt->getOpcode() == ISD::SUB &&
2756  isa<ConstantSDNode>(Add0) &&
2757  cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
2758  cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
2759  // Insert a negate op.
2760  // TODO: This isn't guaranteed to replace the sub if there is a logic cone
2761  // that uses it that's not a shift.
2762  EVT SubVT = ShiftAmt.getValueType();
2763  SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
2764  SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
2765  NewShiftAmt = Neg;
2766 
2767  // Insert these operands into a valid topological order so they can
2768  // get selected independently.
2769  insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
2770  insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
2771  } else
2772  return false;
2773  } else
2774  return false;
2775 
2776  if (NewShiftAmt.getValueType() != MVT::i8) {
2777  // Need to truncate the shift amount.
2778  NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
2779  // Add to a correct topological ordering.
2780  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
2781  }
2782 
2783  // Insert a new mask to keep the shift amount legal. This should be removed
2784  // by isel patterns.
2785  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
2786  CurDAG->getConstant(Size - 1, DL, MVT::i8));
2787  // Place in a correct topological ordering.
2788  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
2789 
2790  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
2791  NewShiftAmt);
2792  if (UpdatedNode != N) {
2793  // If we found an existing node, we should replace ourselves with that node
2794  // and wait for it to be selected after its other users.
2795  ReplaceNode(N, UpdatedNode);
2796  return true;
2797  }
2798 
2799  // If the original shift amount is now dead, delete it so that we don't run
2800  // it through isel.
2801  if (OrigShiftAmt.getNode()->use_empty())
2802  CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
2803 
2804  // Now that we've optimized the shift amount, defer to normal isel to get
2805  // load folding and legacy vs BMI2 selection without repeating it here.
2806  SelectCode(N);
2807  return true;
2808 }
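// Illustrative note (editorial addition, not part of the original source):
// because hardware shifts already reduce the count modulo the operand size,
//   (shl x, (add y, 32))  on i32   ==>  shift by y alone
//   (shl x, (sub 64, y))  on i64   ==>  shift by (and (neg y), 63)
// so the ADD disappears entirely and the SUB becomes a single NEG.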
2809 
2810 /// If the high bits of an 'and' operand are known zero, try setting the
2811 /// high bits of an 'and' constant operand to produce a smaller encoding by
2812 /// creating a small, sign-extended negative immediate rather than a large
2813 /// positive one. This reverses a transform in SimplifyDemandedBits that
2814 /// shrinks mask constants by clearing bits. There is also a possibility that
2815 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
2816 /// case, just replace the 'and'. Return 'true' if the node is replaced.
2817 bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
2818  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
2819  // have immediate operands.
2820  MVT VT = And->getSimpleValueType(0);
2821  if (VT != MVT::i32 && VT != MVT::i64)
2822  return false;
2823 
2824  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
2825  if (!And1C)
2826  return false;
2827 
2828  // Bail out if the mask constant is already negative. It can't shrink further.
2829  // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
2830  // patterns to use a 32-bit and instead of a 64-bit and by relying on the
2831  // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
2832  // are negative too.
2833  APInt MaskVal = And1C->getAPIntValue();
2834  unsigned MaskLZ = MaskVal.countLeadingZeros();
2835  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
2836  return false;
2837 
2838  // Don't extend into the upper 32 bits of a 64 bit mask.
2839  if (VT == MVT::i64 && MaskLZ >= 32) {
2840  MaskLZ -= 32;
2841  MaskVal = MaskVal.trunc(32);
2842  }
2843 
2844  SDValue And0 = And->getOperand(0);
2845  APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
2846  APInt NegMaskVal = MaskVal | HighZeros;
2847 
2848  // If a negative constant would not allow a smaller encoding, there's no need
2849  // to continue. Only change the constant when we know it's a win.
2850  unsigned MinWidth = NegMaskVal.getMinSignedBits();
2851  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
2852  return false;
2853 
2854  // Extend masks if we truncated above.
2855  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
2856  NegMaskVal = NegMaskVal.zext(64);
2857  HighZeros = HighZeros.zext(64);
2858  }
2859 
2860  // The variable operand must be all zeros in the top bits to allow using the
2861  // new, negative constant as the mask.
2862  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
2863  return false;
2864 
2865  // Check if the mask is -1. In that case, this is an unnecessary instruction
2866  // that escaped earlier analysis.
2867  if (NegMaskVal.isAllOnesValue()) {
2868  ReplaceNode(And, And0.getNode());
2869  return true;
2870  }
2871 
2872  // A negative mask allows a smaller encoding. Create a new 'and' node.
2873  SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
2874  SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
2875  ReplaceNode(And, NewAnd.getNode());
2876  SelectCode(NewAnd.getNode());
2877  return true;
2878 }
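// Illustrative note (editorial addition, not part of the original source):
// if the relevant upper bits of the operand are known zero,
//   andl $0x3ff8, %eax     ; 4-byte immediate
// can be re-encoded as
//   andl $-8, %eax         ; sign-extended 8-bit immediate
// since setting the known-zero high bits of the mask cannot change the result.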
2879 
2880 void X86DAGToDAGISel::Select(SDNode *Node) {
2881  MVT NVT = Node->getSimpleValueType(0);
2882  unsigned Opcode = Node->getOpcode();
2883  SDLoc dl(Node);
2884 
2885  if (Node->isMachineOpcode()) {
2886  LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
2887  Node->setNodeId(-1);
2888  return; // Already selected.
2889  }
2890 
2891  switch (Opcode) {
2892  default: break;
2893  case ISD::BRIND: {
2894  if (Subtarget->isTargetNaCl())
2895  // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We
2896  // leave the instruction alone.
2897  break;
2898  if (Subtarget->isTarget64BitILP32()) {
2899  // Converts a 32-bit register to a 64-bit, zero-extended version of
2900  // it. This is needed because x86-64 can do many things, but jmp %r32
2901  // ain't one of them.
2902  const SDValue &Target = Node->getOperand(1);
2903  assert(Target.getValueType() == MVT::i32 && "Unexpected VT!");
2904  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
2905  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
2906  Node->getOperand(0), ZextTarget);
2907  ReplaceNode(Node, Brind.getNode());
2908  SelectCode(ZextTarget.getNode());
2909  SelectCode(Brind.getNode());
2910  return;
2911  }
2912  break;
2913  }
2914  case X86ISD::GlobalBaseReg:
2915  ReplaceNode(Node, getGlobalBaseReg());
2916  return;
2917 
2918  case ISD::BITCAST:
2919  // Just drop all 128/256/512-bit bitcasts.
2920  if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
2921  NVT == MVT::f128) {
2922  ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2923  CurDAG->RemoveDeadNode(Node);
2924  return;
2925  }
2926  break;
2927 
2928  case X86ISD::SELECT:
2929  case X86ISD::SHRUNKBLEND: {
2930  // SHRUNKBLEND selects like a regular VSELECT. Same with X86ISD::SELECT.
2931  SDValue VSelect = CurDAG->getNode(
2932  ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
2933  Node->getOperand(1), Node->getOperand(2));
2934  ReplaceNode(Node, VSelect.getNode());
2935  SelectCode(VSelect.getNode());
2936  // We already called ReplaceUses.
2937  return;
2938  }
2939 
2940  case ISD::SRL:
2941  case ISD::SRA:
2942  case ISD::SHL:
2943  if (tryShiftAmountMod(Node))
2944  return;
2945  break;
2946 
2947  case ISD::AND:
2948  if (matchBEXTRFromAnd(Node))
2949  return;
2950  if (AndImmShrink && shrinkAndImmediate(Node))
2951  return;
2952 
2953  LLVM_FALLTHROUGH;
2954  case ISD::OR:
2955  case ISD::XOR: {
2956 
2957  // For operations of the form (x << C1) op C2, check if we can use a smaller
2958  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
2959  SDValue N0 = Node->getOperand(0);
2960  SDValue N1 = Node->getOperand(1);
2961 
2962  if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
2963  break;
2964 
2965  // i8 is unshrinkable, i16 should be promoted to i32.
2966  if (NVT != MVT::i32 && NVT != MVT::i64)
2967  break;
2968 
2969  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
2970  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2971  if (!Cst || !ShlCst)
2972  break;
2973 
2974  int64_t Val = Cst->getSExtValue();
2975  uint64_t ShlVal = ShlCst->getZExtValue();
2976 
2977  // Make sure that we don't change the operation by removing bits.
2978  // This only matters for OR and XOR, AND is unaffected.
2979  uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
2980  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
2981  break;
2982 
2983  unsigned ShlOp, AddOp, Op;
2984  MVT CstVT = NVT;
2985 
2986  // Check the minimum bitwidth for the new constant.
2987  // TODO: AND32ri is the same as AND64ri32 with zext imm.
2988  // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
2989  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
2990  if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
2991  CstVT = MVT::i8;
2992  else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
2993  CstVT = MVT::i32;
2994 
2995  // Bail if there is no smaller encoding.
2996  if (NVT == CstVT)
2997  break;
2998 
2999  switch (NVT.SimpleTy) {
3000  default: llvm_unreachable("Unsupported VT!");
3001  case MVT::i32:
3002  assert(CstVT == MVT::i8);
3003  ShlOp = X86::SHL32ri;
3004  AddOp = X86::ADD32rr;
3005 
3006  switch (Opcode) {
3007  default: llvm_unreachable("Impossible opcode");
3008  case ISD::AND: Op = X86::AND32ri8; break;
3009  case ISD::OR: Op = X86::OR32ri8; break;
3010  case ISD::XOR: Op = X86::XOR32ri8; break;
3011  }
3012  break;
3013  case MVT::i64:
3014  assert(CstVT == MVT::i8 || CstVT == MVT::i32);
3015  ShlOp = X86::SHL64ri;
3016  AddOp = X86::ADD64rr;
3017 
3018  switch (Opcode) {
3019  default: llvm_unreachable("Impossible opcode");
3020  case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
3021  case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
3022  case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
3023  }
3024  break;
3025  }
3026 
3027  // Emit the smaller op and the shift.
3028  SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
3029  SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
3030  if (ShlVal == 1)
3031  CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
3032  SDValue(New, 0));
3033  else
3034  CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
3035  getI8Imm(ShlVal, dl));
3036  return;
3037  }
3038  case X86ISD::UMUL8:
3039  case X86ISD::SMUL8: {
3040  SDValue N0 = Node->getOperand(0);
3041  SDValue N1 = Node->getOperand(1);
3042 
3043  unsigned Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
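                 // Editor's note (illustrative): the one-operand forms MUL8r/IMUL8r multiply AL
                 // by their explicit operand and write the 16-bit product to AX, which is why N0
                 // is copied into AL below before the machine node is created.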
3044 
3045  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
3046  N0, SDValue()).getValue(1);
3047 
3048  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
3049  SDValue Ops[] = {N1, InFlag};
3050  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3051 
3052  ReplaceNode(Node, CNode);
3053  return;
3054  }
3055 
3056  case X86ISD::UMUL: {
3057  SDValue N0 = Node->getOperand(0);
3058  SDValue N1 = Node->getOperand(1);
3059 
3060  unsigned LoReg, Opc;
3061  switch (NVT.SimpleTy) {
3062  default: llvm_unreachable("Unsupported VT!");
3063  // MVT::i8 is handled by X86ISD::UMUL8.
3064  case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
3065  case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
3066  case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
3067  }
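                 // Editor's note (illustrative): the one-operand MUL reads its other factor
                 // implicitly from AX/EAX/RAX (hence the CopyToReg to LoReg below) and writes
                 // the double-width product to DX:AX, EDX:EAX, or RDX:RAX.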
3068 
3069  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
3070  N0, SDValue()).getValue(1);
3071 
3072  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
3073  SDValue Ops[] = {N1, InFlag};
3074  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3075 
3076  ReplaceNode(Node, CNode);
3077  return;
3078  }
3079 
3080  case ISD::SMUL_LOHI:
3081  case ISD::UMUL_LOHI: {
3082  SDValue N0 = Node->getOperand(0);
3083  SDValue N1 = Node->getOperand(1);
3084 
3085  unsigned Opc, MOpc;
3086  bool isSigned = Opcode == ISD::SMUL_LOHI;
3087  bool hasBMI2 = Subtarget->hasBMI2();
3088  if (!isSigned) {
3089  switch (NVT.SimpleTy) {
3090  default: llvm_unreachable("Unsupported VT!");
3091  case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
3092  MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
3093  case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
3094  MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
3095  }
3096  } else {
3097  switch (NVT.SimpleTy) {
3098  default: llvm_unreachable("Unsupported VT!");
3099  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
3100  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
3101  }
3102  }
3103 
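                 // Editor's note (illustrative): MULX (BMI2) reads one factor implicitly from
                 // EDX/RDX, writes both product halves to explicit destination registers, and
                 // leaves EFLAGS untouched; the legacy one-operand MUL/IMUL instead use
                 // EAX/RAX and EDX/RDX implicitly. That is what the register choices below
                 // encode: LoReg/HiReg are 0 for MULX because its results are explicit defs.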
3104  unsigned SrcReg, LoReg, HiReg;
3105  switch (Opc) {
3106  default: llvm_unreachable("Unknown MUL opcode!");
3107  case X86::IMUL32r:
3108  case X86::MUL32r:
3109  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
3110  break;
3111  case X86::IMUL64r:
3112  case X86::MUL64r:
3113  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
3114  break;
3115  case X86::MULX32rr:
3116  SrcReg = X86::EDX; LoReg = HiReg = 0;
3117  break;
3118  case X86::MULX64rr:
3119  SrcReg = X86::RDX; LoReg = HiReg = 0;
3120  break;
3121  }
3122 
3123  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3124  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3125  // Multiply is commutative.
3126  if (!foldedLoad) {
3127  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3128  if (foldedLoad)
3129  std::swap(N0, N1);
3130  }
3131 
3132  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
3133  N0, SDValue()).getValue(1);
3134  SDValue ResHi, ResLo;
3135 
3136  if (foldedLoad) {
3137  SDValue Chain;
3138  MachineSDNode *CNode = nullptr;
3139  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3140  InFlag };
3141  if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
3142  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
3143  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3144  ResHi = SDValue(CNode, 0);
3145  ResLo = SDValue(CNode, 1);
3146  Chain = SDValue(CNode, 2);
3147  InFlag = SDValue(CNode, 3);
3148  } else {
3149  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3150  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3151  Chain = SDValue(CNode, 0);
3152  InFlag = SDValue(CNode, 1);
3153  }
3154 
3155  // Update the chain.
3156  ReplaceUses(N1.getValue(1), Chain);
3157  // Record the mem-refs
3158  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3159  } else {
3160  SDValue Ops[] = { N1, InFlag };
3161  if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
3162  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
3163  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3164  ResHi = SDValue(CNode, 0);
3165  ResLo = SDValue(CNode, 1);
3166  InFlag = SDValue(CNode, 2);
3167  } else {
3168  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
3169  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3170  InFlag = SDValue(CNode, 0);
3171  }
3172  }
3173 
3174  // Copy the low half of the result, if it is needed.
3175  if (!SDValue(Node, 0).use_empty()) {
3176  if (!ResLo.getNode()) {
3177  assert(LoReg && "Register for low half is not defined!");
3178  ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
3179  InFlag);
3180  InFlag = ResLo.getValue(2);
3181  }
3182  ReplaceUses(SDValue(Node, 0), ResLo);
3183  LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
3184  dbgs() << '\n');
3185  }
3186  // Copy the high half of the result, if it is needed.
3187  if (!SDValue(Node, 1).use_empty()) {
3188  if (!ResHi.getNode()) {
3189  assert(HiReg && "Register for high half is not defined!");
3190  ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
3191  InFlag);
3192  InFlag = ResHi.getValue(2);
3193  }
3194  ReplaceUses(SDValue(Node, 1), ResHi);
3195  LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
3196  dbgs() << '\n');
3197  }
3198 
3199  CurDAG->RemoveDeadNode(Node);
3200  return;
3201  }
3202 
3203  case ISD::SDIVREM:
3204  case ISD::UDIVREM:
3205  case X86ISD::SDIVREM8_SEXT_HREG:
3206  case X86ISD::UDIVREM8_ZEXT_HREG: {
3207  SDValue N0 = Node->getOperand(0);
3208  SDValue N1 = Node->getOperand(1);
3209 
3210  unsigned Opc, MOpc;
3211  bool isSigned = (Opcode == ISD::SDIVREM ||
3212  Opcode == X86ISD::SDIVREM8_SEXT_HREG);
3213  if (!isSigned) {
3214  switch (NVT.SimpleTy) {
3215  default: llvm_unreachable("Unsupported VT!");
3216  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
3217  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
3218  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
3219  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
3220  }
3221  } else {
3222  switch (NVT.SimpleTy) {
3223  default: llvm_unreachable("Unsupported VT!");
3224  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
3225  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
3226  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
3227  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
3228  }
3229  }
3230 
3231  unsigned LoReg, HiReg, ClrReg;
3232  unsigned SExtOpcode;
3233  switch (NVT.SimpleTy) {
3234  default: llvm_unreachable("Unsupported VT!");
3235  case MVT::i8:
3236  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
3237  SExtOpcode = X86::CBW;
3238  break;
3239  case MVT::i16:
3240  LoReg = X86::AX; HiReg = X86::DX;
3241  ClrReg = X86::DX;
3242  SExtOpcode = X86::CWD;
3243  break;
3244  case MVT::i32:
3245  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
3246  SExtOpcode = X86::CDQ;
3247  break;
3248  case MVT::i64:
3249  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
3250  SExtOpcode = X86::CQO;
3251  break;
3252  }
3253 
3254  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3255  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3256  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
3257 
3258  SDValue InFlag;
3259  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
3260  // Special case for div8, just use a move with zero extension to AX to
3261  // clear the upper 8 bits (AH).
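                 // Editor's note (illustrative assembly for the unsigned i8 case):
                 //   movzbl %cl, %eax   ; zero-extend the dividend, clearing AH
                 //   divb   %bl         ; AX / BL -> quotient in AL, remainder in AH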
3262  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
3263  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3264  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
3265  Move =
3266  SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
3267  MVT::Other, Ops), 0);
3268  Chain = Move.getValue(1);
3269  ReplaceUses(N0.getValue(1), Chain);
3270  } else {
3271  Move =
3272  SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
3273  Chain = CurDAG->getEntryNode();
3274  }
3275  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
3276  InFlag = Chain.getValue(1);
3277  } else {
3278  InFlag =
3279  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
3280  LoReg, N0, SDValue()).getValue(1);
3281  if (isSigned && !signBitIsZero) {
3282  // Sign extend the low part into the high part.
3283  InFlag =
3284  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
3285  } else {
3286  // Zero out the high part, effectively zero extending the input.
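                 // Editor's note (illustrative): MOV32r0 is a pseudo that is later expanded to a
                 // self-XOR (e.g. "xorl %edx, %edx"), the usual way to clear the high half before
                 // an unsigned divide; the EXTRACT_SUBREG/SUBREG_TO_REG below only adjust it to
                 // the 16- or 64-bit register width.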
3287  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
3288  switch (NVT.SimpleTy) {
3289  case MVT::i16:
3290  ClrNode =
3291  SDValue(CurDAG->getMachineNode(
3292  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
3293  CurDAG->getTargetConstant(X86::sub_16bit, dl,
3294  MVT::i32)),
3295  0);
3296  break;
3297  case MVT::i32:
3298  break;
3299  case MVT::i64:
3300  ClrNode =
3301  SDValue(CurDAG->getMachineNode(
3302  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
3303  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
3304  CurDAG->getTargetConstant(X86::sub_32bit, dl,
3305  MVT::i32)),
3306  0);
3307  break;
3308  default:
3309  llvm_unreachable("Unexpected division source");
3310  }
3311 
3312  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
3313  ClrNode, InFlag).getValue(1);
3314  }
3315  }
3316 
3317  if (foldedLoad) {
3318  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3319  InFlag };
3320  MachineSDNode *CNode =
3321  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
3322  InFlag = SDValue(CNode, 1);
3323  // Update the chain.
3324  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
3325  // Record the mem-refs
3326  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3327  } else {
3328  InFlag =
3329  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
3330  }
3331 
3332  // Prevent use of AH in a REX instruction by explicitly copying it to
3333  // an ABCD_L register.
3334  //
3335  // The current assumption of the register allocator is that isel
3336  // won't generate explicit references to the GR8_ABCD_H registers. If
3337  // the allocator and/or the backend get enhanced to be more robust in
3338  // that regard, this can be, and should be, removed.
3339  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
3340  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
3341  unsigned AHExtOpcode =
3342  isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
3343 
3344  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
3345  MVT::Glue, AHCopy, InFlag);
3346  SDValue Result(RNode, 0);
3347  InFlag = SDValue(RNode, 1);
3348 
3349  if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
3350  Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
3351  assert(Node->getValueType(1) == MVT::i32 && "Unexpected result type!");
3352  } else {
3353  Result =
3354  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
3355  }
3356  ReplaceUses(SDValue(Node, 1), Result);
3357  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3358  dbgs() << '\n');
3359  }
3360  // Copy the division (low) result, if it is needed.
3361  if (!SDValue(Node, 0).use_empty()) {
3362  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
3363  LoReg, NVT, InFlag);
3364  InFlag = Result.getValue(2);
3365  ReplaceUses(SDValue(Node, 0), Result);
3366  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3367  dbgs() << '\n');
3368  }
3369  // Copy the remainder (high) result, if it is needed.
3370  if (!SDValue(Node, 1).use_empty()) {
3371  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
3372  HiReg, NVT, InFlag);
3373  InFlag = Result.getValue(2);
3374  ReplaceUses(SDValue(Node, 1), Result);
3375  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3376  dbgs() << '\n');
3377  }
3378  CurDAG->RemoveDeadNode(Node);
3379  return;
3380  }
3381 
3382  case X86ISD::CMP: {
3383  SDValue N0 = Node->getOperand(0);
3384  SDValue N1 = Node->getOperand(1);
3385 
3386  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
3387      hasNoSignedComparisonUses(Node))
3388  N0 = N0.getOperand(0);
3389 
3390  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
3391  // use a smaller encoding.
3392  // Look past the truncate if CMP is the only use of it.
3393  if (N0.getOpcode() == ISD::AND &&
3394  N0.getNode()->hasOneUse() &&
3395  N0.getValueType() != MVT::i8 &&
3396  X86::isZeroNode(N1)) {
3397  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3398  if (!C) break;
3399  uint64_t Mask = C->getZExtValue();
3400 
3401  MVT VT;
3402  int SubRegOp;
3403  unsigned Op;
3404 
3405  if (isUInt<8>(Mask) &&
3406  (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) {
3407  // For example, convert "testl %eax, $8" to "testb %al, $8"
3408  VT = MVT::i8;
3409  SubRegOp = X86::sub_8bit;
3410  Op = X86::TEST8ri;
3411  } else if (OptForMinSize && isUInt<16>(Mask) &&
3412  (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
3413  // For example, "testl %eax, $32776" to "testw %ax, $32776".
3414  // NOTE: We only want to form TESTW instructions if optimizing for
3415  // min size. Otherwise we only save one byte and possibly get a length
3416  // changing prefix penalty in the decoders.
3417  VT = MVT::i16;
3418  SubRegOp = X86::sub_16bit;
3419  Op = X86::TEST16ri;
3420  } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
3421  (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
3422  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
3423  // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
3424  // Otherwise, we find ourselves in a position where we have to do
3425  // promotion. If previous passes did not promote the and, we assume
3426  // they had a good reason not to and do not promote here.
3427  VT = MVT::i32;
3428  SubRegOp = X86::sub_32bit;
3429  Op = X86::TEST32ri;
3430  } else {
3431  // No eligible transformation was found.
3432  break;
3433  }
3434 
3435  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
3436  SDValue Reg = N0.getOperand(0);
3437 
3438  // Extract the subregister if necessary.
3439  if (N0.getValueType() != VT)
3440  Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
3441 
3442  // Emit the TEST instruction (testb, testw, or testl, as selected above).
3443  SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm);
3444  // Replace CMP with TEST.
3445  ReplaceNode(Node, NewNode);
3446  return;
3447  }
3448  break;
3449  }
3450  case X86ISD::PCMPISTR: {
3451  if (!Subtarget->hasSSE42())
3452  break;
3453 
3454  bool NeedIndex = !SDValue(Node, 0).use_empty();
3455  bool NeedMask = !SDValue(Node, 1).use_empty();
3456  // We can't fold a load if we are going to make two instructions.
3457  bool MayFoldLoad = !NeedIndex || !NeedMask;
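                 // Editor's note (illustrative): when both the index and the mask are live, two
                 // separate PCMPISTRI/PCMPISTRM instructions are emitted below from the same
                 // operands; folding the load into both would duplicate the memory access, so
                 // the operand is kept in a register instead.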
3458 
3459  MachineSDNode *CNode;
3460  if (NeedMask) {
3461  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
3462  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
3463  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
3464  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
3465  }
3466  if (NeedIndex || !NeedMask) {
3467  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
3468  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
3469  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
3470  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
3471  }
3472 
3473  // Connect the flag usage to the last instruction created.
3474  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
3475  CurDAG->RemoveDeadNode(Node);
3476  return;
3477  }
3478  case X86ISD::PCMPESTR: {
3479  if (!Subtarget->hasSSE42())
3480  break;
3481 
3482  // Copy the two implicit register inputs.
3483  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
3484  Node->getOperand(1),
3485  SDValue()).getValue(1);
3486  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
3487  Node->getOperand(3), InFlag).getValue(1);
3488 
3489  bool NeedIndex = !SDValue(Node, 0).use_empty();
3490  bool NeedMask = !SDValue(Node, 1).use_empty();
3491  // We can't fold a load if we are going to make two instructions.
3492  bool MayFoldLoad = !NeedIndex || !NeedMask;
3493 
3494  MachineSDNode *CNode;
3495  if (NeedMask) {
3496  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
3497  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
3498  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
3499  InFlag);
3500  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
3501  }
3502  if (NeedIndex || !NeedMask) {
3503  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
3504  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
3505  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
3506  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
3507  }
3508  // Connect the flag usage to the last instruction created.
3509  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
3510  CurDAG->RemoveDeadNode(Node);
3511  return;
3512  }
3513 
3514  case ISD::STORE:
3515  if (foldLoadStoreIntoMemOperand(Node))
3516  return;
3517  break;
3518  }
3519 
3520  SelectCode(Node);
3521 }
3522 
3523 bool X86DAGToDAGISel::
3524 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
3525  std::vector<SDValue> &OutOps) {
3526  SDValue Op0, Op1, Op2, Op3, Op4;
3527  switch (ConstraintID) {
3528  default:
3529  llvm_unreachable("Unexpected asm memory constraint");
3530  case InlineAsm::Constraint_i:
3531  // FIXME: It seems strange that 'i' is needed here since it's supposed to
3532  // be an immediate and not a memory constraint.
3533  LLVM_FALLTHROUGH;
3534  case InlineAsm::Constraint_o: // offsetable ??
3535  case InlineAsm::Constraint_v: // not offsetable ??
3536  case InlineAsm::Constraint_m: // memory
3537  case InlineAsm::Constraint_X:
3538  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
3539  return true;
3540  break;
3541  }
3542 
3543  OutOps.push_back(Op0);
3544  OutOps.push_back(Op1);
3545  OutOps.push_back(Op2);
3546  OutOps.push_back(Op3);
3547  OutOps.push_back(Op4);
3548  return false;
3549 }
3550 
3551 /// This pass converts a legalized DAG into an X86-specific DAG,
3552 /// ready for instruction scheduling.
3553 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
3554                                      CodeGenOpt::Level OptLevel) {
3555  return new X86DAGToDAGISel(TM, OptLevel);
3556 }
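// Editor's note (illustrative sketch, not part of this file): the pass created by
// createX86ISelDag is added from the X86 target's pass pipeline, roughly:
//
//   // In X86TargetMachine.cpp (sketch):
//   bool X86PassConfig::addInstSelector() {
//     addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
//     ...
//     return false;
//   }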