LLVM  8.0.0svn
X86ISelDAGToDAG.cpp
1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a DAG pattern matching instruction selector for X86,
11 // converting from a legalized dag to a X86 dag.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86MachineFunctionInfo.h"
17 #include "X86RegisterInfo.h"
18 #include "X86Subtarget.h"
19 #include "X86TargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Config/llvm-config.h"
25 #include "llvm/IR/ConstantRange.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/Instructions.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/Type.h"
30 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/KnownBits.h"
37 #include <stdint.h>
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "x86-isel"
41 
42 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
43 
44 static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
45  cl::desc("Enable setting constant bits to reduce size of mask immediates"),
46  cl::Hidden);
47 
48 //===----------------------------------------------------------------------===//
49 // Pattern Matcher Implementation
50 //===----------------------------------------------------------------------===//
51 
52 namespace {
53  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
54  /// numbers for the leaves of the matched tree.
55  struct X86ISelAddressMode {
56  enum {
57  RegBase,
58  FrameIndexBase
59  } BaseType;
60 
61  // This is really a union, discriminated by BaseType!
62  SDValue Base_Reg;
63  int Base_FrameIndex;
64 
65  unsigned Scale;
66  SDValue IndexReg;
67  int32_t Disp;
68  SDValue Segment;
69  const GlobalValue *GV;
70  const Constant *CP;
71  const BlockAddress *BlockAddr;
72  const char *ES;
73  MCSymbol *MCSym;
74  int JT;
75  unsigned Align; // CP alignment.
76  unsigned char SymbolFlags; // X86II::MO_*
77 
78  X86ISelAddressMode()
79  : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
80  Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
81  MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
82 
83  bool hasSymbolicDisplacement() const {
84  return GV != nullptr || CP != nullptr || ES != nullptr ||
85  MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
86  }
87 
88  bool hasBaseOrIndexReg() const {
89  return BaseType == FrameIndexBase ||
90  IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
91  }
92 
93  /// Return true if this addressing mode is already RIP-relative.
94  bool isRIPRelative() const {
95  if (BaseType != RegBase) return false;
96  if (RegisterSDNode *RegNode =
97  dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
98  return RegNode->getReg() == X86::RIP;
99  return false;
100  }
101 
102  void setBaseReg(SDValue Reg) {
103  BaseType = RegBase;
104  Base_Reg = Reg;
105  }
106 
107 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
108  void dump(SelectionDAG *DAG = nullptr) {
109  dbgs() << "X86ISelAddressMode " << this << '\n';
110  dbgs() << "Base_Reg ";
111  if (Base_Reg.getNode())
112  Base_Reg.getNode()->dump(DAG);
113  else
114  dbgs() << "nul\n";
115  if (BaseType == FrameIndexBase)
116  dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
117  dbgs() << " Scale " << Scale << '\n'
118  << "IndexReg ";
119  if (IndexReg.getNode())
120  IndexReg.getNode()->dump(DAG);
121  else
122  dbgs() << "nul\n";
123  dbgs() << " Disp " << Disp << '\n'
124  << "GV ";
125  if (GV)
126  GV->dump();
127  else
128  dbgs() << "nul";
129  dbgs() << " CP ";
130  if (CP)
131  CP->dump();
132  else
133  dbgs() << "nul";
134  dbgs() << '\n'
135  << "ES ";
136  if (ES)
137  dbgs() << ES;
138  else
139  dbgs() << "nul";
140  dbgs() << " MCSym ";
141  if (MCSym)
142  dbgs() << MCSym;
143  else
144  dbgs() << "nul";
145  dbgs() << " JT" << JT << " Align" << Align << '\n';
146  }
147 #endif
148  };
149 }
150 
151 namespace {
152  //===--------------------------------------------------------------------===//
153  /// ISel - X86-specific code to select X86 machine instructions for
154  /// SelectionDAG operations.
155  ///
156  class X86DAGToDAGISel final : public SelectionDAGISel {
157  /// Keep a pointer to the X86Subtarget around so that we can
158  /// make the right decision when generating code for different targets.
159  const X86Subtarget *Subtarget;
160 
161  /// If true, selector should try to optimize for code size instead of
162  /// performance.
163  bool OptForSize;
164 
165  /// If true, selector should try to optimize for minimum code size.
166  bool OptForMinSize;
167 
168  /// Disable direct TLS access through segment registers.
169  bool IndirectTlsSegRefs;
170 
171  public:
172  explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
173  : SelectionDAGISel(tm, OptLevel), OptForSize(false),
174  OptForMinSize(false) {}
175 
176  StringRef getPassName() const override {
177  return "X86 DAG->DAG Instruction Selection";
178  }
179 
180  bool runOnMachineFunction(MachineFunction &MF) override {
181  // Reset the subtarget each time through.
182  Subtarget = &MF.getSubtarget<X86Subtarget>();
183  IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
184  "indirect-tls-seg-refs");
185  SelectionDAGISel::runOnMachineFunction(MF);
186  return true;
187  }
188 
189  void EmitFunctionEntryCode() override;
190 
191  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
192 
193  void PreprocessISelDAG() override;
194  void PostprocessISelDAG() override;
195 
196 // Include the pieces autogenerated from the target description.
197 #include "X86GenDAGISel.inc"
198 
199  private:
200  void Select(SDNode *N) override;
201 
202  bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
203  bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
204  bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
205  bool matchAddress(SDValue N, X86ISelAddressMode &AM);
206  bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
207  bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
208  bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
209  unsigned Depth);
210  bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
211  bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
212  SDValue &Scale, SDValue &Index, SDValue &Disp,
213  SDValue &Segment);
214  bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
215  SDValue &Scale, SDValue &Index, SDValue &Disp,
216  SDValue &Segment);
217  bool selectMOV64Imm32(SDValue N, SDValue &Imm);
218  bool selectLEAAddr(SDValue N, SDValue &Base,
219  SDValue &Scale, SDValue &Index, SDValue &Disp,
220  SDValue &Segment);
221  bool selectLEA64_32Addr(SDValue N, SDValue &Base,
222  SDValue &Scale, SDValue &Index, SDValue &Disp,
223  SDValue &Segment);
224  bool selectTLSADDRAddr(SDValue N, SDValue &Base,
225  SDValue &Scale, SDValue &Index, SDValue &Disp,
226  SDValue &Segment);
227  bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
228  SDValue &Base, SDValue &Scale,
229  SDValue &Index, SDValue &Disp,
230  SDValue &Segment,
231  SDValue &NodeWithChain);
232  bool selectRelocImm(SDValue N, SDValue &Op);
233 
234  bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
235  SDValue &Base, SDValue &Scale,
236  SDValue &Index, SDValue &Disp,
237  SDValue &Segment);
238 
239  // Convenience method where P is also root.
240  bool tryFoldLoad(SDNode *P, SDValue N,
241  SDValue &Base, SDValue &Scale,
242  SDValue &Index, SDValue &Disp,
243  SDValue &Segment) {
244  return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
245  }
246 
247  /// Implement addressing mode selection for inline asm expressions.
248  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
249  unsigned ConstraintID,
250  std::vector<SDValue> &OutOps) override;
251 
252  void emitSpecialCodeForMain();
253 
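 /// Expand a matched X86ISelAddressMode into the five operand values
 /// (Base, Scale, Index, Disp, Segment) that the X86 memory patterns expect.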
254  inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
255  SDValue &Base, SDValue &Scale,
256  SDValue &Index, SDValue &Disp,
257  SDValue &Segment) {
258  Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
259  ? CurDAG->getTargetFrameIndex(
260  AM.Base_FrameIndex,
261  TLI->getPointerTy(CurDAG->getDataLayout()))
262  : AM.Base_Reg;
263  Scale = getI8Imm(AM.Scale, DL);
264  Index = AM.IndexReg;
265  // These are 32-bit even in 64-bit mode since RIP-relative offset
266  // is 32-bit.
267  if (AM.GV)
268  Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
269  MVT::i32, AM.Disp,
270  AM.SymbolFlags);
271  else if (AM.CP)
272  Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
273  AM.Align, AM.Disp, AM.SymbolFlags);
274  else if (AM.ES) {
275  assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
276  Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
277  } else if (AM.MCSym) {
278  assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
279  assert(AM.SymbolFlags == 0 && "oo");
280  Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
281  } else if (AM.JT != -1) {
282  assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
283  Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
284  } else if (AM.BlockAddr)
285  Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
286  AM.SymbolFlags);
287  else
288  Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
289 
290  if (AM.Segment.getNode())
291  Segment = AM.Segment;
292  else
293  Segment = CurDAG->getRegister(0, MVT::i32);
294  }
295 
296  // Utility function to determine whether we should avoid selecting
297  // immediate forms of instructions for better code size or not.
298  // At a high level, we'd like to avoid such instructions when
299  // we have similar constants used within the same basic block
300  // that can be kept in a register.
301  //
302  bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
303  uint32_t UseCount = 0;
304 
305  // Do not want to hoist if we're not optimizing for size.
306  // TODO: We'd like to remove this restriction.
307  // See the comment in X86InstrInfo.td for more info.
308  if (!OptForSize)
309  return false;
310 
311  // Walk all the users of the immediate.
312  for (SDNode::use_iterator UI = N->use_begin(),
313  UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
314 
315  SDNode *User = *UI;
316 
317  // This user is already selected. Count it as a legitimate use and
318  // move on.
319  if (User->isMachineOpcode()) {
320  UseCount++;
321  continue;
322  }
323 
324  // We want to count stores of immediates as real uses.
325  if (User->getOpcode() == ISD::STORE &&
326  User->getOperand(1).getNode() == N) {
327  UseCount++;
328  continue;
329  }
330 
331  // We don't currently match users that have > 2 operands (except
332  // for stores, which are handled above)
333  // Those instruction won't match in ISEL, for now, and would
334  // be counted incorrectly.
335  // This may change in the future as we add additional instruction
336  // types.
337  if (User->getNumOperands() != 2)
338  continue;
339 
340  // Immediates that are used for offsets as part of stack
341  // manipulation should be left alone. These are typically
342  // used to indicate SP offsets for argument passing and
343  // will get pulled into stores/pushes (implicitly).
344  if (User->getOpcode() == X86ISD::ADD ||
345  User->getOpcode() == ISD::ADD ||
346  User->getOpcode() == X86ISD::SUB ||
347  User->getOpcode() == ISD::SUB) {
348 
349  // Find the other operand of the add/sub.
350  SDValue OtherOp = User->getOperand(0);
351  if (OtherOp.getNode() == N)
352  OtherOp = User->getOperand(1);
353 
354  // Don't count if the other operand is SP.
355  RegisterSDNode *RegNode;
356  if (OtherOp->getOpcode() == ISD::CopyFromReg &&
357  (RegNode = dyn_cast_or_null<RegisterSDNode>(
358  OtherOp->getOperand(1).getNode())))
359  if ((RegNode->getReg() == X86::ESP) ||
360  (RegNode->getReg() == X86::RSP))
361  continue;
362  }
363 
364  // ... otherwise, count this and move on.
365  UseCount++;
366  }
367 
368  // If we have more than 1 use, then recommend for hoisting.
369  return (UseCount > 1);
370  }
371 
372  /// Return a target constant with the specified value of type i8.
373  inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
374  return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
375  }
376 
377  /// Return a target constant with the specified value, of type i32.
378  inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
379  return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
380  }
381 
382  /// Return a target constant with the specified value, of type i64.
383  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
384  return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
385  }
386 
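 /// Convert the constant element index in operand 1 of an extract-subvector
 /// style node into the 128-/256-bit subvector immediate used by the
 /// VEXTRACT* patterns: (element index * element size in bits) / VecWidth.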
387  SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
388  const SDLoc &DL) {
389  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
390  uint64_t Index = N->getConstantOperandVal(1);
391  MVT VecVT = N->getOperand(0).getSimpleValueType();
392  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
393  }
394 
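 /// Same conversion for VINSERT*: operand 2 of the node holds the element
 /// index at which the subvector is inserted, and the result type supplies
 /// the element size.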
395  SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
396  const SDLoc &DL) {
397  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
398  uint64_t Index = N->getConstantOperandVal(2);
399  MVT VecVT = N->getSimpleValueType(0);
400  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
401  }
402 
403  /// Return an SDNode that returns the value of the global base register.
404  /// Output instructions required to initialize the global base register,
405  /// if necessary.
406  SDNode *getGlobalBaseReg();
407 
408  /// Return a reference to the TargetMachine, casted to the target-specific
409  /// type.
410  const X86TargetMachine &getTargetMachine() const {
411  return static_cast<const X86TargetMachine &>(TM);
412  }
413 
414  /// Return a reference to the TargetInstrInfo, casted to the target-specific
415  /// type.
416  const X86InstrInfo *getInstrInfo() const {
417  return Subtarget->getInstrInfo();
418  }
419 
420  /// Address-mode matching performs shift-of-and to and-of-shift
421  /// reassociation in order to expose more scaled addressing
422  /// opportunities.
423  bool ComplexPatternFuncMutatesDAG() const override {
424  return true;
425  }
426 
427  bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
428 
429  /// Returns whether this is a relocatable immediate in the range
430  /// [-2^(Width-1) .. 2^(Width-1)-1], matching isInt<Width>.
431  template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
432  if (auto *CN = dyn_cast<ConstantSDNode>(N))
433  return isInt<Width>(CN->getSExtValue());
434  return isSExtAbsoluteSymbolRef(Width, N);
435  }
436 
437  // Indicates we should prefer to use a non-temporal load for this load.
438  bool useNonTemporalLoad(LoadSDNode *N) const {
439  if (!N->isNonTemporal())
440  return false;
441 
442  unsigned StoreSize = N->getMemoryVT().getStoreSize();
443 
444  if (N->getAlignment() < StoreSize)
445  return false;
446 
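  // These thresholds line up with the widths for which non-temporal load
  // instructions exist: (V)MOVNTDQA needs SSE4.1 for 128 bits, AVX2 for
  // 256 bits and AVX-512 for 512 bits.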
447  switch (StoreSize) {
448  default: llvm_unreachable("Unsupported store size");
449  case 4:
450  case 8:
451  return false;
452  case 16:
453  return Subtarget->hasSSE41();
454  case 32:
455  return Subtarget->hasAVX2();
456  case 64:
457  return Subtarget->hasAVX512();
458  }
459  }
460 
461  bool foldLoadStoreIntoMemOperand(SDNode *Node);
462  MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
463  bool matchBitExtract(SDNode *Node);
464  bool shrinkAndImmediate(SDNode *N);
465  bool isMaskZeroExtended(SDNode *N) const;
466  bool tryShiftAmountMod(SDNode *N);
467 
468  MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
469  const SDLoc &dl, MVT VT, SDNode *Node);
470  MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
471  const SDLoc &dl, MVT VT, SDNode *Node,
472  SDValue &InFlag);
473 
474  bool tryOptimizeRem8Extend(SDNode *N);
475  };
476 }
477 
478 
479 // Returns true if this masked compare can be implemented legally with this
480 // type.
481 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
482  unsigned Opcode = N->getOpcode();
483  if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
484  Opcode == X86ISD::CMPM_RND || Opcode == X86ISD::VFPCLASS) {
485  // We can get 256-bit 8 element types here without VLX being enabled. When
486  // this happens we will use 512-bit operations and the mask will not be
487  // zero extended.
488  EVT OpVT = N->getOperand(0).getValueType();
489  if (OpVT.is256BitVector() || OpVT.is128BitVector())
490  return Subtarget->hasVLX();
491 
492  return true;
493  }
494  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
495  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
496  Opcode == X86ISD::FSETCCM_RND)
497  return true;
498 
499  return false;
500 }
501 
502 // Returns true if we can assume the writer of the mask has zero extended it
503 // for us.
504 bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
505  // If this is an AND, check if we have a compare on either side. As long as
506  // one side guarantees the mask is zero extended, the AND will preserve those
507  // zeros.
508  if (N->getOpcode() == ISD::AND)
509  return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
510  isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
511 
512  return isLegalMaskCompare(N, Subtarget);
513 }
514 
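// Decide whether folding the operand N into its user U (with the pattern
// rooted at Root) is worthwhile. Most of the checks below avoid folding a
// load when a smaller or otherwise cheaper encoding is available instead.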
515 bool
516 X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
517  if (OptLevel == CodeGenOpt::None) return false;
518 
519  if (!N.hasOneUse())
520  return false;
521 
522  if (N.getOpcode() != ISD::LOAD)
523  return true;
524 
525  // Don't fold non-temporal loads if we have an instruction for them.
526  if (useNonTemporalLoad(cast<LoadSDNode>(N)))
527  return false;
528 
529  // If N is a load, do additional profitability checks.
530  if (U == Root) {
531  switch (U->getOpcode()) {
532  default: break;
533  case X86ISD::ADD:
534  case X86ISD::ADC:
535  case X86ISD::SUB:
536  case X86ISD::SBB:
537  case X86ISD::AND:
538  case X86ISD::XOR:
539  case X86ISD::OR:
540  case ISD::ADD:
541  case ISD::ADDCARRY:
542  case ISD::AND:
543  case ISD::OR:
544  case ISD::XOR: {
545  SDValue Op1 = U->getOperand(1);
546 
547  // If the other operand is an 8-bit immediate we should fold the immediate
548  // instead. This reduces code size.
549  // e.g.
550  // movl 4(%esp), %eax
551  // addl $4, %eax
552  // vs.
553  // movl $4, %eax
554  // addl 4(%esp), %eax
555  // The former is 2 bytes shorter. In the case where the increment is 1,
556  // the saving can be 4 bytes (by using incl %eax).
557  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
558  if (Imm->getAPIntValue().isSignedIntN(8))
559  return false;
560 
561  // If this is a 64-bit AND with an immediate that fits in 32-bits,
562  // prefer using the smaller and over folding the load. This is needed to
563  // make sure immediates created by shrinkAndImmediate are always folded.
564  // Ideally we would narrow the load during DAG combine and get the
565  // best of both worlds.
566  if (U->getOpcode() == ISD::AND &&
567  Imm->getAPIntValue().getBitWidth() == 64 &&
568  Imm->getAPIntValue().isIntN(32))
569  return false;
570  }
571 
572  // If the other operand is a TLS address, we should fold it instead.
573  // This produces
574  // movl %gs:0, %eax
575  // leal i@NTPOFF(%eax), %eax
576  // instead of
577  // movl $i@NTPOFF, %eax
578  // addl %gs:0, %eax
579  // if the block also has an access to a second TLS address this will save
580  // a load.
581  // FIXME: This is probably also true for non-TLS addresses.
582  if (Op1.getOpcode() == X86ISD::Wrapper) {
583  SDValue Val = Op1.getOperand(0);
584  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
585  return false;
586  }
587 
588  // Don't fold load if this matches the BTS/BTR/BTC patterns.
589  // BTS: (or X, (shl 1, n))
590  // BTR: (and X, (rotl -2, n))
591  // BTC: (xor X, (shl 1, n))
592  if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
593  if (U->getOperand(0).getOpcode() == ISD::SHL &&
594  isOneConstant(U->getOperand(0).getOperand(0)))
595  return false;
596 
597  if (U->getOperand(1).getOpcode() == ISD::SHL &&
598  isOneConstant(U->getOperand(1).getOperand(0)))
599  return false;
600  }
601  if (U->getOpcode() == ISD::AND) {
602  SDValue U0 = U->getOperand(0);
603  SDValue U1 = U->getOperand(1);
604  if (U0.getOpcode() == ISD::ROTL) {
605  auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
606  if (C && C->getSExtValue() == -2)
607  return false;
608  }
609 
610  if (U1.getOpcode() == ISD::ROTL) {
611  auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
612  if (C && C->getSExtValue() == -2)
613  return false;
614  }
615  }
616 
617  break;
618  }
619  case ISD::SHL:
620  case ISD::SRA:
621  case ISD::SRL:
622  // Don't fold a load into a shift by immediate. The BMI2 instructions
623  // support folding a load, but not an immediate. The legacy instructions
624  // support folding an immediate, but can't fold a load. Folding an
625  // immediate is preferable to folding a load.
626  if (isa<ConstantSDNode>(U->getOperand(1)))
627  return false;
628 
629  break;
630  }
631  }
632 
633  // Prevent folding a load if this can be implemented with an insert_subreg or
634  // a move that implicitly zeroes.
635  if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
636  isNullConstant(Root->getOperand(2)) &&
637  (Root->getOperand(0).isUndef() ||
638  ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
639  return false;
640 
641  return true;
642 }
643 
644 /// Replace the original chain operand of the call with
645 /// load's chain operand and move load below the call's chain operand.
646 static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
647  SDValue Call, SDValue OrigChain) {
648  SmallVector<SDValue, 8> Ops;
649  SDValue Chain = OrigChain.getOperand(0);
650  if (Chain.getNode() == Load.getNode())
651  Ops.push_back(Load.getOperand(0));
652  else {
653  assert(Chain.getOpcode() == ISD::TokenFactor &&
654  "Unexpected chain operand");
655  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
656  if (Chain.getOperand(i).getNode() == Load.getNode())
657  Ops.push_back(Load.getOperand(0));
658  else
659  Ops.push_back(Chain.getOperand(i));
660  SDValue NewChain =
661  CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
662  Ops.clear();
663  Ops.push_back(NewChain);
664  }
665  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
666  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
667  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
668  Load.getOperand(1), Load.getOperand(2));
669 
670  Ops.clear();
671  Ops.push_back(SDValue(Load.getNode(), 1));
672  Ops.append(Call->op_begin() + 1, Call->op_end());
673  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
674 }
675 
676 /// Return true if call address is a load and it can be
677 /// moved below CALLSEQ_START and the chains leading up to the call.
678 /// Return the CALLSEQ_START by reference as a second output.
679 /// In the case of a tail call, there isn't a callseq node between the call
680 /// chain and the load.
681 static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
682  // The transformation is somewhat dangerous if the call's chain was glued to
683  // the call. After MoveBelowOrigChain the load is moved between the call and
684  // the chain, this can create a cycle if the load is not folded. So it is
685  // *really* important that we are sure the load will be folded.
686  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
687  return false;
688  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
689  if (!LD ||
690  LD->isVolatile() ||
691  LD->getExtensionType() != ISD::NON_EXTLOAD ||
692  LD->getAddressingMode() != ISD::UNINDEXED)
693  return false;
694 
695  // Now let's find the callseq_start.
696  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
697  if (!Chain.hasOneUse())
698  return false;
699  Chain = Chain.getOperand(0);
700  }
701 
702  if (!Chain.getNumOperands())
703  return false;
704  // Since we are not checking for AA here, conservatively abort if the chain
705  // writes to memory. It's not safe to move the callee (a load) across a store.
706  if (isa<MemSDNode>(Chain.getNode()) &&
707  cast<MemSDNode>(Chain.getNode())->writeMem())
708  return false;
709  if (Chain.getOperand(0).getNode() == Callee.getNode())
710  return true;
711  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
712  Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
713  Callee.getValue(1).hasOneUse())
714  return true;
715  return false;
716 }
717 
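// Pre-isel pass over the DAG: turn flag-less X86ISD::AND nodes back into
// ISD::AND, move callee-address loads next to their calls so they can be
// folded, and lower FP_ROUND/FP_EXTEND involving the x87 stack into
// store/load pairs through a stack temporary.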
718 void X86DAGToDAGISel::PreprocessISelDAG() {
719  // OptFor[Min]Size are used in pattern predicates that isel is matching.
720  OptForSize = MF->getFunction().optForSize();
721  OptForMinSize = MF->getFunction().optForMinSize();
722  assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
723 
724  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
725  E = CurDAG->allnodes_end(); I != E; ) {
726  SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
727 
728  // If this is a target specific AND node with no flag usages, turn it back
729  // into ISD::AND to enable test instruction matching.
730  if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
731  SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
732  N->getOperand(0), N->getOperand(1));
733  --I;
734  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
735  ++I;
736  CurDAG->DeleteNode(N);
737  continue;
738  }
739 
740  if (OptLevel != CodeGenOpt::None &&
741  // Only do this when the target can fold the load into the call or
742  // jmp.
743  !Subtarget->useRetpolineIndirectCalls() &&
744  ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
745  (N->getOpcode() == X86ISD::TC_RETURN &&
746  (Subtarget->is64Bit() ||
747  !getTargetMachine().isPositionIndependent())))) {
748  /// Also try moving call address load from outside callseq_start to just
749  /// before the call to allow it to be folded.
750  ///
751  /// [Load chain]
752  /// ^
753  /// |
754  /// [Load]
755  /// ^ ^
756  /// | |
757  /// / \--
758  /// / |
759  ///[CALLSEQ_START] |
760  /// ^ |
761  /// | |
762  /// [LOAD/C2Reg] |
763  /// | |
764  /// \ /
765  /// \ /
766  /// [CALL]
767  bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
768  SDValue Chain = N->getOperand(0);
769  SDValue Load = N->getOperand(1);
770  if (!isCalleeLoad(Load, Chain, HasCallSeq))
771  continue;
772  moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
773  ++NumLoadMoved;
774  continue;
775  }
776 
777  // Lower fpround and fpextend nodes that target the FP stack to a store and
778  // load via the stack. This is a gross hack. We would like to simply mark
779  // these as being illegal, but when we do that, legalize produces these when
780  // it expands calls, then expands these in the same legalize pass. We would
781  // like dag combine to be able to hack on these between the call expansion
782  // and the node legalization. As such this pass basically does "really
783  // late" legalization of these inline with the X86 isel pass.
784  // FIXME: This should only happen when not compiled with -O0.
785  if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
786  continue;
787 
788  MVT SrcVT = N->getOperand(0).getSimpleValueType();
789  MVT DstVT = N->getSimpleValueType(0);
790 
791  // If any of the sources are vectors, no fp stack involved.
792  if (SrcVT.isVector() || DstVT.isVector())
793  continue;
794 
795  // If the source and destination are SSE registers, then this is a legal
796  // conversion that should not be lowered.
797  const X86TargetLowering *X86Lowering =
798  static_cast<const X86TargetLowering *>(TLI);
799  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
800  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
801  if (SrcIsSSE && DstIsSSE)
802  continue;
803 
804  if (!SrcIsSSE && !DstIsSSE) {
805  // If this is an FPStack extension, it is a noop.
806  if (N->getOpcode() == ISD::FP_EXTEND)
807  continue;
808  // If this is a value-preserving FPStack truncation, it is a noop.
809  if (N->getConstantOperandVal(1))
810  continue;
811  }
812 
813  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
814  // FPStack has extload and truncstore. SSE can fold direct loads into other
815  // operations. Based on this, decide what we want to do.
816  MVT MemVT;
817  if (N->getOpcode() == ISD::FP_ROUND)
818  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
819  else
820  MemVT = SrcIsSSE ? SrcVT : DstVT;
821 
822  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
823  SDLoc dl(N);
824 
825  // FIXME: optimize the case where the src/dest is a load or store?
826  SDValue Store =
827  CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
828  MemTmp, MachinePointerInfo(), MemVT);
829  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
830  MachinePointerInfo(), MemVT);
831 
832  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
833  // extload we created. This will cause general havoc on the DAG because
834  // anything below the conversion could be folded into other existing nodes.
835  // To avoid invalidating 'I', back it up to the convert node.
836  --I;
837  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
838 
839  // Now that we did that, the node is dead. Increment the iterator to the
840  // next node to process, then delete N.
841  ++I;
842  CurDAG->DeleteNode(N);
843  }
844 }
845 
846 // Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
847 bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
848  unsigned Opc = N->getMachineOpcode();
849  if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
850  Opc != X86::MOVSX64rr8)
851  return false;
852 
853  SDValue N0 = N->getOperand(0);
854 
855  // We need to be extracting the lower 8 bits of an extend.
856  if (!N0.isMachineOpcode() ||
857  N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
858  N0.getConstantOperandVal(1) != X86::sub_8bit)
859  return false;
860 
861  // We're looking for either a movsx or movzx to match the original opcode.
862  unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
863  : X86::MOVSX32rr8_NOREX;
864  SDValue N00 = N0.getOperand(0);
865  if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
866  return false;
867 
868  if (Opc == X86::MOVSX64rr8) {
869  // We had a sign extend from 8 to 64 bits. We still need to go from 32
870  // to 64.
871  MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
872  MVT::i64, N00);
873  ReplaceUses(N, Extend);
874  } else {
875  // Ok we can drop this extend and just use the original extend.
876  ReplaceUses(N, N00.getNode());
877  }
878 
879  return true;
880 }
881 
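// Post-isel peepholes: remove redundant movzx/movsx after an 8-bit divrem
// and drop register-to-register vector moves whose only purpose was to zero
// the upper bits, since a VEX/EVEX/XOP-encoded producer already does that.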
882 void X86DAGToDAGISel::PostprocessISelDAG() {
883  // Skip peepholes at -O0.
884  if (TM.getOptLevel() == CodeGenOpt::None)
885  return;
886 
887  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
888 
889  bool MadeChange = false;
890  while (Position != CurDAG->allnodes_begin()) {
891  SDNode *N = &*--Position;
892  // Skip dead nodes and any non-machine opcodes.
893  if (N->use_empty() || !N->isMachineOpcode())
894  continue;
895 
896  if (tryOptimizeRem8Extend(N)) {
897  MadeChange = true;
898  continue;
899  }
900 
901  // Attempt to remove vector moves that were inserted to zero upper bits.
902 
903  if (N->getMachineOpcode() != TargetOpcode::SUBREG_TO_REG)
904  continue;
905 
906  unsigned SubRegIdx = N->getConstantOperandVal(2);
907  if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
908  continue;
909 
910  SDValue Move = N->getOperand(1);
911  if (!Move.isMachineOpcode())
912  continue;
913 
914  // Make sure it's one of the move opcodes we recognize.
915  switch (Move.getMachineOpcode()) {
916  default:
917  continue;
918  case X86::VMOVAPDrr: case X86::VMOVUPDrr:
919  case X86::VMOVAPSrr: case X86::VMOVUPSrr:
920  case X86::VMOVDQArr: case X86::VMOVDQUrr:
921  case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
922  case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
923  case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
924  case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
925  case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
926  case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
927  case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
928  case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
929  case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
930  case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
931  case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
932  break;
933  }
934 
935  SDValue In = Move.getOperand(0);
936  if (!In.isMachineOpcode() ||
937  In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
938  continue;
939 
940  // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers
941  // the SHA instructions which use a legacy encoding.
942  uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
943  if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
944  (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
945  (TSFlags & X86II::EncodingMask) != X86II::XOP)
946  continue;
947 
948  // The producing instruction is another vector instruction, so we can drop
949  // the move.
950  CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
951  MadeChange = true;
952  }
953 
954  if (MadeChange)
955  CurDAG->RemoveDeadNodes();
956 }
957 
958 
959 /// Emit any code that needs to be executed only in the main function.
960 void X86DAGToDAGISel::emitSpecialCodeForMain() {
961  if (Subtarget->isTargetCygMing()) {
962  TargetLowering::ArgListTy Args;
963  auto &DL = CurDAG->getDataLayout();
964 
965  TargetLowering::CallLoweringInfo CLI(*CurDAG);
966  CLI.setChain(CurDAG->getRoot())
967  .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
968  CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
969  std::move(Args));
970  const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
971  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
972  CurDAG->setRoot(Result.second);
973  }
974 }
975 
976 void X86DAGToDAGISel::EmitFunctionEntryCode() {
977  // If this is main, emit special code for main.
978  const Function &F = MF->getFunction();
979  if (F.hasExternalLinkage() && F.getName() == "main")
980  emitSpecialCodeForMain();
981 }
982 
983 static bool isDispSafeForFrameIndex(int64_t Val) {
984  // On 64-bit platforms, we can run into an issue where a frame index
985  // includes a displacement that, when added to the explicit displacement,
986  // will overflow the displacement field. Assuming that the frame index
987  // displacement fits into a 31-bit integer (which is only slightly more
988  // aggressive than the current fundamental assumption that it fits into
989  // a 32-bit integer), a 31-bit disp should always be safe.
990  return isInt<31>(Val);
991 }
992 
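// Fold a constant offset into AM's displacement if it is safe to do so.
// Like the other match*/fold* helpers in this file, this returns true on
// failure and false on success.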
993 bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
994  X86ISelAddressMode &AM) {
995  // If there's no offset to fold, we don't need to do any work.
996  if (Offset == 0)
997  return false;
998 
999  // Cannot combine ExternalSymbol displacements with integer offsets.
1000  if (AM.ES || AM.MCSym)
1001  return true;
1002 
1003  int64_t Val = AM.Disp + Offset;
1004  CodeModel::Model M = TM.getCodeModel();
1005  if (Subtarget->is64Bit()) {
1006  if (!X86::isOffsetSuitableForCodeModel(Val, M,
1007  AM.hasSymbolicDisplacement()))
1008  return true;
1009  // In addition to the checks required for a register base, check that
1010  // we do not try to use an unsafe Disp with a frame index.
1011  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1012  !isDispSafeForFrameIndex(Val))
1013  return true;
1014  }
1015  AM.Disp = Val;
1016  return false;
1017 
1018 }
1019 
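// Try to fold a zero-offset load from FS/GS into the segment field of the
// addressing mode (see the TLS note below). Returns true on failure.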
1020 bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
1021  SDValue Address = N->getOperand(1);
1022 
1023  // load gs:0 -> GS segment register.
1024  // load fs:0 -> FS segment register.
1025  //
1026  // This optimization is valid because the GNU TLS model defines that
1027  // gs:0 (or fs:0 on X86-64) contains its own address.
1028  // For more information see http://people.redhat.com/drepper/tls.pdf
1029  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
1030  if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
1031  !IndirectTlsSegRefs &&
1032  (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1033  Subtarget->isTargetFuchsia()))
1034  switch (N->getPointerInfo().getAddrSpace()) {
1035  case 256:
1036  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1037  return false;
1038  case 257:
1039  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1040  return false;
1041  // Address space 258 is not handled here, because it is not used to
1042  // address TLS areas.
1043  }
1044 
1045  return true;
1046 }
1047 
1048 /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1049 /// mode. These wrap things that will resolve down into a symbol reference.
1050 /// If no match is possible, this returns true, otherwise it returns false.
1051 bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1052  // If the addressing mode already has a symbol as the displacement, we can
1053  // never match another symbol.
1054  if (AM.hasSymbolicDisplacement())
1055  return true;
1056 
1057  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1058 
1059  // We can't use an addressing mode in the 64-bit large code model. In the
1060  // medium code model, we can use such a mode when RIP wrappers are present.
1061  // That signifies access to globals that are known to be "near", such as the
1062  // GOT itself.
1063  CodeModel::Model M = TM.getCodeModel();
1064  if (Subtarget->is64Bit() &&
1065  (M == CodeModel::Large || (M == CodeModel::Medium && !IsRIPRel)))
1066  return true;
1067 
1068  // Base and index reg must be 0 in order to use %rip as base.
1069  if (IsRIPRel && AM.hasBaseOrIndexReg())
1070  return true;
1071 
1072  // Make a local copy in case we can't do this fold.
1073  X86ISelAddressMode Backup = AM;
1074 
1075  int64_t Offset = 0;
1076  SDValue N0 = N.getOperand(0);
1077  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1078  AM.GV = G->getGlobal();
1079  AM.SymbolFlags = G->getTargetFlags();
1080  Offset = G->getOffset();
1081  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1082  AM.CP = CP->getConstVal();
1083  AM.Align = CP->getAlignment();
1084  AM.SymbolFlags = CP->getTargetFlags();
1085  Offset = CP->getOffset();
1086  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1087  AM.ES = S->getSymbol();
1088  AM.SymbolFlags = S->getTargetFlags();
1089  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1090  AM.MCSym = S->getMCSymbol();
1091  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
1092  AM.JT = J->getIndex();
1093  AM.SymbolFlags = J->getTargetFlags();
1094  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1095  AM.BlockAddr = BA->getBlockAddress();
1096  AM.SymbolFlags = BA->getTargetFlags();
1097  Offset = BA->getOffset();
1098  } else
1099  llvm_unreachable("Unhandled symbol reference node.");
1100 
1101  if (foldOffsetIntoAddress(Offset, AM)) {
1102  AM = Backup;
1103  return true;
1104  }
1105 
1106  if (IsRIPRel)
1107  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1108 
1109  // Commit the changes now that we know this fold is safe.
1110  return false;
1111 }
1112 
1113 /// Add the specified node to the specified addressing mode, returning true if
1114 /// it cannot be done. This just pattern matches for the addressing mode.
1115 bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1116  if (matchAddressRecursively(N, AM, 0))
1117  return true;
1118 
1119  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1120  // a smaller encoding and avoids a scaled-index.
1121  if (AM.Scale == 2 &&
1122  AM.BaseType == X86ISelAddressMode::RegBase &&
1123  AM.Base_Reg.getNode() == nullptr) {
1124  AM.Base_Reg = AM.IndexReg;
1125  AM.Scale = 1;
1126  }
1127 
1128  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1129  // because it has a smaller encoding.
1130  // TODO: Which other code models can use this?
1131  if (TM.getCodeModel() == CodeModel::Small &&
1132  Subtarget->is64Bit() &&
1133  AM.Scale == 1 &&
1134  AM.BaseType == X86ISelAddressMode::RegBase &&
1135  AM.Base_Reg.getNode() == nullptr &&
1136  AM.IndexReg.getNode() == nullptr &&
1137  AM.SymbolFlags == X86II::MO_NO_FLAG &&
1138  AM.hasSymbolicDisplacement())
1139  AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1140 
1141  return false;
1142 }
1143 
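// Try to fold both operands of an ADD into AM, first as written and then
// commuted; failing that, fall back to using the two operands directly as
// base and index. Returns true on failure.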
1144 bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
1145  unsigned Depth) {
1146  // Add an artificial use to this node so that we can keep track of
1147  // it if it gets CSE'd with a different node.
1148  HandleSDNode Handle(N);
1149 
1150  X86ISelAddressMode Backup = AM;
1151  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1152  !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1153  return false;
1154  AM = Backup;
1155 
1156  // Try again after commuting the operands.
1157  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
1158  !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1159  return false;
1160  AM = Backup;
1161 
1162  // If we couldn't fold both operands into the address at the same time,
1163  // see if we can just put each operand into a register and fold at least
1164  // the add.
1165  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1166  !AM.Base_Reg.getNode() &&
1167  !AM.IndexReg.getNode()) {
1168  N = Handle.getValue();
1169  AM.Base_Reg = N.getOperand(0);
1170  AM.IndexReg = N.getOperand(1);
1171  AM.Scale = 1;
1172  return false;
1173  }
1174  N = Handle.getValue();
1175  return true;
1176 }
1177 
1178 // Insert a node into the DAG at least before the Pos node's position. This
1179 // will reposition the node as needed, and will assign it a node ID that is <=
1180 // the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1181 // IDs! The selection DAG must no longer depend on their uniqueness when this
1182 // is used.
1183 static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
1184  if (N->getNodeId() == -1 ||
1185  (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
1186  SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
1187  DAG.RepositionNode(Pos->getIterator(), N.getNode());
1188  // Mark Node as invalid for pruning as after this it may be a successor to a
1189  // selected node but otherwise be in the same position of Pos.
1190  // Conservatively mark it with the same -abs(Id) to assure node id
1191  // invariant is preserved.
1192  N->setNodeId(Pos->getNodeId());
1193  SelectionDAGISel::InvalidateNodeId(N.getNode());
1194  }
1195 }
1196 
1197 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1198 // safe. This allows us to convert the shift and AND into an h-register
1199 // extract and a scaled index. Returns false if the simplification is
1200 // performed.
1201 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
1202  uint64_t Mask,
1203  SDValue Shift, SDValue X,
1204  X86ISelAddressMode &AM) {
1205  if (Shift.getOpcode() != ISD::SRL ||
1206  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1207  !Shift.hasOneUse())
1208  return true;
1209 
1210  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1211  if (ScaleLog <= 0 || ScaleLog >= 4 ||
1212  Mask != (0xffu << ScaleLog))
1213  return true;
1214 
1215  MVT VT = N.getSimpleValueType();
1216  SDLoc DL(N);
1217  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1218  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1219  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1220  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1221  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1222  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1223 
1224  // Insert the new nodes into the topological ordering. We must do this in
1225  // a valid topological ordering as nothing is going to go back and re-sort
1226  // these nodes. We continually insert before 'N' in sequence as this is
1227  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1228  // hierarchy left to express.
1229  insertDAGNode(DAG, N, Eight);
1230  insertDAGNode(DAG, N, Srl);
1231  insertDAGNode(DAG, N, NewMask);
1232  insertDAGNode(DAG, N, And);
1233  insertDAGNode(DAG, N, ShlCount);
1234  insertDAGNode(DAG, N, Shl);
1235  DAG.ReplaceAllUsesWith(N, Shl);
1236  AM.IndexReg = And;
1237  AM.Scale = (1 << ScaleLog);
1238  return false;
1239 }
1240 
1241 // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1242 // allows us to fold the shift into this addressing mode. Returns false if the
1243 // transform succeeded.
1244 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
1245  uint64_t Mask,
1246  SDValue Shift, SDValue X,
1247  X86ISelAddressMode &AM) {
1248  if (Shift.getOpcode() != ISD::SHL ||
1249  !isa<ConstantSDNode>(Shift.getOperand(1)))
1250  return true;
1251 
1252  // Not likely to be profitable if either the AND or SHIFT node has more
1253  // than one use (unless all uses are for address computation). Besides,
1254  // isel mechanism requires their node ids to be reused.
1255  if (!N.hasOneUse() || !Shift.hasOneUse())
1256  return true;
1257 
1258  // Verify that the shift amount is something we can fold.
1259  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1260  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1261  return true;
1262 
1263  MVT VT = N.getSimpleValueType();
1264  SDLoc DL(N);
1265  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1266  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1267  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1268 
1269  // Insert the new nodes into the topological ordering. We must do this in
1270  // a valid topological ordering as nothing is going to go back and re-sort
1271  // these nodes. We continually insert before 'N' in sequence as this is
1272  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1273  // hierarchy left to express.
1274  insertDAGNode(DAG, N, NewMask);
1275  insertDAGNode(DAG, N, NewAnd);
1276  insertDAGNode(DAG, N, NewShift);
1277  DAG.ReplaceAllUsesWith(N, NewShift);
1278 
1279  AM.Scale = 1 << ShiftAmt;
1280  AM.IndexReg = NewAnd;
1281  return false;
1282 }
1283 
1284 // Implement some heroics to detect shifts of masked values where the mask can
1285 // be replaced by extending the shift and undoing that in the addressing mode
1286 // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1287 // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1288 // the addressing mode. This results in code such as:
1289 //
1290 // int f(short *y, int *lookup_table) {
1291 // ...
1292 // return *y + lookup_table[*y >> 11];
1293 // }
1294 //
1295 // Turning into:
1296 // movzwl (%rdi), %eax
1297 // movl %eax, %ecx
1298 // shrl $11, %ecx
1299 // addl (%rsi,%rcx,4), %eax
1300 //
1301 // Instead of:
1302 // movzwl (%rdi), %eax
1303 // movl %eax, %ecx
1304 // shrl $9, %ecx
1305 // andl $124, %rcx
1306 // addl (%rsi,%rcx), %eax
1307 //
1308 // Note that this function assumes the mask is provided as a mask *after* the
1309 // value is shifted. The input chain may or may not match that, but computing
1310 // such a mask is trivial.
1311 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
1312  uint64_t Mask,
1313  SDValue Shift, SDValue X,
1314  X86ISelAddressMode &AM) {
1315  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1316  !isa<ConstantSDNode>(Shift.getOperand(1)))
1317  return true;
1318 
1319  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1320  unsigned MaskLZ = countLeadingZeros(Mask);
1321  unsigned MaskTZ = countTrailingZeros(Mask);
1322 
1323  // The amount of shift we're trying to fit into the addressing mode is taken
1324  // from the trailing zeros of the mask.
1325  unsigned AMShiftAmt = MaskTZ;
1326 
1327  // There is nothing we can do here unless the mask is removing some bits.
1328  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1329  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1330 
1331  // We also need to ensure that mask is a continuous run of bits.
1332  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1333 
1334  // Scale the leading zero count down based on the actual size of the value.
1335  // Also scale it down based on the size of the shift.
1336  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1337  if (MaskLZ < ScaleDown)
1338  return true;
1339  MaskLZ -= ScaleDown;
1340 
1341  // The final check is to ensure that any masked out high bits of X are
1342  // already known to be zero. Otherwise, the mask has a semantic impact
1343  // other than masking out a couple of low bits. Unfortunately, because of
1344  // the mask, zero extensions will be removed from operands in some cases.
1345  // This code works extra hard to look through extensions because we can
1346  // replace them with zero extensions cheaply if necessary.
1347  bool ReplacingAnyExtend = false;
1348  if (X.getOpcode() == ISD::ANY_EXTEND) {
1349  unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
1350  X.getOperand(0).getSimpleValueType().getSizeInBits();
1351  // Assume that we'll replace the any-extend with a zero-extend, and
1352  // narrow the search to the extended value.
1353  X = X.getOperand(0);
1354  MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1355  ReplacingAnyExtend = true;
1356  }
1357  APInt MaskedHighBits =
1358  APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
1359  KnownBits Known;
1360  DAG.computeKnownBits(X, Known);
1361  if (MaskedHighBits != Known.Zero) return true;
1362 
1363  // We've identified a pattern that can be transformed into a single shift
1364  // and an addressing mode. Make it so.
1365  MVT VT = N.getSimpleValueType();
1366  if (ReplacingAnyExtend) {
1367  assert(X.getValueType() != VT);
1368  // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1369  SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1370  insertDAGNode(DAG, N, NewX);
1371  X = NewX;
1372  }
1373  SDLoc DL(N);
1374  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1375  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1376  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1377  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1378 
1379  // Insert the new nodes into the topological ordering. We must do this in
1380  // a valid topological ordering as nothing is going to go back and re-sort
1381  // these nodes. We continually insert before 'N' in sequence as this is
1382  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1383  // hierarchy left to express.
1384  insertDAGNode(DAG, N, NewSRLAmt);
1385  insertDAGNode(DAG, N, NewSRL);
1386  insertDAGNode(DAG, N, NewSHLAmt);
1387  insertDAGNode(DAG, N, NewSHL);
1388  DAG.ReplaceAllUsesWith(N, NewSHL);
1389 
1390  AM.Scale = 1 << AMShiftAmt;
1391  AM.IndexReg = NewSRL;
1392  return false;
1393 }
1394 
1395 // Transform "(X >> SHIFT) & (MASK << C1)" to
1396 // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
1397 // matched to a BEXTR later. Returns false if the simplification is performed.
1398 static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
1399  uint64_t Mask,
1400  SDValue Shift, SDValue X,
1401  X86ISelAddressMode &AM,
1402  const X86Subtarget &Subtarget) {
1403  if (Shift.getOpcode() != ISD::SRL ||
1404  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1405  !Shift.hasOneUse() || !N.hasOneUse())
1406  return true;
1407 
1408  // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
1409  if (!Subtarget.hasTBM() &&
1410  !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
1411  return true;
1412 
1413  // We need to ensure that mask is a continuous run of bits.
1414  if (!isShiftedMask_64(Mask)) return true;
1415 
1416  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1417 
1418  // The amount of shift we're trying to fit into the addressing mode is taken
1419  // from the trailing zeros of the mask.
1420  unsigned AMShiftAmt = countTrailingZeros(Mask);
1421 
1422  // There is nothing we can do here unless the mask is removing some bits.
1423  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1424  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1425 
1426  MVT VT = N.getSimpleValueType();
1427  SDLoc DL(N);
1428  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1429  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1430  SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
1431  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
1432  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1433  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
1434 
1435  // Insert the new nodes into the topological ordering. We must do this in
1436  // a valid topological ordering as nothing is going to go back and re-sort
1437  // these nodes. We continually insert before 'N' in sequence as this is
1438  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1439  // hierarchy left to express.
1440  insertDAGNode(DAG, N, NewSRLAmt);
1441  insertDAGNode(DAG, N, NewSRL);
1442  insertDAGNode(DAG, N, NewMask);
1443  insertDAGNode(DAG, N, NewAnd);
1444  insertDAGNode(DAG, N, NewSHLAmt);
1445  insertDAGNode(DAG, N, NewSHL);
1446  DAG.ReplaceAllUsesWith(N, NewSHL);
1447 
1448  AM.Scale = 1 << AMShiftAmt;
1449  AM.IndexReg = NewAnd;
1450  return false;
1451 }
1452 
1453 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1454  unsigned Depth) {
1455  SDLoc dl(N);
1456  LLVM_DEBUG({
1457  dbgs() << "MatchAddress: ";
1458  AM.dump(CurDAG);
1459  });
1460  // Limit recursion.
1461  if (Depth > 5)
1462  return matchAddressBase(N, AM);
1463 
1464  // If this is already a %rip relative address, we can only merge immediates
1465  // into it. Instead of handling this in every case, we handle it here.
1466  // RIP relative addressing: %rip + 32-bit displacement!
1467  if (AM.isRIPRelative()) {
1468  // FIXME: JumpTable and ExternalSymbol address currently don't like
1469  // displacements. It isn't very important, but this should be fixed for
1470  // consistency.
1471  if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1472  return true;
1473 
1474  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1475  if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1476  return false;
1477  return true;
1478  }
1479 
1480  switch (N.getOpcode()) {
1481  default: break;
1482  case ISD::LOCAL_RECOVER: {
1483  if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1484  if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1485  // Use the symbol and don't prefix it.
1486  AM.MCSym = ESNode->getMCSymbol();
1487  return false;
1488  }
1489  break;
1490  }
1491  case ISD::Constant: {
1492  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1493  if (!foldOffsetIntoAddress(Val, AM))
1494  return false;
1495  break;
1496  }
1497 
1498  case X86ISD::Wrapper:
1499  case X86ISD::WrapperRIP:
1500  if (!matchWrapper(N, AM))
1501  return false;
1502  break;
1503 
1504  case ISD::LOAD:
1505  if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1506  return false;
1507  break;
1508 
1509  case ISD::FrameIndex:
1510  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1511  AM.Base_Reg.getNode() == nullptr &&
1512  (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1513  AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1514  AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1515  return false;
1516  }
1517  break;
1518 
1519  case ISD::SHL:
1520  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1521  break;
1522 
1523  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1524  unsigned Val = CN->getZExtValue();
1525  // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1526  // that the base operand remains free for further matching. If
1527  // the base doesn't end up getting used, a post-processing step
1528  // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1529  if (Val == 1 || Val == 2 || Val == 3) {
1530  AM.Scale = 1 << Val;
1531  SDValue ShVal = N.getOperand(0);
1532 
1533  // Okay, we know that we have a scale by now. However, if the scaled
1534  // value is an add of something and a constant, we can fold the
1535  // constant into the disp field here.
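  // For example, (shl (add X, 8), 1) becomes index = X, scale = 2, with
  // 8 << 1 = 16 folded into the displacement.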
1536  if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1537  AM.IndexReg = ShVal.getOperand(0);
1538  ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1539  uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1540  if (!foldOffsetIntoAddress(Disp, AM))
1541  return false;
1542  }
1543 
1544  AM.IndexReg = ShVal;
1545  return false;
1546  }
1547  }
1548  break;
1549 
1550  case ISD::SRL: {
1551  // Scale must not be used already.
1552  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1553 
1554  SDValue And = N.getOperand(0);
1555  if (And.getOpcode() != ISD::AND) break;
1556  SDValue X = And.getOperand(0);
1557 
1558  // We only handle up to 64-bit values here as those are what matter for
1559  // addressing mode optimizations.
1560  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1561 
1562  // The mask used for the transform is expected to be post-shift, but we
1563  // found the shift first so just apply the shift to the mask before passing
1564  // it down.
1565  if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1566  !isa<ConstantSDNode>(And.getOperand(1)))
1567  break;
1568  uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1569 
1570  // Try to fold the mask and shift into the scale, and return false if we
1571  // succeed.
1572  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1573  return false;
1574  break;
1575  }
1576 
1577  case ISD::SMUL_LOHI:
1578  case ISD::UMUL_LOHI:
1579  // A mul_lohi where we need the low part can be folded as a plain multiply.
1580  if (N.getResNo() != 0) break;
1581  LLVM_FALLTHROUGH;
1582  case ISD::MUL:
1583  case X86ISD::MUL_IMM:
1584  // X*[3,5,9] -> X+X*[2,4,8]
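// For illustration (register name assumed): x*9 becomes base = x, index = x,
// scale = 8, which a single LEA such as 'leaq (%rdi,%rdi,8), %rax' can encode.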
1585  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1586  AM.Base_Reg.getNode() == nullptr &&
1587  AM.IndexReg.getNode() == nullptr) {
1588  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1589  if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1590  CN->getZExtValue() == 9) {
1591  AM.Scale = unsigned(CN->getZExtValue())-1;
1592 
1593  SDValue MulVal = N.getOperand(0);
1594  SDValue Reg;
1595 
1596  // Okay, we know that we have a scale by now. However, if the scaled
1597  // value is an add of something and a constant, we can fold the
1598  // constant into the disp field here.
1599  if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1600  isa<ConstantSDNode>(MulVal.getOperand(1))) {
1601  Reg = MulVal.getOperand(0);
1602  ConstantSDNode *AddVal =
1603  cast<ConstantSDNode>(MulVal.getOperand(1));
1604  uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1605  if (foldOffsetIntoAddress(Disp, AM))
1606  Reg = N.getOperand(0);
1607  } else {
1608  Reg = N.getOperand(0);
1609  }
1610 
1611  AM.IndexReg = AM.Base_Reg = Reg;
1612  return false;
1613  }
1614  }
1615  break;
1616 
1617  case ISD::SUB: {
1618  // Given A-B, if A can be completely folded into the address (leaving the
1619  // index field unused), use -B as the index.
1620  // This is a win if A has multiple parts that can be folded into
1621  // the address. Also, this saves a mov if the base register has
1622  // other uses, since it avoids a two-address sub instruction; however,
1623  // it costs an additional mov if the index register has other uses.
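// Sketch of the resulting match (operands assumed): for 'GV - %rcx', GV is
// folded into the displacement and the index becomes (0 - %rcx) with scale 1,
// which later selects to a NEG feeding the address computation.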
1624 
1625  // Add an artificial use to this node so that we can keep track of
1626  // it if it gets CSE'd with a different node.
1627  HandleSDNode Handle(N);
1628 
1629  // Test if the LHS of the sub can be folded.
1630  X86ISelAddressMode Backup = AM;
1631  if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
1632  AM = Backup;
1633  break;
1634  }
1635  // Test if the index field is free for use.
1636  if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
1637  AM = Backup;
1638  break;
1639  }
1640 
1641  int Cost = 0;
1642  SDValue RHS = Handle.getValue().getOperand(1);
1643  // If the RHS involves a register with multiple uses, this
1644  // transformation incurs an extra mov, due to the neg instruction
1645  // clobbering its operand.
1646  if (!RHS.getNode()->hasOneUse() ||
1647  RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
1648  RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
1649  RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
1650  (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
1651  RHS.getOperand(0).getValueType() == MVT::i32))
1652  ++Cost;
1653  // If the base is a register with multiple uses, this
1654  // transformation may save a mov.
1655  // FIXME: Don't rely on DELETED_NODEs.
1656  if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
1657  AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
1658  !AM.Base_Reg.getNode()->hasOneUse()) ||
1659  AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1660  --Cost;
1661  // If the folded LHS was interesting, this transformation saves
1662  // address arithmetic.
1663  if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
1664  ((AM.Disp != 0) && (Backup.Disp == 0)) +
1665  (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
1666  --Cost;
1667  // If it doesn't look like it may be an overall win, don't do it.
1668  if (Cost >= 0) {
1669  AM = Backup;
1670  break;
1671  }
1672 
1673  // Ok, the transformation is legal and appears profitable. Go for it.
1674  SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
1675  SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
1676  AM.IndexReg = Neg;
1677  AM.Scale = 1;
1678 
1679  // Insert the new nodes into the topological ordering.
1680  insertDAGNode(*CurDAG, Handle.getValue(), Zero);
1681  insertDAGNode(*CurDAG, Handle.getValue(), Neg);
1682  return false;
1683  }
1684 
1685  case ISD::ADD:
1686  if (!matchAdd(N, AM, Depth))
1687  return false;
1688  break;
1689 
1690  case ISD::OR:
1691  // We want to look through a transform in InstCombine and DAGCombiner that
1692  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
1693  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
1694  // An 'lea' can then be used to match the shift (multiply) and add:
1695  // and $1, %esi
1696  // lea (%rsi, %rdi, 8), %rax
1697  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
1698  !matchAdd(N, AM, Depth))
1699  return false;
1700  break;
1701 
1702  case ISD::AND: {
1703  // Perform some heroic transforms on an and of a constant-count shift
1704  // with a constant to enable use of the scaled offset field.
1705 
1706  // Scale must not be used already.
1707  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1708 
1709  SDValue Shift = N.getOperand(0);
1710  if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
1711  SDValue X = Shift.getOperand(0);
1712 
1713  // We only handle up to 64-bit values here as those are what matter for
1714  // addressing mode optimizations.
1715  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1716 
1717  if (!isa<ConstantSDNode>(N.getOperand(1)))
1718  break;
1719  uint64_t Mask = N.getConstantOperandVal(1);
1720 
1721  // Try to fold the mask and shift into an extract and scale.
1722  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
1723  return false;
1724 
1725  // Try to fold the mask and shift directly into the scale.
1726  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
1727  return false;
1728 
1729  // Try to swap the mask and shift to place shifts which can be done as
1730  // a scale on the outside of the mask.
1731  if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
1732  return false;
1733 
1734  // Try to fold the mask and shift into BEXTR and scale.
1735  if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
1736  return false;
1737 
1738  break;
1739  }
1740  }
1741 
1742  return matchAddressBase(N, AM);
1743 }
1744 
1745 /// Helper for MatchAddress. Add the specified node to the
1746 /// specified addressing mode without any further recursion.
1747 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1748  // Is the base register already occupied?
1749  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
1750  // If so, check to see if the scale index register is set.
1751  if (!AM.IndexReg.getNode()) {
1752  AM.IndexReg = N;
1753  AM.Scale = 1;
1754  return false;
1755  }
1756 
1757  // Otherwise, we cannot select it.
1758  return true;
1759  }
1760 
1761  // Default, generate it as a register.
1762  AM.BaseType = X86ISelAddressMode::RegBase;
1763  AM.Base_Reg = N;
1764  return false;
1765 }
1766 
1767 /// Helper for selectVectorAddr. Handles things that can be folded into a
1768 /// gather scatter address. The index register and scale should have already
1769 /// been handled.
1770 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
1771  // TODO: Support other operations.
1772  switch (N.getOpcode()) {
1773  case ISD::Constant: {
1774  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1775  if (!foldOffsetIntoAddress(Val, AM))
1776  return false;
1777  break;
1778  }
1779  case X86ISD::Wrapper:
1780  if (!matchWrapper(N, AM))
1781  return false;
1782  break;
1783  }
1784 
1785  return matchAddressBase(N, AM);
1786 }
1787 
1788 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
1789  SDValue &Scale, SDValue &Index,
1790  SDValue &Disp, SDValue &Segment) {
1791  X86ISelAddressMode AM;
1792  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
1793  AM.IndexReg = Mgs->getIndex();
1794  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
1795 
1796  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1797  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1798  if (AddrSpace == 256)
1799  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1800  if (AddrSpace == 257)
1801  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1802  if (AddrSpace == 258)
1803  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1804 
1805  // Try to match into the base and displacement fields.
1806  if (matchVectorAddress(N, AM))
1807  return false;
1808 
1809  MVT VT = N.getSimpleValueType();
1810  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1811  if (!AM.Base_Reg.getNode())
1812  AM.Base_Reg = CurDAG->getRegister(0, VT);
1813  }
1814 
1815  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1816  return true;
1817 }
1818 
1819 /// Returns true if it is able to pattern match an addressing mode.
1820 /// It returns the operands which make up the maximal addressing mode it can
1821 /// match by reference.
1822 ///
1823 /// Parent is the parent node of the addr operand that is being matched. It
1824 /// is always a load, store, atomic node, or null. It is only null when
1825 /// checking memory operands for inline asm nodes.
1826 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
1827  SDValue &Scale, SDValue &Index,
1828  SDValue &Disp, SDValue &Segment) {
1829  X86ISelAddressMode AM;
1830 
1831  if (Parent &&
1832  // This list of opcodes are all the nodes that have an "addr:$ptr" operand
1833  // that are not a MemSDNode, and thus don't have proper addrspace info.
1834  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
1835  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
1836  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
1837  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
1838  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
1839  unsigned AddrSpace =
1840  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1841  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
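// For illustration (IR shape assumed): a load such as
//   %v = load i32, i32 addrspace(256)* %p
// therefore gets a %gs segment override, e.g. 'movl %gs:(%rax), %ecx'.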
1842  if (AddrSpace == 256)
1843  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1844  if (AddrSpace == 257)
1845  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1846  if (AddrSpace == 258)
1847  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1848  }
1849 
1850  if (matchAddress(N, AM))
1851  return false;
1852 
1853  MVT VT = N.getSimpleValueType();
1854  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1855  if (!AM.Base_Reg.getNode())
1856  AM.Base_Reg = CurDAG->getRegister(0, VT);
1857  }
1858 
1859  if (!AM.IndexReg.getNode())
1860  AM.IndexReg = CurDAG->getRegister(0, VT);
1861 
1862  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1863  return true;
1864 }
1865 
1866 // We can only fold a load if all nodes between it and the root node have a
1867 // single use. If there are additional uses, we could end up duplicating the
1868 // load.
1869 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
1870  while (User != Root) {
1871  if (!User->hasOneUse())
1872  return false;
1873  User = *User->use_begin();
1874  }
1875 
1876  return true;
1877 }
1878 
1879 /// Match a scalar SSE load. In particular, we want to match a load whose top
1880 /// elements are either undef or zeros. The load flavor is derived from the
1881 /// type of N, which is either v4f32 or v2f64.
1882 ///
1883 /// We also return:
1884 /// PatternChainNode: this is the matched node that has a chain input and
1885 /// output.
1886 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
1887  SDValue N, SDValue &Base,
1888  SDValue &Scale, SDValue &Index,
1889  SDValue &Disp, SDValue &Segment,
1890  SDValue &PatternNodeWithChain) {
1891  if (!hasSingleUsesFromRoot(Root, Parent))
1892  return false;
1893 
1894  // We can allow a full vector load here since narrowing a load is ok.
1895  if (ISD::isNON_EXTLoad(N.getNode())) {
1896  PatternNodeWithChain = N;
1897  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1898  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
1899  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1900  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1901  Segment);
1902  }
1903  }
1904 
1905  // We can also match the special zero extended load opcode.
1906  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
1907  PatternNodeWithChain = N;
1908  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1909  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
1910  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
1911  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
1912  Segment);
1913  }
1914  }
1915 
1916  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
1917  // once. Otherwise the load might get duplicated and the chain output of the
1918  // duplicate load will not be observed by all dependencies.
1919  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
1920  PatternNodeWithChain = N.getOperand(0);
1921  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1922  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1923  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
1924  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1925  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1926  Segment);
1927  }
1928  }
1929 
1930  // Also handle the case where we explicitly require zeros in the top
1931  // elements. This is a vector shuffle from the zero vector.
1932  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1933  // Check to see if the top elements are all zeros (or bitcast of zeros).
1934  N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1935  N.getOperand(0).getNode()->hasOneUse()) {
1936  PatternNodeWithChain = N.getOperand(0).getOperand(0);
1937  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1938  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1939  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
1940  // Okay, this is a zero extending load. Fold it.
1941  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1942  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1943  Segment);
1944  }
1945  }
1946 
1947  return false;
1948 }
1949 
1950 
1951 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
1952  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1953  uint64_t ImmVal = CN->getZExtValue();
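// For illustration (value assumed): 0x80000000 passes the isUInt<32> check
// below, so it can be materialized with a zero-extending 32-bit 'movl'
// instead of a 10-byte 'movabsq'.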
1954  if (!isUInt<32>(ImmVal))
1955  return false;
1956 
1957  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
1958  return true;
1959  }
1960 
1961  // In static codegen with small code model, we can get the address of a label
1962  // into a register with 'movl'
1963  if (N->getOpcode() != X86ISD::Wrapper)
1964  return false;
1965 
1966  N = N.getOperand(0);
1967 
1968  // At least GNU as does not accept 'movl' for TPOFF relocations.
1969  // FIXME: We could use 'movl' when we know we are targeting MC.
1970  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
1971  return false;
1972 
1973  Imm = N;
1974  if (N->getOpcode() != ISD::TargetGlobalAddress)
1975  return TM.getCodeModel() == CodeModel::Small;
1976 
1977  Optional<ConstantRange> CR =
1978  cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
1979  if (!CR)
1980  return TM.getCodeModel() == CodeModel::Small;
1981 
1982  return CR->getUnsignedMax().ult(1ull << 32);
1983 }
1984 
1985 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
1986  SDValue &Scale, SDValue &Index,
1987  SDValue &Disp, SDValue &Segment) {
1988  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
1989  SDLoc DL(N);
1990 
1991  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
1992  return false;
1993 
1994  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
1995  if (RN && RN->getReg() == 0)
1996  Base = CurDAG->getRegister(0, MVT::i64);
1997  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
1998  // Base could already be %rip, particularly in the x32 ABI.
1999  Base = SDValue(CurDAG->getMachineNode(
2000  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
2001  CurDAG->getTargetConstant(0, DL, MVT::i64),
2002  Base,
2003  CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
2004  0);
2005  }
2006 
2007  RN = dyn_cast<RegisterSDNode>(Index);
2008  if (RN && RN->getReg() == 0)
2009  Index = CurDAG->getRegister(0, MVT::i64);
2010  else {
2011  assert(Index.getValueType() == MVT::i32 &&
2012  "Expect to be extending 32-bit registers for use in LEA");
2013  Index = SDValue(CurDAG->getMachineNode(
2014  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
2015  CurDAG->getTargetConstant(0, DL, MVT::i64),
2016  Index,
2017  CurDAG->getTargetConstant(X86::sub_32bit, DL,
2018  MVT::i32)),
2019  0);
2020  }
2021 
2022  return true;
2023 }
2024 
2025 /// Calls SelectAddr and determines if the maximal addressing
2026 /// mode it matches can be cost effectively emitted as an LEA instruction.
2027 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
2028  SDValue &Base, SDValue &Scale,
2029  SDValue &Index, SDValue &Disp,
2030  SDValue &Segment) {
2031  X86ISelAddressMode AM;
2032 
2033  // Save the DL and VT before calling matchAddress, it can invalidate N.
2034  SDLoc DL(N);
2035  MVT VT = N.getSimpleValueType();
2036 
2037  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
2038  // segments.
2039  SDValue Copy = AM.Segment;
2040  SDValue T = CurDAG->getRegister(0, MVT::i32);
2041  AM.Segment = T;
2042  if (matchAddress(N, AM))
2043  return false;
2044  assert (T == AM.Segment);
2045  AM.Segment = Copy;
2046 
2047  unsigned Complexity = 0;
2048  if (AM.BaseType == X86ISelAddressMode::RegBase)
2049  if (AM.Base_Reg.getNode())
2050  Complexity = 1;
2051  else
2052  AM.Base_Reg = CurDAG->getRegister(0, VT);
2053  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2054  Complexity = 4;
2055 
2056  if (AM.IndexReg.getNode())
2057  Complexity++;
2058  else
2059  AM.IndexReg = CurDAG->getRegister(0, VT);
2060 
2061  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
2062  // a simple shift.
2063  if (AM.Scale > 1)
2064  Complexity++;
2065 
2066  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
2067  // to a LEA. This is determined with some experimentation but is by no means
2068  // optimal (especially for code size consideration). LEA is nice because of
2069  // its three-address nature. Tweak the cost function again when we can run
2070  // convertToThreeAddress() at register allocation time.
2071  if (AM.hasSymbolicDisplacement()) {
2072  // For X86-64, always use LEA to materialize RIP-relative addresses.
2073  if (Subtarget->is64Bit())
2074  Complexity = 4;
2075  else
2076  Complexity += 2;
2077  }
2078 
2079  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
2080  Complexity++;
2081 
2082  // If it isn't worth using an LEA, reject it.
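// For illustration (registers assumed): a full form like 8(%rdi,%rsi,4) scores
// base(1) + index(1) + scale>1(1) + disp(1) = 4 and is kept as an LEA, while a
// bare (%rdi) scores 1 and is rejected here in favor of plain adds/shifts.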
2083  if (Complexity <= 2)
2084  return false;
2085 
2086  getAddressOperands(AM, DL, Base, Scale, Index, Disp, Segment);
2087  return true;
2088 }
2089 
2090 /// This is only run on TargetGlobalTLSAddress nodes.
2091 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
2092  SDValue &Scale, SDValue &Index,
2093  SDValue &Disp, SDValue &Segment) {
2094  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
2095  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
2096 
2097  X86ISelAddressMode AM;
2098  AM.GV = GA->getGlobal();
2099  AM.Disp += GA->getOffset();
2100  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
2101  AM.SymbolFlags = GA->getTargetFlags();
2102 
2103  if (N.getValueType() == MVT::i32) {
2104  AM.Scale = 1;
2105  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
2106  } else {
2107  AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
2108  }
2109 
2110  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
2111  return true;
2112 }
2113 
2114 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
2115  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2116  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
2117  N.getValueType());
2118  return true;
2119  }
2120 
2121  // Keep track of the original value type and whether this value was
2122  // truncated. If we see a truncation from pointer type to VT that truncates
2123  // bits that are known to be zero, we can use a narrow reference.
2124  EVT VT = N.getValueType();
2125  bool WasTruncated = false;
2126  if (N.getOpcode() == ISD::TRUNCATE) {
2127  WasTruncated = true;
2128  N = N.getOperand(0);
2129  }
2130 
2131  if (N.getOpcode() != X86ISD::Wrapper)
2132  return false;
2133 
2134  // We can only use non-GlobalValues as immediates if they were not truncated,
2135  // as we do not have any range information. If we have a GlobalValue and the
2136  // address was not truncated, we can select it as an operand directly.
2137  unsigned Opc = N.getOperand(0)->getOpcode();
2138  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2139  Op = N.getOperand(0);
2140  // We can only select the operand directly if we didn't have to look past a
2141  // truncate.
2142  return !WasTruncated;
2143  }
2144 
2145  // Check that the global's range fits into VT.
2146  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2147  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2148  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2149  return false;
2150 
2151  // Okay, we can use a narrow reference.
2152  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2153  GA->getOffset(), GA->getTargetFlags());
2154  return true;
2155 }
2156 
2157 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2158  SDValue &Base, SDValue &Scale,
2159  SDValue &Index, SDValue &Disp,
2160  SDValue &Segment) {
2161  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2162  !IsProfitableToFold(N, P, Root) ||
2163  !IsLegalToFold(N, P, Root, OptLevel))
2164  return false;
2165 
2166  return selectAddr(N.getNode(),
2167  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2168 }
2169 
2170 /// Return an SDNode that returns the value of the global base register.
2171 /// Output instructions required to initialize the global base register,
2172 /// if necessary.
2173 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2174  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2175  auto &DL = MF->getDataLayout();
2176  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2177 }
2178 
2179 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2180  if (N->getOpcode() == ISD::TRUNCATE)
2181  N = N->getOperand(0).getNode();
2182  if (N->getOpcode() != X86ISD::Wrapper)
2183  return false;
2184 
2185  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2186  if (!GA)
2187  return false;
2188 
2189  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2190  return CR && CR->getSignedMin().sge(-1ull << Width) &&
2191  CR->getSignedMax().slt(1ull << Width);
2192 }
2193 
2194 /// Test whether the given X86ISD::CMP node has any uses which require the SF
2195 /// or OF bits to be accurate.
2196 static bool hasNoSignedComparisonUses(SDNode *N) {
2197  // Examine each user of the node.
2198  for (SDNode::use_iterator UI = N->use_begin(),
2199  UE = N->use_end(); UI != UE; ++UI) {
2200  // Only examine CopyToReg uses.
2201  if (UI->getOpcode() != ISD::CopyToReg)
2202  return false;
2203  // Only examine CopyToReg uses that copy to EFLAGS.
2204  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
2205  X86::EFLAGS)
2206  return false;
2207  // Examine each user of the CopyToReg use.
2208  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2209  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2210  // Only examine the Flag result.
2211  if (FlagUI.getUse().getResNo() != 1) continue;
2212  // Anything unusual: assume conservatively.
2213  if (!FlagUI->isMachineOpcode()) return false;
2214  // Examine the opcode of the user.
2215  switch (FlagUI->getMachineOpcode()) {
2216  // These comparisons don't treat the most significant bit specially.
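// For example, SETA/JA read only CF and ZF, and SETE/JE read only ZF, so none
// of the opcodes below depend on SF or OF.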
2217  case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
2218  case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
2219  case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
2220  case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
2221  case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
2222  case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
2223  case X86::CMOVA16rr: case X86::CMOVA16rm:
2224  case X86::CMOVA32rr: case X86::CMOVA32rm:
2225  case X86::CMOVA64rr: case X86::CMOVA64rm:
2226  case X86::CMOVAE16rr: case X86::CMOVAE16rm:
2227  case X86::CMOVAE32rr: case X86::CMOVAE32rm:
2228  case X86::CMOVAE64rr: case X86::CMOVAE64rm:
2229  case X86::CMOVB16rr: case X86::CMOVB16rm:
2230  case X86::CMOVB32rr: case X86::CMOVB32rm:
2231  case X86::CMOVB64rr: case X86::CMOVB64rm:
2232  case X86::CMOVBE16rr: case X86::CMOVBE16rm:
2233  case X86::CMOVBE32rr: case X86::CMOVBE32rm:
2234  case X86::CMOVBE64rr: case X86::CMOVBE64rm:
2235  case X86::CMOVE16rr: case X86::CMOVE16rm:
2236  case X86::CMOVE32rr: case X86::CMOVE32rm:
2237  case X86::CMOVE64rr: case X86::CMOVE64rm:
2238  case X86::CMOVNE16rr: case X86::CMOVNE16rm:
2239  case X86::CMOVNE32rr: case X86::CMOVNE32rm:
2240  case X86::CMOVNE64rr: case X86::CMOVNE64rm:
2241  case X86::CMOVNP16rr: case X86::CMOVNP16rm:
2242  case X86::CMOVNP32rr: case X86::CMOVNP32rm:
2243  case X86::CMOVNP64rr: case X86::CMOVNP64rm:
2244  case X86::CMOVP16rr: case X86::CMOVP16rm:
2245  case X86::CMOVP32rr: case X86::CMOVP32rm:
2246  case X86::CMOVP64rr: case X86::CMOVP64rm:
2247  continue;
2248  // Anything else: assume conservatively.
2249  default: return false;
2250  }
2251  }
2252  }
2253  return true;
2254 }
2255 
2256 /// Test whether the given node which sets flags has any uses which require the
2257 /// CF flag to be accurate.
2258 static bool hasNoCarryFlagUses(SDNode *N) {
2259  // Examine each user of the node.
2260  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
2261  ++UI) {
2262  // Only check things that use the flags.
2263  if (UI.getUse().getResNo() != 1)
2264  continue;
2265  // Only examine CopyToReg uses.
2266  if (UI->getOpcode() != ISD::CopyToReg)
2267  return false;
2268  // Only examine CopyToReg uses that copy to EFLAGS.
2269  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2270  return false;
2271  // Examine each user of the CopyToReg use.
2272  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2273  FlagUI != FlagUE; ++FlagUI) {
2274  // Only examine the Flag result.
2275  if (FlagUI.getUse().getResNo() != 1)
2276  continue;
2277  // Anything unusual: assume conservatively.
2278  if (!FlagUI->isMachineOpcode())
2279  return false;
2280  // Examine the opcode of the user.
2281  switch (FlagUI->getMachineOpcode()) {
2282  // Comparisons which don't examine the CF flag.
2283  case X86::SETOr: case X86::SETNOr: case X86::SETEr: case X86::SETNEr:
2284  case X86::SETSr: case X86::SETNSr: case X86::SETPr: case X86::SETNPr:
2285  case X86::SETLr: case X86::SETGEr: case X86::SETLEr: case X86::SETGr:
2286  case X86::JO_1: case X86::JNO_1: case X86::JE_1: case X86::JNE_1:
2287  case X86::JS_1: case X86::JNS_1: case X86::JP_1: case X86::JNP_1:
2288  case X86::JL_1: case X86::JGE_1: case X86::JLE_1: case X86::JG_1:
2289  case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
2290  case X86::CMOVO16rm: case X86::CMOVO32rm: case X86::CMOVO64rm:
2291  case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr:
2292  case X86::CMOVNO16rm: case X86::CMOVNO32rm: case X86::CMOVNO64rm:
2293  case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
2294  case X86::CMOVE16rm: case X86::CMOVE32rm: case X86::CMOVE64rm:
2295  case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
2296  case X86::CMOVNE16rm: case X86::CMOVNE32rm: case X86::CMOVNE64rm:
2297  case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
2298  case X86::CMOVS16rm: case X86::CMOVS32rm: case X86::CMOVS64rm:
2299  case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
2300  case X86::CMOVNS16rm: case X86::CMOVNS32rm: case X86::CMOVNS64rm:
2301  case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
2302  case X86::CMOVP16rm: case X86::CMOVP32rm: case X86::CMOVP64rm:
2303  case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
2304  case X86::CMOVNP16rm: case X86::CMOVNP32rm: case X86::CMOVNP64rm:
2305  case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
2306  case X86::CMOVL16rm: case X86::CMOVL32rm: case X86::CMOVL64rm:
2307  case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
2308  case X86::CMOVGE16rm: case X86::CMOVGE32rm: case X86::CMOVGE64rm:
2309  case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
2310  case X86::CMOVLE16rm: case X86::CMOVLE32rm: case X86::CMOVLE64rm:
2311  case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
2312  case X86::CMOVG16rm: case X86::CMOVG32rm: case X86::CMOVG64rm:
2313  continue;
2314  // Anything else: assume conservatively.
2315  default:
2316  return false;
2317  }
2318  }
2319  }
2320  return true;
2321 }
2322 
2323 /// Check whether or not the chain ending in StoreNode is suitable for doing
2324 /// the {load; op; store} to modify transformation.
2325 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2326  SDValue StoredVal, SelectionDAG *CurDAG,
2327  unsigned LoadOpNo,
2328  LoadSDNode *&LoadNode,
2329  SDValue &InputChain) {
2330  // Is the stored value result 0 of the operation?
2331  if (StoredVal.getResNo() != 0) return false;
2332 
2333  // Are there other uses of the operation other than the store?
2334  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2335 
2336  // Is the store non-extending and non-indexed?
2337  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2338  return false;
2339 
2340  SDValue Load = StoredVal->getOperand(LoadOpNo);
2341  // Is the stored value a non-extending and non-indexed load?
2342  if (!ISD::isNormalLoad(Load.getNode())) return false;
2343 
2344  // Return LoadNode by reference.
2345  LoadNode = cast<LoadSDNode>(Load);
2346 
2347  // Is store the only read of the loaded value?
2348  if (!Load.hasOneUse())
2349  return false;
2350 
2351  // Is the address of the store the same as the load?
2352  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2353  LoadNode->getOffset() != StoreNode->getOffset())
2354  return false;
2355 
2356  bool FoundLoad = false;
2357  SmallVector<SDValue, 4> ChainOps;
2358  SmallVector<const SDNode *, 4> LoopWorklist;
2359  SmallPtrSet<const SDNode *, 16> Visited;
2360  const unsigned int Max = 1024;
2361 
2362  // Visualization of Load-Op-Store fusion:
2363  // -------------------------
2364  // Legend:
2365  // *-lines = Chain operand dependencies.
2366  // |-lines = Normal operand dependencies.
2367  // Dependencies flow down and right. n-suffix references multiple nodes.
2368  //
2369  // C Xn C
2370  // * * *
2371  // * * *
2372  // Xn A-LD Yn TF Yn
2373  // * * \ | * |
2374  // * * \ | * |
2375  // * * \ | => A--LD_OP_ST
2376  // * * \| \
2377  // TF OP \
2378  // * | \ Zn
2379  // * | \
2380  // A-ST Zn
2381  //
2382 
2383  // This merge induces dependences from: #1: Xn -> LD, OP, Zn
2384  // #2: Yn -> LD
2385  // #3: ST -> Zn
2386 
2387  // Ensure the transform is safe by checking for the dual
2388  // dependencies to make sure we do not induce a loop.
2389 
2390  // As LD is a predecessor to both OP and ST we can do this by checking:
2391  // a). if LD is a predecessor to a member of Xn or Yn.
2392  // b). if a Zn is a predecessor to ST.
2393 
2394  // However, (b) can only occur through being a chain predecessor to
2395  // ST, which is the same as Zn being a member or predecessor of Xn,
2396  // which is a subset of LD being a predecessor of Xn. So it's
2397  // subsumed by check (a).
2398 
2399  SDValue Chain = StoreNode->getChain();
2400 
2401  // Gather X elements in ChainOps.
2402  if (Chain == Load.getValue(1)) {
2403  FoundLoad = true;
2404  ChainOps.push_back(Load.getOperand(0));
2405  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2406  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2407  SDValue Op = Chain.getOperand(i);
2408  if (Op == Load.getValue(1)) {
2409  FoundLoad = true;
2410  // Drop Load, but keep its chain. No cycle check necessary.
2411  ChainOps.push_back(Load.getOperand(0));
2412  continue;
2413  }
2414  LoopWorklist.push_back(Op.getNode());
2415  ChainOps.push_back(Op);
2416  }
2417  }
2418 
2419  if (!FoundLoad)
2420  return false;
2421 
2422  // Worklist is currently Xn. Add Yn to worklist.
2423  for (SDValue Op : StoredVal->ops())
2424  if (Op.getNode() != LoadNode)
2425  LoopWorklist.push_back(Op.getNode());
2426 
2427  // Check (a) if Load is a predecessor to Xn + Yn
2428  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2429  true))
2430  return false;
2431 
2432  InputChain =
2433  CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2434  return true;
2435 }
2436 
2437 // Change a chain of {load; op; store} of the same value into a simple op
2438 // through memory of that value, if the uses of the modified value and its
2439 // address are suitable.
2440 //
2441 // The tablegen memory operand pattern is currently not able to match
2442 // the case where the EFLAGS on the original operation are used.
2443 //
2444 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2445 // be transferred from a node in the pattern to the result node, probably with
2446 // a new keyword. For example, we have this
2447 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2448 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2449 // (implicit EFLAGS)]>;
2450 // but maybe need something like this
2451 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2452 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2453 // (transferrable EFLAGS)]>;
2454 //
2455 // Until then, we manually fold these and instruction select the operation
2456 // here.
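// Sketch of the effect (IR shape assumed), matching the DEC64m pattern above:
//   %t = load i64, i64* %p
//   %t2 = add i64 %t, -1
//   store i64 %t2, i64* %p
// is folded here into a single memory-operand instruction such as 'decq (%rdi)'.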
2457 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2458  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2459  SDValue StoredVal = StoreNode->getOperand(1);
2460  unsigned Opc = StoredVal->getOpcode();
2461 
2462  // Before we try to select anything, make sure this is memory operand size
2463  // and opcode we can handle. Note that this must match the code below that
2464  // actually lowers the opcodes.
2465  EVT MemVT = StoreNode->getMemoryVT();
2466  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2467  MemVT != MVT::i8)
2468  return false;
2469 
2470  bool IsCommutable = false;
2471  switch (Opc) {
2472  default:
2473  return false;
2474  case X86ISD::INC:
2475  case X86ISD::DEC:
2476  case X86ISD::SUB:
2477  case X86ISD::SBB:
2478  break;
2479  case X86ISD::ADD:
2480  case X86ISD::ADC:
2481  case X86ISD::AND:
2482  case X86ISD::OR:
2483  case X86ISD::XOR:
2484  IsCommutable = true;
2485  break;
2486  }
2487 
2488  unsigned LoadOpNo = 0;
2489  LoadSDNode *LoadNode = nullptr;
2490  SDValue InputChain;
2491  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2492  LoadNode, InputChain)) {
2493  if (!IsCommutable)
2494  return false;
2495 
2496  // This operation is commutable, try the other operand.
2497  LoadOpNo = 1;
2498  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2499  LoadNode, InputChain))
2500  return false;
2501  }
2502 
2503  SDValue Base, Scale, Index, Disp, Segment;
2504  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2505  Segment))
2506  return false;
2507 
2508  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2509  unsigned Opc8) {
2510  switch (MemVT.getSimpleVT().SimpleTy) {
2511  case MVT::i64:
2512  return Opc64;
2513  case MVT::i32:
2514  return Opc32;
2515  case MVT::i16:
2516  return Opc16;
2517  case MVT::i8:
2518  return Opc8;
2519  default:
2520  llvm_unreachable("Invalid size!");
2521  }
2522  };
2523 
2524  MachineSDNode *Result;
2525  switch (Opc) {
2526  case X86ISD::INC:
2527  case X86ISD::DEC: {
2528  unsigned NewOpc =
2529  Opc == X86ISD::INC
2530  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2531  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2532  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2533  Result =
2534  CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
2535  break;
2536  }
2537  case X86ISD::ADD:
2538  case X86ISD::ADC:
2539  case X86ISD::SUB:
2540  case X86ISD::SBB:
2541  case X86ISD::AND:
2542  case X86ISD::OR:
2543  case X86ISD::XOR: {
2544  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
2545  switch (Opc) {
2546  case X86ISD::ADD:
2547  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
2548  X86::ADD8mr);
2549  case X86ISD::ADC:
2550  return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
2551  X86::ADC8mr);
2552  case X86ISD::SUB:
2553  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
2554  X86::SUB8mr);
2555  case X86ISD::SBB:
2556  return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
2557  X86::SBB8mr);
2558  case X86ISD::AND:
2559  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
2560  X86::AND8mr);
2561  case X86ISD::OR:
2562  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
2563  case X86ISD::XOR:
2564  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
2565  X86::XOR8mr);
2566  default:
2567  llvm_unreachable("Invalid opcode!");
2568  }
2569  };
2570  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
2571  switch (Opc) {
2572  case X86ISD::ADD:
2573  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
2574  case X86ISD::ADC:
2575  return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
2576  case X86ISD::SUB:
2577  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
2578  case X86ISD::SBB:
2579  return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
2580  case X86ISD::AND:
2581  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
2582  case X86ISD::OR:
2583  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
2584  case X86ISD::XOR:
2585  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
2586  default:
2587  llvm_unreachable("Invalid opcode!");
2588  }
2589  };
2590  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
2591  switch (Opc) {
2592  case X86ISD::ADD:
2593  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
2594  X86::ADD8mi);
2595  case X86ISD::ADC:
2596  return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
2597  X86::ADC8mi);
2598  case X86ISD::SUB:
2599  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
2600  X86::SUB8mi);
2601  case X86ISD::SBB:
2602  return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
2603  X86::SBB8mi);
2604  case X86ISD::AND:
2605  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
2606  X86::AND8mi);
2607  case X86ISD::OR:
2608  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
2609  X86::OR8mi);
2610  case X86ISD::XOR:
2611  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
2612  X86::XOR8mi);
2613  default:
2614  llvm_unreachable("Invalid opcode!");
2615  }
2616  };
2617 
2618  unsigned NewOpc = SelectRegOpcode(Opc);
2619  SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
2620 
2621  // See if the operand is a constant that we can fold into an immediate
2622  // operand.
2623  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
2624  auto OperandV = OperandC->getAPIntValue();
2625 
2626  // Check if we can shrink the operand enough to fit in an immediate (or
2627  // fit into a smaller immediate) by negating it and switching the
2628  // operation.
2629  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
2630  ((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 &&
2631  (-OperandV).getMinSignedBits() <= 8) ||
2632  (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
2633  (-OperandV).getMinSignedBits() <= 32)) &&
2634  hasNoCarryFlagUses(StoredVal.getNode())) {
2635  OperandV = -OperandV;
2636  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
2637  }
2638 
2639  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
2640  // the larger immediate operand.
2641  if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) {
2642  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2643  NewOpc = SelectImm8Opcode(Opc);
2644  } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() &&
2645  (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) {
2646  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2647  NewOpc = SelectImmOpcode(Opc);
2648  }
2649  }
2650 
2651  if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
2652  SDValue CopyTo =
2653  CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
2654  StoredVal.getOperand(2), SDValue());
2655 
2656  const SDValue Ops[] = {Base, Scale, Index, Disp,
2657  Segment, Operand, CopyTo, CopyTo.getValue(1)};
2658  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2659  Ops);
2660  } else {
2661  const SDValue Ops[] = {Base, Scale, Index, Disp,
2662  Segment, Operand, InputChain};
2663  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2664  Ops);
2665  }
2666  break;
2667  }
2668  default:
2669  llvm_unreachable("Invalid opcode!");
2670  }
2671 
2672  MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
2673  LoadNode->getMemOperand()};
2674  CurDAG->setNodeMemRefs(Result, MemOps);
2675 
2676  // Update Load Chain uses as well.
2677  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
2678  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2679  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2680  CurDAG->RemoveDeadNode(Node);
2681  return true;
2682 }
2683 
2684 // See if this is an X & Mask that we can match to BEXTR/BZHI.
2685 // Where Mask is one of the following patterns:
2686 // a) x & (1 << nbits) - 1
2687 // b) x & ~(-1 << nbits)
2688 // c) x & (-1 >> (32 - y))
2689 // d) x << (32 - y) >> (32 - y)
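// All four forms keep only the low 'nbits'/'y' bits of x; e.g. with BMI2,
// 'x & ((1 << %esi) - 1)' can be selected as a single 'bzhi %esi, %edi, %eax'
// (register names assumed for illustration).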
2690 bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
2691  assert(
2692  (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
2693  "Should be either an and-mask, or right-shift after clearing high bits.");
2694 
2695  // BEXTR is a BMI instruction, BZHI is a BMI2 instruction. We need at least one.
2696  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
2697  return false;
2698 
2699  MVT NVT = Node->getSimpleValueType(0);
2700 
2701  // Only supported for 32 and 64 bits.
2702  if (NVT != MVT::i32 && NVT != MVT::i64)
2703  return false;
2704 
2705  unsigned Size = NVT.getSizeInBits();
2706 
2707  SDValue NBits;
2708 
2709  // If we have BMI2's BZHI, we are ok with multi-use patterns.
2710  // Else, if we only have BMI1's BEXTR, we require one-use.
2711  const bool CanHaveExtraUses = Subtarget->hasBMI2();
2712  auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
2713  return CanHaveExtraUses ||
2714  Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
2715  };
2716  auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
2717  auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
2718 
2719  // a) x & ((1 << nbits) + (-1))
2720  auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool {
2721  // Match `add`. Must only have one use!
2722  if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
2723  return false;
2724  // We should be adding an all-ones constant (i.e. subtracting one).
2725  if (!isAllOnesConstant(Mask->getOperand(1)))
2726  return false;
2727  // Match `1 << nbits`. Must only have one use!
2728  SDValue M0 = Mask->getOperand(0);
2729  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
2730  return false;
2731  if (!isOneConstant(M0->getOperand(0)))
2732  return false;
2733  NBits = M0->getOperand(1);
2734  return true;
2735  };
2736 
2737  // b) x & ~(-1 << nbits)
2738  auto matchPatternB = [&checkOneUse, &NBits](SDValue Mask) -> bool {
2739  // Match `~()`. Must only have one use!
2740  if (!isBitwiseNot(Mask) || !checkOneUse(Mask))
2741  return false;
2742  // Match `-1 << nbits`. Must only have one use!
2743  SDValue M0 = Mask->getOperand(0);
2744  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
2745  return false;
2746  if (!isAllOnesConstant(M0->getOperand(0)))
2747  return false;
2748  NBits = M0->getOperand(1);
2749  return true;
2750  };
2751 
2752  // Match potentially-truncated (bitwidth - y)
2753  auto matchShiftAmt = [checkOneUse, Size, &NBits](SDValue ShiftAmt) {
2754  // Skip over a truncate of the shift amount.
2755  if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
2756  ShiftAmt = ShiftAmt.getOperand(0);
2757  // The trunc should have been the only user of the real shift amount.
2758  if (!checkOneUse(ShiftAmt))
2759  return false;
2760  }
2761  // Match the shift amount as: (bitwidth - y). It should go away, too.
2762  if (ShiftAmt.getOpcode() != ISD::SUB)
2763  return false;
2764  auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
2765  if (!V0 || V0->getZExtValue() != Size)
2766  return false;
2767  NBits = ShiftAmt.getOperand(1);
2768  return true;
2769  };
2770 
2771  // c) x & (-1 >> (32 - y))
2772  auto matchPatternC = [&checkOneUse, matchShiftAmt](SDValue Mask) -> bool {
2773  // Match `l>>`. Must only have one use!
2774  if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
2775  return false;
2776  // We should be shifting an all-ones constant.
2777  if (!isAllOnesConstant(Mask.getOperand(0)))
2778  return false;
2779  SDValue M1 = Mask.getOperand(1);
2780  // The shift amount should not be used externally.
2781  if (!checkOneUse(M1))
2782  return false;
2783  return matchShiftAmt(M1);
2784  };
2785 
2786  SDValue X;
2787 
2788  // d) x << (32 - y) >> (32 - y)
2789  auto matchPatternD = [&checkOneUse, &checkTwoUse, matchShiftAmt,
2790  &X](SDNode *Node) -> bool {
2791  if (Node->getOpcode() != ISD::SRL)
2792  return false;
2793  SDValue N0 = Node->getOperand(0);
2794  if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
2795  return false;
2796  SDValue N1 = Node->getOperand(1);
2797  SDValue N01 = N0->getOperand(1);
2798  // Both of the shifts must be by the exact same value.
2799  // There should not be any uses of the shift amount outside of the pattern.
2800  if (N1 != N01 || !checkTwoUse(N1))
2801  return false;
2802  if (!matchShiftAmt(N1))
2803  return false;
2804  X = N0->getOperand(0);
2805  return true;
2806  };
2807 
2808  auto matchLowBitMask = [&matchPatternA, &matchPatternB,
2809  &matchPatternC](SDValue Mask) -> bool {
2810  // FIXME: pattern c.
2811  return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
2812  };
2813 
2814  if (Node->getOpcode() == ISD::AND) {
2815  X = Node->getOperand(0);
2816  SDValue Mask = Node->getOperand(1);
2817 
2818  if (matchLowBitMask(Mask)) {
2819  // Great.
2820  } else {
2821  std::swap(X, Mask);
2822  if (!matchLowBitMask(Mask))
2823  return false;
2824  }
2825  } else if (!matchPatternD(Node))
2826  return false;
2827 
2828  SDLoc DL(Node);
2829 
2830  SDValue OrigNBits = NBits;
2831  if (NBits.getValueType() != NVT) {
2832  // Truncate the shift amount.
2833  NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
2834  insertDAGNode(*CurDAG, OrigNBits, NBits);
2835 
2836  // Insert 8-bit NBits into lowest 8 bits of NVT-sized (32 or 64-bit)
2837  // register. All the other bits are undefined, we do not care about them.
2838  SDValue ImplDef =
2839  SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, NVT), 0);
2840  insertDAGNode(*CurDAG, OrigNBits, ImplDef);
2841  NBits =
2842  CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, NVT, ImplDef, NBits);
2843  insertDAGNode(*CurDAG, OrigNBits, NBits);
2844  }
2845 
2846  if (Subtarget->hasBMI2()) {
2847  // Great, just emit the BZHI.
2848  SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
2849  ReplaceNode(Node, Extract.getNode());
2850  SelectCode(Extract.getNode());
2851  return true;
2852  }
2853 
2854  // Else, emitting BEXTR requires one more step.
2855  // The 'control' of BEXTR has the pattern of:
2856  // [15...8 bit][ 7...0 bit] location
2857  // [ bit count][ shift] name
2858  // I.e. 0b00000011'00000001 means (x >> 0b1) & 0b111
2859 
2860  // Shift NBits left by 8 bits, thus producing 'control'.
2861  // This makes the low 8 bits zero.
2862  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
2863  SDValue Control = CurDAG->getNode(ISD::SHL, DL, NVT, NBits, C8);
2864  insertDAGNode(*CurDAG, OrigNBits, Control);
2865 
2866  // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
2867  if (X.getOpcode() == ISD::SRL) {
2868  SDValue ShiftAmt = X.getOperand(1);
2869  X = X.getOperand(0);
2870 
2871  assert(ShiftAmt.getValueType() == MVT::i8 &&
2872  "Expected shift amount to be i8");
2873 
2874  // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
2875  SDValue OrigShiftAmt = ShiftAmt;
2876  ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, NVT, ShiftAmt);
2877  insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
2878 
2879  // And now 'or' these low 8 bits of shift amount into the 'control'.
2880  Control = CurDAG->getNode(ISD::OR, DL, NVT, Control, ShiftAmt);
2881  insertDAGNode(*CurDAG, OrigNBits, Control);
2882  }
2883 
2884  // And finally, form the BEXTR itself.
2885  SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, NVT, X, Control);
2886  ReplaceNode(Node, Extract.getNode());
2887  SelectCode(Extract.getNode());
2888 
2889  return true;
2890 }
2891 
2892 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
2893 MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
2894  MVT NVT = Node->getSimpleValueType(0);
2895  SDLoc dl(Node);
2896 
2897  SDValue N0 = Node->getOperand(0);
2898  SDValue N1 = Node->getOperand(1);
2899 
2900  // If we have TBM we can use an immediate for the control. If we have BMI
2901  // we should only do this if the BEXTR instruction is implemented well.
2902  // Otherwise moving the control into a register makes this more costly.
2903  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
2904  // hoisting the move immediate would make it worthwhile with a less optimal
2905  // BEXTR?
2906  if (!Subtarget->hasTBM() &&
2907  !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
2908  return nullptr;
2909 
2910  // Must have a shift right.
2911  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
2912  return nullptr;
2913 
2914  // Shift can't have additional users.
2915  if (!N0->hasOneUse())
2916  return nullptr;
2917 
2918  // Only supported for 32 and 64 bits.
2919  if (NVT != MVT::i32 && NVT != MVT::i64)
2920  return nullptr;
2921 
2922  // Shift amount and RHS of and must be constant.
2923  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
2924  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2925  if (!MaskCst || !ShiftCst)
2926  return nullptr;
2927 
2928  // And RHS must be a mask.
2929  uint64_t Mask = MaskCst->getZExtValue();
2930  if (!isMask_64(Mask))
2931  return nullptr;
2932 
2933  uint64_t Shift = ShiftCst->getZExtValue();
2934  uint64_t MaskSize = countPopulation(Mask);
2935 
2936  // Don't interfere with something that can be handled by extracting AH.
2937  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
2938  if (Shift == 8 && MaskSize == 8)
2939  return nullptr;
2940 
2941  // Make sure we are only using bits that were in the original value, not
2942  // shifted in.
2943  if (Shift + MaskSize > NVT.getSizeInBits())
2944  return nullptr;
2945 
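// For illustration (values assumed): (x >> 4) & 0xfff gives Shift = 4 and
// MaskSize = 12, so the control built below is 4 | (12 << 8) = 0xc04
// (low byte = start bit, next byte = length).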
2946  SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
2947  unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
2948  unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
2949 
2950  // BMI requires the immediate to be placed in a register.
2951  if (!Subtarget->hasTBM()) {
2952  ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
2953  MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
2954  unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
2955  New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
2956  }
2957 
2958  MachineSDNode *NewNode;
2959  SDValue Input = N0->getOperand(0);
2960  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2961  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
2962  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
2963  SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
2964  NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2965  // Update the chain.
2966  ReplaceUses(Input.getValue(1), SDValue(NewNode, 1));
2967  // Record the mem-refs
2968  CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
2969  } else {
2970  NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New);
2971  }
2972 
2973  return NewNode;
2974 }
2975 
2976 // Emit a PCMPISTR(I/M) instruction.
2977 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
2978  bool MayFoldLoad, const SDLoc &dl,
2979  MVT VT, SDNode *Node) {
2980  SDValue N0 = Node->getOperand(0);
2981  SDValue N1 = Node->getOperand(1);
2982  SDValue Imm = Node->getOperand(2);
2983  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
2984  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
2985 
2986  // Try to fold a load. No need to check alignment.
2987  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2988  if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
2989  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
2990  N1.getOperand(0) };
2991  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
2992  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2993  // Update the chain.
2994  ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
2995  // Record the mem-refs
2996  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
2997  return CNode;
2998  }
2999 
3000  SDValue Ops[] = { N0, N1, Imm };
3001  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
3002  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3003  return CNode;
3004 }
3005 
3006 // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
3007 // to emit a second instruction after this one. This is needed since we have two
3008 // copyToReg nodes glued before this and we need to continue that glue through.
3009 MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
3010  bool MayFoldLoad, const SDLoc &dl,
3011  MVT VT, SDNode *Node,
3012  SDValue &InFlag) {
3013  SDValue N0 = Node->getOperand(0);
3014  SDValue N2 = Node->getOperand(2);
3015  SDValue Imm = Node->getOperand(4);
3016  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3017  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3018 
3019  // Try to fold a load. No need to check alignment.
3020  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3021  if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3022  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3023  N2.getOperand(0), InFlag };
3024  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
3025  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3026  InFlag = SDValue(CNode, 3);
3027  // Update the chain.
3028  ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
3029  // Record the mem-refs
3030  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
3031  return CNode;
3032  }
3033 
3034  SDValue Ops[] = { N0, N2, Imm, InFlag };
3035  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
3036  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3037  InFlag = SDValue(CNode, 2);
3038  return CNode;
3039 }
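// Illustrative note on the folded-load forms above (operand names are those
// returned by tryFoldLoad): Tmp0..Tmp4 are the standard X86 memory operand
// tuple (base, scale, index, displacement, segment), followed by the
// immediate and the load's chain. A hypothetical PCMPISTRI reading
// [rdi + 16] would therefore carry (rdi, 1, noreg, 16, noreg) as its
// address operands.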
3040 
3041 bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3042  EVT VT = N->getValueType(0);
3043 
3044  // Only handle scalar shifts.
3045  if (VT.isVector())
3046  return false;
3047 
3048  // Narrower shifts only mask to 5 bits in hardware.
3049  unsigned Size = VT == MVT::i64 ? 64 : 32;
3050 
3051  SDValue OrigShiftAmt = N->getOperand(1);
3052  SDValue ShiftAmt = OrigShiftAmt;
3053  SDLoc DL(N);
3054 
3055  // Skip over a truncate of the shift amount.
3056  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
3057  ShiftAmt = ShiftAmt->getOperand(0);
3058 
3059  // This function is called after X86DAGToDAGISel::matchBitExtract(),
3060  // so we are not afraid that we might mess up the BZHI/BEXTR pattern.
3061 
3062  SDValue NewShiftAmt;
3063  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3064  SDValue Add0 = ShiftAmt->getOperand(0);
3065  SDValue Add1 = ShiftAmt->getOperand(1);
3066  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3067  // to avoid the ADD/SUB.
3068  if (isa<ConstantSDNode>(Add1) &&
3069  cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
3070  NewShiftAmt = Add0;
3071  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3072  // generate a NEG instead of a SUB of a constant.
3073  } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3074  isa<ConstantSDNode>(Add0) &&
3075  cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
3076  cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
3077  // Insert a negate op.
3078  // TODO: This isn't guaranteed to replace the sub if there is a logic cone
3079  // that uses it that's not a shift.
3080  EVT SubVT = ShiftAmt.getValueType();
3081  SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
3082  SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
3083  NewShiftAmt = Neg;
3084 
3085  // Insert these operands into a valid topological order so they can
3086  // get selected independently.
3087  insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
3088  insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
3089  } else
3090  return false;
3091  } else
3092  return false;
3093 
3094  if (NewShiftAmt.getValueType() != MVT::i8) {
3095  // Need to truncate the shift amount.
3096  NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
3097  // Add to a correct topological ordering.
3098  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3099  }
3100 
3101  // Insert a new mask to keep the shift amount legal. This should be removed
3102  // by isel patterns.
3103  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
3104  CurDAG->getConstant(Size - 1, DL, MVT::i8));
3105  // Place in a correct topological ordering.
3106  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3107 
3108  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
3109  NewShiftAmt);
3110  if (UpdatedNode != N) {
3111  // If we found an existing node, we should replace ourselves with that node
3112  // and wait for it to be selected after its other users.
3113  ReplaceNode(N, UpdatedNode);
3114  return true;
3115  }
3116 
3117  // If the original shift amount is now dead, delete it so that we don't run
3118  // it through isel.
3119  if (OrigShiftAmt.getNode()->use_empty())
3120  CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
3121 
3122  // Now that we've optimized the shift amount, defer to normal isel to get
3123  // load folding and legacy vs BMI2 selection without repeating it here.
3124  SelectCode(N);
3125  return true;
3126 }
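// Illustrative example of the rewrite above, assuming a 32-bit shift where
// the hardware already masks the amount to 5 bits:
//   shl i32 %x, (add %y, 32)  ->  shl i32 %x, (and %y, 31)
//   shl i32 %x, (sub 32, %y)  ->  shl i32 %x, (and (sub 0, %y), 31)
// The second form trades a SUB from a constant for a NEG, and the explicit
// AND is expected to be folded away by the shift isel patterns.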
3127 
3128 /// If the high bits of an 'and' operand are known zero, try setting the
3129 /// high bits of an 'and' constant operand to produce a smaller encoding by
3130 /// creating a small, sign-extended negative immediate rather than a large
3131 /// positive one. This reverses a transform in SimplifyDemandedBits that
3132 /// shrinks mask constants by clearing bits. There is also a possibility that
3133 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
3134 /// case, just replace the 'and'. Return 'true' if the node is replaced.
3135 bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
3136  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
3137  // have immediate operands.
3138  MVT VT = And->getSimpleValueType(0);
3139  if (VT != MVT::i32 && VT != MVT::i64)
3140  return false;
3141 
3142  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
3143  if (!And1C)
3144  return false;
3145 
3146  // Bail out if the mask constant is already negative. It can't shrink any further.
3147  // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
3148  // patterns to use a 32-bit and instead of a 64-bit and by relying on the
3149  // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
3150  // are negative too.
3151  APInt MaskVal = And1C->getAPIntValue();
3152  unsigned MaskLZ = MaskVal.countLeadingZeros();
3153  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
3154  return false;
3155 
3156  // Don't extend into the upper 32 bits of a 64 bit mask.
3157  if (VT == MVT::i64 && MaskLZ >= 32) {
3158  MaskLZ -= 32;
3159  MaskVal = MaskVal.trunc(32);
3160  }
3161 
3162  SDValue And0 = And->getOperand(0);
3163  APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
3164  APInt NegMaskVal = MaskVal | HighZeros;
3165 
3166  // If a negative constant would not allow a smaller encoding, there's no need
3167  // to continue. Only change the constant when we know it's a win.
3168  unsigned MinWidth = NegMaskVal.getMinSignedBits();
3169  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
3170  return false;
3171 
3172  // Extend masks if we truncated above.
3173  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
3174  NegMaskVal = NegMaskVal.zext(64);
3175  HighZeros = HighZeros.zext(64);
3176  }
3177 
3178  // The variable operand must be all zeros in the top bits to allow using the
3179  // new, negative constant as the mask.
3180  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
3181  return false;
3182 
3183  // Check if the mask is -1. In that case, this is an unnecessary instruction
3184  // that escaped earlier analysis.
3185  if (NegMaskVal.isAllOnesValue()) {
3186  ReplaceNode(And, And0.getNode());
3187  return true;
3188  }
3189 
3190  // A negative mask allows a smaller encoding. Create a new 'and' node.
3191  SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
3192  SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
3193  ReplaceNode(And, NewAnd.getNode());
3194  SelectCode(NewAnd.getNode());
3195  return true;
3196 }
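// Illustrative example of the shrink above (the masks are hypothetical):
// with the upper 28 bits of %x known zero, "and i32 %x, 0x0000000F" widens
// to an all-ones mask and the 'and' is removed entirely; with only the sign
// bit of %x known zero, "and i32 %x, 0x7FFFFFF0" can become
// "and i32 %x, -16", which fits a sign-extended 8-bit immediate instead of
// a full 32-bit one.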
3197 
3198 void X86DAGToDAGISel::Select(SDNode *Node) {
3199  MVT NVT = Node->getSimpleValueType(0);
3200  unsigned Opcode = Node->getOpcode();
3201  SDLoc dl(Node);
3202 
3203  if (Node->isMachineOpcode()) {
3204  LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
3205  Node->setNodeId(-1);
3206  return; // Already selected.
3207  }
3208 
3209  switch (Opcode) {
3210  default: break;
3211  case ISD::BRIND: {
3212  if (Subtarget->isTargetNaCl())
3213  // NaCl has its own pass where jmp %r32 is converted to jmp %r64. We
3214  // leave the instruction alone.
3215  break;
3216  if (Subtarget->isTarget64BitILP32()) {
3217  // Converts a 32-bit register to a 64-bit, zero-extended version of
3218  // it. This is needed because x86-64 can do many things, but jmp %r32
3219  // ain't one of them.
3220  const SDValue &Target = Node->getOperand(1);
3222  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
3223  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
3224  Node->getOperand(0), ZextTarget);
3225  ReplaceNode(Node, Brind.getNode());
3226  SelectCode(ZextTarget.getNode());
3227  SelectCode(Brind.getNode());
3228  return;
3229  }
3230  break;
3231  }
3232  case X86ISD::GlobalBaseReg:
3233  ReplaceNode(Node, getGlobalBaseReg());
3234  return;
3235 
3236  case ISD::BITCAST:
3237  // Just drop all 128/256/512-bit bitcasts.
3238  if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
3239  NVT == MVT::f128) {
3240  ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
3241  CurDAG->RemoveDeadNode(Node);
3242  return;
3243  }
3244  break;
3245 
3246  case X86ISD::SELECT:
3247  case X86ISD::SHRUNKBLEND: {
3248  // SHRUNKBLEND selects like a regular VSELECT. Same with X86ISD::SELECT.
3249  SDValue VSelect = CurDAG->getNode(
3250  ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
3251  Node->getOperand(1), Node->getOperand(2));
3252  ReplaceNode(Node, VSelect.getNode());
3253  SelectCode(VSelect.getNode());
3254  // We already called ReplaceUses.
3255  return;
3256  }
3257 
3258  case ISD::SRL:
3259  if (matchBitExtract(Node))
3260  return;
3261    LLVM_FALLTHROUGH;
3262  case ISD::SRA:
3263  case ISD::SHL:
3264  if (tryShiftAmountMod(Node))
3265  return;
3266  break;
3267 
3268  case ISD::AND:
3269  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
3270  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
3271  CurDAG->RemoveDeadNode(Node);
3272  return;
3273  }
3274  if (matchBitExtract(Node))
3275  return;
3276  if (AndImmShrink && shrinkAndImmediate(Node))
3277  return;
3278 
3279    LLVM_FALLTHROUGH;
3280  case ISD::OR:
3281  case ISD::XOR: {
3282 
3283  // For operations of the form (x << C1) op C2, check if we can use a smaller
3284  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
3285  SDValue N0 = Node->getOperand(0);
3286  SDValue N1 = Node->getOperand(1);
3287 
3288  if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
3289  break;
3290 
3291  // i8 is unshrinkable, i16 should be promoted to i32.
3292  if (NVT != MVT::i32 && NVT != MVT::i64)
3293  break;
3294 
3295  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
3296  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3297  if (!Cst || !ShlCst)
3298  break;
3299 
3300  int64_t Val = Cst->getSExtValue();
3301  uint64_t ShlVal = ShlCst->getZExtValue();
3302 
3303  // Make sure that we don't change the operation by removing bits.
3304  // This only matters for OR and XOR; AND is unaffected.
3305  uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
3306  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
3307  break;
3308 
3309  unsigned ShlOp, AddOp, Op;
3310  MVT CstVT = NVT;
3311 
3312  // Check the minimum bitwidth for the new constant.
3313  // TODO: AND32ri is the same as AND64ri32 with zext imm.
3314  // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
3315  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
3316  if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
3317  CstVT = MVT::i8;
3318  else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
3319  CstVT = MVT::i32;
3320 
3321  // Bail if there is no smaller encoding.
3322  if (NVT == CstVT)
3323  break;
3324 
3325  switch (NVT.SimpleTy) {
3326  default: llvm_unreachable("Unsupported VT!");
3327  case MVT::i32:
3328  assert(CstVT == MVT::i8);
3329  ShlOp = X86::SHL32ri;
3330  AddOp = X86::ADD32rr;
3331 
3332  switch (Opcode) {
3333  default: llvm_unreachable("Impossible opcode");
3334  case ISD::AND: Op = X86::AND32ri8; break;
3335  case ISD::OR: Op = X86::OR32ri8; break;
3336  case ISD::XOR: Op = X86::XOR32ri8; break;
3337  }
3338  break;
3339  case MVT::i64:
3340  assert(CstVT == MVT::i8 || CstVT == MVT::i32);
3341  ShlOp = X86::SHL64ri;
3342  AddOp = X86::ADD64rr;
3343 
3344  switch (Opcode) {
3345  default: llvm_unreachable("Impossible opcode");
3346  case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
3347  case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
3348  case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
3349  }
3350  break;
3351  }
3352 
3353  // Emit the smaller op and the shift.
3354  SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
3355  SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
3356  if (ShlVal == 1)
3357  CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
3358  SDValue(New, 0));
3359  else
3360  CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
3361  getI8Imm(ShlVal, dl));
3362  return;
3363  }
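// Illustrative example of the transform above (constants are hypothetical):
//   or i32 (shl i32 %x, 8), 0x0F00
// needs a 32-bit immediate for the OR, but rewriting it as
//   shl i32 (or i32 %x, 0x0F), 8
// lets the OR use an 8-bit immediate. For OR/XOR this is only done when the
// bits shifted out of the constant (here the low 8 bits) are zero, so no
// bits of the result change.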
3364  case X86ISD::UMUL8:
3365  case X86ISD::SMUL8: {
3366  SDValue N0 = Node->getOperand(0);
3367  SDValue N1 = Node->getOperand(1);
3368 
3369  unsigned Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
3370 
3371  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
3372  N0, SDValue()).getValue(1);
3373 
3374  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
3375  SDValue Ops[] = {N1, InFlag};
3376  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3377 
3378  ReplaceNode(Node, CNode);
3379  return;
3380  }
3381 
3382  case X86ISD::UMUL: {
3383  SDValue N0 = Node->getOperand(0);
3384  SDValue N1 = Node->getOperand(1);
3385 
3386  unsigned LoReg, Opc;
3387  switch (NVT.SimpleTy) {
3388  default: llvm_unreachable("Unsupported VT!");
3389  // MVT::i8 is handled by X86ISD::UMUL8.
3390  case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
3391  case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
3392  case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
3393  }
3394 
3395  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
3396  N0, SDValue()).getValue(1);
3397 
3398  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
3399  SDValue Ops[] = {N1, InFlag};
3400  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3401 
3402  ReplaceNode(Node, CNode);
3403  return;
3404  }
3405 
3406  case ISD::SMUL_LOHI:
3407  case ISD::UMUL_LOHI: {
3408  SDValue N0 = Node->getOperand(0);
3409  SDValue N1 = Node->getOperand(1);
3410 
3411  unsigned Opc, MOpc;
3412  bool isSigned = Opcode == ISD::SMUL_LOHI;
3413  bool hasBMI2 = Subtarget->hasBMI2();
3414  if (!isSigned) {
3415  switch (NVT.SimpleTy) {
3416  default: llvm_unreachable("Unsupported VT!");
3417  case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
3418  MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
3419  case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
3420  MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
3421  }
3422  } else {
3423  switch (NVT.SimpleTy) {
3424  default: llvm_unreachable("Unsupported VT!");
3425  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
3426  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
3427  }
3428  }
3429 
3430  unsigned SrcReg, LoReg, HiReg;
3431  switch (Opc) {
3432  default: llvm_unreachable("Unknown MUL opcode!");
3433  case X86::IMUL32r:
3434  case X86::MUL32r:
3435  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
3436  break;
3437  case X86::IMUL64r:
3438  case X86::MUL64r:
3439  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
3440  break;
3441  case X86::MULX32rr:
3442  SrcReg = X86::EDX; LoReg = HiReg = 0;
3443  break;
3444  case X86::MULX64rr:
3445  SrcReg = X86::RDX; LoReg = HiReg = 0;
3446  break;
3447  }
3448 
3449  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3450  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3451  // Multiply is commutative.
3452  if (!foldedLoad) {
3453  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3454  if (foldedLoad)
3455  std::swap(N0, N1);
3456  }
3457 
3458  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
3459  N0, SDValue()).getValue(1);
3460  SDValue ResHi, ResLo;
3461 
3462  if (foldedLoad) {
3463  SDValue Chain;
3464  MachineSDNode *CNode = nullptr;
3465  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3466  InFlag };
3467  if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
3468  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
3469  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3470  ResHi = SDValue(CNode, 0);
3471  ResLo = SDValue(CNode, 1);
3472  Chain = SDValue(CNode, 2);
3473  InFlag = SDValue(CNode, 3);
3474  } else {
3475  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3476  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3477  Chain = SDValue(CNode, 0);
3478  InFlag = SDValue(CNode, 1);
3479  }
3480 
3481  // Update the chain.
3482  ReplaceUses(N1.getValue(1), Chain);
3483  // Record the mem-refs
3484  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3485  } else {
3486  SDValue Ops[] = { N1, InFlag };
3487  if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
3488  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
3489  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3490  ResHi = SDValue(CNode, 0);
3491  ResLo = SDValue(CNode, 1);
3492  InFlag = SDValue(CNode, 2);
3493  } else {
3494  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
3495  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3496  InFlag = SDValue(CNode, 0);
3497  }
3498  }
3499 
3500  // Copy the low half of the result, if it is needed.
3501  if (!SDValue(Node, 0).use_empty()) {
3502  if (!ResLo.getNode()) {
3503  assert(LoReg && "Register for low half is not defined!");
3504  ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
3505  InFlag);
3506  InFlag = ResLo.getValue(2);
3507  }
3508  ReplaceUses(SDValue(Node, 0), ResLo);
3509  LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
3510  dbgs() << '\n');
3511  }
3512  // Copy the high half of the result, if it is needed.
3513  if (!SDValue(Node, 1).use_empty()) {
3514  if (!ResHi.getNode()) {
3515  assert(HiReg && "Register for high half is not defined!");
3516  ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
3517  InFlag);
3518  InFlag = ResHi.getValue(2);
3519  }
3520  ReplaceUses(SDValue(Node, 1), ResHi);
3521  LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
3522  dbgs() << '\n');
3523  }
3524 
3525  CurDAG->RemoveDeadNode(Node);
3526  return;
3527  }
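// Illustrative note on the register conventions handled above: the legacy
// MUL/IMUL forms implicitly read (E/R)AX and leave the low half in (E/R)AX
// and the high half in (E/R)DX, which is why ResLo/ResHi may need explicit
// CopyFromReg nodes, while BMI2 MULX takes its implicit source from (E/R)DX
// and returns both halves as explicit register results.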
3528 
3529  case ISD::SDIVREM:
3530  case ISD::UDIVREM: {
3531  SDValue N0 = Node->getOperand(0);
3532  SDValue N1 = Node->getOperand(1);
3533 
3534  unsigned Opc, MOpc;
3535  bool isSigned = Opcode == ISD::SDIVREM;
3536  if (!isSigned) {
3537  switch (NVT.SimpleTy) {
3538  default: llvm_unreachable("Unsupported VT!");
3539  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
3540  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
3541  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
3542  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
3543  }
3544  } else {
3545  switch (NVT.SimpleTy) {
3546  default: llvm_unreachable("Unsupported VT!");
3547  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
3548  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
3549  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
3550  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
3551  }
3552  }
3553 
3554  unsigned LoReg, HiReg, ClrReg;
3555  unsigned SExtOpcode;
3556  switch (NVT.SimpleTy) {
3557  default: llvm_unreachable("Unsupported VT!");
3558  case MVT::i8:
3559  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
3560  SExtOpcode = X86::CBW;
3561  break;
3562  case MVT::i16:
3563  LoReg = X86::AX; HiReg = X86::DX;
3564  ClrReg = X86::DX;
3565  SExtOpcode = X86::CWD;
3566  break;
3567  case MVT::i32:
3568  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
3569  SExtOpcode = X86::CDQ;
3570  break;
3571  case MVT::i64:
3572  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
3573  SExtOpcode = X86::CQO;
3574  break;
3575  }
3576 
3577  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3578  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3579  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
3580 
3581  SDValue InFlag;
3582  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
3583  // Special case for div8, just use a move with zero extension to AX to
3584  // clear the upper 8 bits (AH).
3585  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
3586  MachineSDNode *Move;
3587  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3588  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
3589  Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
3590  MVT::Other, Ops);
3591  Chain = SDValue(Move, 1);
3592  ReplaceUses(N0.getValue(1), Chain);
3593  // Record the mem-refs
3594  CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
3595  } else {
3596  Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0);
3597  Chain = CurDAG->getEntryNode();
3598  }
3599  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0),
3600  SDValue());
3601  InFlag = Chain.getValue(1);
3602  } else {
3603  InFlag =
3604  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
3605  LoReg, N0, SDValue()).getValue(1);
3606  if (isSigned && !signBitIsZero) {
3607  // Sign extend the low part into the high part.
3608  InFlag =
3609  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
3610  } else {
3611  // Zero out the high part, effectively zero extending the input.
3612  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
3613  switch (NVT.SimpleTy) {
3614  case MVT::i16:
3615  ClrNode =
3616  SDValue(CurDAG->getMachineNode(
3617  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
3618  CurDAG->getTargetConstant(X86::sub_16bit, dl,
3619  MVT::i32)),
3620  0);
3621  break;
3622  case MVT::i32:
3623  break;
3624  case MVT::i64:
3625  ClrNode =
3626  SDValue(CurDAG->getMachineNode(
3627  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
3628  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
3629  CurDAG->getTargetConstant(X86::sub_32bit, dl,
3630  MVT::i32)),
3631  0);
3632  break;
3633  default:
3634  llvm_unreachable("Unexpected division source");
3635  }
3636 
3637  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
3638  ClrNode, InFlag).getValue(1);
3639  }
3640  }
3641 
3642  if (foldedLoad) {
3643  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3644  InFlag };
3645  MachineSDNode *CNode =
3646  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
3647  InFlag = SDValue(CNode, 1);
3648  // Update the chain.
3649  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
3650  // Record the mem-refs
3651  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3652  } else {
3653  InFlag =
3654  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
3655  }
3656 
3657  // Prevent use of AH in a REX instruction by explicitly copying it to
3658  // an ABCD_L register.
3659  //
3660  // The current assumption of the register allocator is that isel
3661  // won't generate explicit references to the GR8_ABCD_H registers. If
3662  // the allocator and/or the backend get enhanced to be more robust in
3663  // that regard, this can be, and should be, removed.
3664  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
3665  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
3666  unsigned AHExtOpcode =
3667  isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
3668 
3669  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
3670  MVT::Glue, AHCopy, InFlag);
3671  SDValue Result(RNode, 0);
3672  InFlag = SDValue(RNode, 1);
3673 
3674  Result =
3675  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
3676 
3677  ReplaceUses(SDValue(Node, 1), Result);
3678  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3679  dbgs() << '\n');
3680  }
3681  // Copy the division (low) result, if it is needed.
3682  if (!SDValue(Node, 0).use_empty()) {
3683  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
3684  LoReg, NVT, InFlag);
3685  InFlag = Result.getValue(2);
3686  ReplaceUses(SDValue(Node, 0), Result);
3687  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3688  dbgs() << '\n');
3689  }
3690  // Copy the remainder (high) result, if it is needed.
3691  if (!SDValue(Node, 1).use_empty()) {
3692  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
3693  HiReg, NVT, InFlag);
3694  InFlag = Result.getValue(2);
3695  ReplaceUses(SDValue(Node, 1), Result);
3696  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3697  dbgs() << '\n');
3698  }
3699  CurDAG->RemoveDeadNode(Node);
3700  return;
3701  }
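// Illustrative walk-through of the sequence built above for a signed 32-bit
// divrem: copy the dividend into EAX, sign-extend it into EDX with CDQ,
// issue IDIV32r on the divisor, then read the quotient from EAX and the
// remainder from EDX. The i8 case instead widens the dividend into AX (or
// EAX via MOVZX) and recovers the remainder from AH through a non-REX copy.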
3702 
3703  case X86ISD::CMP: {
3704  SDValue N0 = Node->getOperand(0);
3705  SDValue N1 = Node->getOperand(1);
3706 
3707  // Save the original VT of the compare.
3708  MVT CmpVT = N0.getSimpleValueType();
3709 
3710  // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
3711  // by a test instruction. The test should be removed later by
3712  // analyzeCompare if we are using only the zero flag.
3713  // TODO: Should we check the users and use the BEXTR flags directly?
3714  if (isNullConstant(N1) && N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
3715  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
3716  unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
3717  : X86::TEST32rr;
3718  SDValue BEXTR = SDValue(NewNode, 0);
3719  NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
3720  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
3721  CurDAG->RemoveDeadNode(Node);
3722  return;
3723  }
3724  }
3725 
3726  // We can peek through truncates, but we need to be careful below.
3727  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
3728  N0 = N0.getOperand(0);
3729 
3730  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
3731  // use a smaller encoding.
3732  // Look past the truncate if CMP is the only use of it.
3733  if (N0.getOpcode() == ISD::AND &&
3734  N0.getNode()->hasOneUse() &&
3735  N0.getValueType() != MVT::i8 &&
3736  isNullConstant(N1)) {
3737  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3738  if (!C) break;
3739  uint64_t Mask = C->getZExtValue();
3740 
3741  MVT VT;
3742  int SubRegOp;
3743  unsigned ROpc, MOpc;
3744 
3745  // For each of these checks we need to be careful if the sign flag is
3746  // being used. It is only safe to use the sign flag in two cases: either
3747  // the sign bit in the shrunken mask is zero or the final test
3748  // size is equal to the original compare size.
3749 
3750  if (isUInt<8>(Mask) &&
3751  (!(Mask & 0x80) || CmpVT == MVT::i8 ||
3752  hasNoSignedComparisonUses(Node))) {
3753  // For example, convert "testl %eax, $8" to "testb %al, $8"
3754  VT = MVT::i8;
3755  SubRegOp = X86::sub_8bit;
3756  ROpc = X86::TEST8ri;
3757  MOpc = X86::TEST8mi;
3758  } else if (OptForMinSize && isUInt<16>(Mask) &&
3759  (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
3760  hasNoSignedComparisonUses(Node))) {
3761  // For example, "testl %eax, $32776" to "testw %ax, $32776".
3762  // NOTE: We only want to form TESTW instructions if optimizing for
3763  // min size. Otherwise we only save one byte and possibly get a length
3764  // changing prefix penalty in the decoders.
3765  VT = MVT::i16;
3766  SubRegOp = X86::sub_16bit;
3767  ROpc = X86::TEST16ri;
3768  MOpc = X86::TEST16mi;
3769  } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
3770  ((!(Mask & 0x80000000) &&
3771  // Without minsize 16-bit Cmps can get here so we need to
3772  // be sure we calculate the correct sign flag if needed.
3773  (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
3774  CmpVT == MVT::i32 || hasNoSignedComparisonUses(Node))) {
3775  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
3776  // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
3777  // Otherwise, we find ourselves in a position where we have to do
3778  // promotion. If previous passes did not promote the and, we assume
3779  // they had a good reason not to and do not promote here.
3780  VT = MVT::i32;
3781  SubRegOp = X86::sub_32bit;
3782  ROpc = X86::TEST32ri;
3783  MOpc = X86::TEST32mi;
3784  } else {
3785  // No eligible transformation was found.
3786  break;
3787  }
3788 
3789  // FIXME: We should be able to fold loads here.
3790 
3791  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
3792  SDValue Reg = N0.getOperand(0);
3793 
3794  // Emit a testl or testw.
3795  MachineSDNode *NewNode;
3796  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3797  if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3798  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3799  Reg.getOperand(0) };
3800  NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
3801  // Update the chain.
3802  ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
3803  // Record the mem-refs
3804  CurDAG->setNodeMemRefs(NewNode,
3805  {cast<LoadSDNode>(Reg)->getMemOperand()});
3806  } else {
3807  // Extract the subregister if necessary.
3808  if (N0.getValueType() != VT)
3809  Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
3810 
3811  NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
3812  }
3813  // Replace CMP with TEST.
3814  ReplaceNode(Node, NewNode);
3815  return;
3816  }
3817  break;
3818  }
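// Illustrative example of the narrowing above: "testl %eax, $8" can become
// "testb %al, $8" since bit 7 of the mask is clear, but a hypothetical
// "testl %eax, $0x80" is only narrowed when no user of the compare reads
// the sign or overflow flags, because bit 7 would become the sign bit of
// the 8-bit test.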
3819  case X86ISD::PCMPISTR: {
3820  if (!Subtarget->hasSSE42())
3821  break;
3822 
3823  bool NeedIndex = !SDValue(Node, 0).use_empty();
3824  bool NeedMask = !SDValue(Node, 1).use_empty();
3825  // We can't fold a load if we are going to make two instructions.
3826  bool MayFoldLoad = !NeedIndex || !NeedMask;
3827 
3828  MachineSDNode *CNode;
3829  if (NeedMask) {
3830  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
3831  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
3832  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
3833  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
3834  }
3835  if (NeedIndex || !NeedMask) {
3836  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
3837  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
3838  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
3839  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
3840  }
3841 
3842  // Connect the flag usage to the last instruction created.
3843  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
3844  CurDAG->RemoveDeadNode(Node);
3845  return;
3846  }
3847  case X86ISD::PCMPESTR: {
3848  if (!Subtarget->hasSSE42())
3849  break;
3850 
3851  // Copy the two implicit register inputs.
3852  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
3853  Node->getOperand(1),
3854  SDValue()).getValue(1);
3855  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
3856  Node->getOperand(3), InFlag).getValue(1);
3857 
3858  bool NeedIndex = !SDValue(Node, 0).use_empty();
3859  bool NeedMask = !SDValue(Node, 1).use_empty();
3860  // We can't fold a load if we are going to make two instructions.
3861  bool MayFoldLoad = !NeedIndex || !NeedMask;
3862 
3863  MachineSDNode *CNode;
3864  if (NeedMask) {
3865  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
3866  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
3867  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
3868  InFlag);
3869  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
3870  }
3871  if (NeedIndex || !NeedMask) {
3872  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
3873  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
3874  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
3875  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
3876  }
3877  // Connect the flag usage to the last instruction created.
3878  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
3879  CurDAG->RemoveDeadNode(Node);
3880  return;
3881  }
3882 
3883  case ISD::STORE:
3884  if (foldLoadStoreIntoMemOperand(Node))
3885  return;
3886  break;
3887  }
3888 
3889  SelectCode(Node);
3890 }
3891 
3892 bool X86DAGToDAGISel::
3893 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
3894  std::vector<SDValue> &OutOps) {
3895  SDValue Op0, Op1, Op2, Op3, Op4;
3896  switch (ConstraintID) {
3897  default:
3898  llvm_unreachable("Unexpected asm memory constraint");
3899  case InlineAsm::Constraint_i:
3900  // FIXME: It seems strange that 'i' is needed here since it's supposed to
3901  // be an immediate and not a memory constraint.
3902    LLVM_FALLTHROUGH;
3903  case InlineAsm::Constraint_o: // offsetable ??
3904  case InlineAsm::Constraint_v: // not offsetable ??
3905  case InlineAsm::Constraint_m: // memory
3906  case InlineAsm::Constraint_X:
3907   if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
3908  return true;
3909  break;
3910  }
3911 
3912  OutOps.push_back(Op0);
3913  OutOps.push_back(Op1);
3914  OutOps.push_back(Op2);
3915  OutOps.push_back(Op3);
3916  OutOps.push_back(Op4);
3917  return false;
3918 }
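// Illustrative note: the five operands pushed above are the same
// (base, scale, index, displacement, segment) tuple produced by selectAddr
// for folded loads, so an "m" constraint addressing [rsi + 4*rcx + 8] would
// be emitted as (rsi, 4, rcx, 8, noreg).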
3919 
3920 /// This pass converts a legalized DAG into a X86-specific DAG,
3921 /// ready for instruction scheduling.
3922 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
3923  CodeGenOpt::Level OptLevel) {
3924  return new X86DAGToDAGISel(TM, OptLevel);
3925 }