LLVM  7.0.0svn
X86ISelDAGToDAG.cpp
1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a DAG pattern matching instruction selector for X86,
11 // converting from a legalized DAG to an X86 DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86MachineFunctionInfo.h"
17 #include "X86RegisterInfo.h"
18 #include "X86Subtarget.h"
19 #include "X86TargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Config/llvm-config.h"
25 #include "llvm/IR/ConstantRange.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/Instructions.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/Type.h"
30 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/KnownBits.h"
37 #include <stdint.h>
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "x86-isel"
41 
42 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
43 
44 //===----------------------------------------------------------------------===//
45 // Pattern Matcher Implementation
46 //===----------------------------------------------------------------------===//
47 
48 namespace {
49  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
50  /// numbers for the leaves of the matched tree.
51  struct X86ISelAddressMode {
52  enum {
53  RegBase,
54  FrameIndexBase
55  } BaseType;
56 
57  // This is really a union, discriminated by BaseType!
58  SDValue Base_Reg;
59  int Base_FrameIndex;
60 
61  unsigned Scale;
62  SDValue IndexReg;
63  int32_t Disp;
64  SDValue Segment;
65  const GlobalValue *GV;
66  const Constant *CP;
67  const BlockAddress *BlockAddr;
68  const char *ES;
69  MCSymbol *MCSym;
70  int JT;
71  unsigned Align; // CP alignment.
72  unsigned char SymbolFlags; // X86II::MO_*
73 
74  X86ISelAddressMode()
75  : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
76  Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
77  MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
78 
79  bool hasSymbolicDisplacement() const {
80  return GV != nullptr || CP != nullptr || ES != nullptr ||
81  MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
82  }
83 
84  bool hasBaseOrIndexReg() const {
85  return BaseType == FrameIndexBase ||
86  IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
87  }
88 
89  /// Return true if this addressing mode is already RIP-relative.
90  bool isRIPRelative() const {
91  if (BaseType != RegBase) return false;
92  if (RegisterSDNode *RegNode =
93  dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
94  return RegNode->getReg() == X86::RIP;
95  return false;
96  }
97 
98  void setBaseReg(SDValue Reg) {
99  BaseType = RegBase;
100  Base_Reg = Reg;
101  }
102 
103 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
104  void dump(SelectionDAG *DAG = nullptr) {
105  dbgs() << "X86ISelAddressMode " << this << '\n';
106  dbgs() << "Base_Reg ";
107  if (Base_Reg.getNode())
108  Base_Reg.getNode()->dump(DAG);
109  else
110  dbgs() << "nul\n";
111  if (BaseType == FrameIndexBase)
112  dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
113  dbgs() << " Scale " << Scale << '\n'
114  << "IndexReg ";
115  if (IndexReg.getNode())
116  IndexReg.getNode()->dump(DAG);
117  else
118  dbgs() << "nul\n";
119  dbgs() << " Disp " << Disp << '\n'
120  << "GV ";
121  if (GV)
122  GV->dump();
123  else
124  dbgs() << "nul";
125  dbgs() << " CP ";
126  if (CP)
127  CP->dump();
128  else
129  dbgs() << "nul";
130  dbgs() << '\n'
131  << "ES ";
132  if (ES)
133  dbgs() << ES;
134  else
135  dbgs() << "nul";
136  dbgs() << " MCSym ";
137  if (MCSym)
138  dbgs() << MCSym;
139  else
140  dbgs() << "nul";
141  dbgs() << " JT" << JT << " Align" << Align << '\n';
142  }
143 #endif
144  };
145 }
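// For illustration: a memory operand such as "movl 16(%rsi,%rdi,4), %eax"
// would be matched into an X86ISelAddressMode with Base_Reg = %rsi,
// IndexReg = %rdi, Scale = 4 and Disp = 16, while the symbolic fields
// (GV, CP, ES, MCSym, JT, BlockAddr) stay empty.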
146 
147 namespace {
148  //===--------------------------------------------------------------------===//
149  /// ISel - X86-specific code to select X86 machine instructions for
150  /// SelectionDAG operations.
151  ///
152  class X86DAGToDAGISel final : public SelectionDAGISel {
153  /// Keep a pointer to the X86Subtarget around so that we can
154  /// make the right decision when generating code for different targets.
155  const X86Subtarget *Subtarget;
156 
157  /// If true, selector should try to optimize for code size instead of
158  /// performance.
159  bool OptForSize;
160 
161  /// If true, selector should try to optimize for minimum code size.
162  bool OptForMinSize;
163 
164  public:
165  explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
166  : SelectionDAGISel(tm, OptLevel), OptForSize(false),
167  OptForMinSize(false) {}
168 
169  StringRef getPassName() const override {
170  return "X86 DAG->DAG Instruction Selection";
171  }
172 
173  bool runOnMachineFunction(MachineFunction &MF) override {
174  // Reset the subtarget each time through.
175  Subtarget = &MF.getSubtarget<X86Subtarget>();
176  SelectionDAGISel::runOnMachineFunction(MF);
177  return true;
178  }
179 
180  void EmitFunctionEntryCode() override;
181 
182  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
183 
184  void PreprocessISelDAG() override;
185  void PostprocessISelDAG() override;
186 
187 // Include the pieces autogenerated from the target description.
188 #include "X86GenDAGISel.inc"
189 
190  private:
191  void Select(SDNode *N) override;
192 
193  bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
194  bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
195  bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
196  bool matchAddress(SDValue N, X86ISelAddressMode &AM);
197  bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
198  bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
199  bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
200  unsigned Depth);
201  bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
202  bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
203  SDValue &Scale, SDValue &Index, SDValue &Disp,
204  SDValue &Segment);
205  bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
206  SDValue &Scale, SDValue &Index, SDValue &Disp,
207  SDValue &Segment);
208  bool selectMOV64Imm32(SDValue N, SDValue &Imm);
209  bool selectLEAAddr(SDValue N, SDValue &Base,
210  SDValue &Scale, SDValue &Index, SDValue &Disp,
211  SDValue &Segment);
212  bool selectLEA64_32Addr(SDValue N, SDValue &Base,
213  SDValue &Scale, SDValue &Index, SDValue &Disp,
214  SDValue &Segment);
215  bool selectTLSADDRAddr(SDValue N, SDValue &Base,
216  SDValue &Scale, SDValue &Index, SDValue &Disp,
217  SDValue &Segment);
218  bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
219  SDValue &Base, SDValue &Scale,
220  SDValue &Index, SDValue &Disp,
221  SDValue &Segment,
222  SDValue &NodeWithChain);
223  bool selectRelocImm(SDValue N, SDValue &Op);
224 
225  bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
226  SDValue &Base, SDValue &Scale,
227  SDValue &Index, SDValue &Disp,
228  SDValue &Segment);
229 
230  // Convenience method where P is also root.
231  bool tryFoldLoad(SDNode *P, SDValue N,
232  SDValue &Base, SDValue &Scale,
233  SDValue &Index, SDValue &Disp,
234  SDValue &Segment) {
235  return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
236  }
237 
238  // Try to fold a vector load. This makes sure the load isn't non-temporal.
239  bool tryFoldVecLoad(SDNode *Root, SDNode *P, SDValue N,
240  SDValue &Base, SDValue &Scale,
241  SDValue &Index, SDValue &Disp,
242  SDValue &Segment);
243 
244  /// Implement addressing mode selection for inline asm expressions.
245  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
246  unsigned ConstraintID,
247  std::vector<SDValue> &OutOps) override;
248 
249  void emitSpecialCodeForMain();
250 
251  inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
252  SDValue &Base, SDValue &Scale,
253  SDValue &Index, SDValue &Disp,
254  SDValue &Segment) {
255  Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
256  ? CurDAG->getTargetFrameIndex(
257  AM.Base_FrameIndex,
258  TLI->getPointerTy(CurDAG->getDataLayout()))
259  : AM.Base_Reg;
260  Scale = getI8Imm(AM.Scale, DL);
261  Index = AM.IndexReg;
262  // These are 32-bit even in 64-bit mode since RIP-relative offset
263  // is 32-bit.
264  if (AM.GV)
265  Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
266  MVT::i32, AM.Disp,
267  AM.SymbolFlags);
268  else if (AM.CP)
269  Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
270  AM.Align, AM.Disp, AM.SymbolFlags);
271  else if (AM.ES) {
272  assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
273  Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
274  } else if (AM.MCSym) {
275  assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
276  assert(AM.SymbolFlags == 0 && "oo");
277  Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
278  } else if (AM.JT != -1) {
279  assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
280  Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
281  } else if (AM.BlockAddr)
282  Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
283  AM.SymbolFlags);
284  else
285  Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
286 
287  if (AM.Segment.getNode())
288  Segment = AM.Segment;
289  else
290  Segment = CurDAG->getRegister(0, MVT::i32);
291  }
292 
293  // Utility function to determine whether we should avoid selecting
294  // immediate forms of instructions for better code size or not.
295  // At a high level, we'd like to avoid such instructions when
296  // we have similar constants used within the same basic block
297  // that can be kept in a register.
298  //
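 299  // For example, when optimizing for size, a 32-bit constant that feeds
 300  // several compares in a block is cheaper to materialize once
 301  // ("movl $0x12345678, %ecx") and reuse from a register than to encode as
 302  // a 4-byte immediate in every instruction.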
299  bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
300  uint32_t UseCount = 0;
301 
302  // Do not want to hoist if we're not optimizing for size.
303  // TODO: We'd like to remove this restriction.
304  // See the comment in X86InstrInfo.td for more info.
305  if (!OptForSize)
306  return false;
307 
308  // Walk all the users of the immediate.
309  for (SDNode::use_iterator UI = N->use_begin(),
310  UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
311 
312  SDNode *User = *UI;
313 
314  // This user is already selected. Count it as a legitimate use and
315  // move on.
316  if (User->isMachineOpcode()) {
317  UseCount++;
318  continue;
319  }
320 
321  // We want to count stores of immediates as real uses.
322  if (User->getOpcode() == ISD::STORE &&
323  User->getOperand(1).getNode() == N) {
324  UseCount++;
325  continue;
326  }
327 
328  // We don't currently match users that have > 2 operands (except
 329  // for stores, which are handled above).
 330  // Those instructions won't match in ISEL, for now, and would
331  // be counted incorrectly.
332  // This may change in the future as we add additional instruction
333  // types.
334  if (User->getNumOperands() != 2)
335  continue;
336 
337  // Immediates that are used for offsets as part of stack
338  // manipulation should be left alone. These are typically
339  // used to indicate SP offsets for argument passing and
340  // will get pulled into stores/pushes (implicitly).
341  if (User->getOpcode() == X86ISD::ADD ||
342  User->getOpcode() == ISD::ADD ||
343  User->getOpcode() == X86ISD::SUB ||
344  User->getOpcode() == ISD::SUB) {
345 
346  // Find the other operand of the add/sub.
347  SDValue OtherOp = User->getOperand(0);
348  if (OtherOp.getNode() == N)
349  OtherOp = User->getOperand(1);
350 
351  // Don't count if the other operand is SP.
352  RegisterSDNode *RegNode;
353  if (OtherOp->getOpcode() == ISD::CopyFromReg &&
354  (RegNode = dyn_cast_or_null<RegisterSDNode>(
355  OtherOp->getOperand(1).getNode())))
356  if ((RegNode->getReg() == X86::ESP) ||
357  (RegNode->getReg() == X86::RSP))
358  continue;
359  }
360 
361  // ... otherwise, count this and move on.
362  UseCount++;
363  }
364 
 365  // If we have more than one use, recommend hoisting.
366  return (UseCount > 1);
367  }
368 
369  /// Return a target constant with the specified value of type i8.
370  inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
371  return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
372  }
373 
374  /// Return a target constant with the specified value, of type i32.
375  inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
376  return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
377  }
378 
379  /// Return a target constant with the specified value, of type i64.
380  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
381  return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
382  }
383 
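  /// Compute the subvector-index immediate used by the VEXTRACT* instructions
  /// from an EXTRACT_SUBVECTOR index. For example, extracting at element 4 of
  /// a v8i32 source with VecWidth == 128 yields (4 * 32) / 128 == 1, i.e. the
  /// upper 128-bit half.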
384  SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
385  const SDLoc &DL) {
386  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
387  uint64_t Index = N->getConstantOperandVal(1);
388  MVT VecVT = N->getOperand(0).getSimpleValueType();
389  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
390  }
391 
392  SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
393  const SDLoc &DL) {
394  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
395  uint64_t Index = N->getConstantOperandVal(2);
396  MVT VecVT = N->getSimpleValueType(0);
397  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
398  }
399 
400  /// Return an SDNode that returns the value of the global base register.
401  /// Output instructions required to initialize the global base register,
402  /// if necessary.
403  SDNode *getGlobalBaseReg();
404 
405  /// Return a reference to the TargetMachine, casted to the target-specific
406  /// type.
407  const X86TargetMachine &getTargetMachine() const {
408  return static_cast<const X86TargetMachine &>(TM);
409  }
410 
411  /// Return a reference to the TargetInstrInfo, casted to the target-specific
412  /// type.
413  const X86InstrInfo *getInstrInfo() const {
414  return Subtarget->getInstrInfo();
415  }
416 
417  /// Address-mode matching performs shift-of-and to and-of-shift
418  /// reassociation in order to expose more scaled addressing
419  /// opportunities.
420  bool ComplexPatternFuncMutatesDAG() const override {
421  return true;
422  }
423 
424  bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
425 
426  /// Returns whether this is a relocatable immediate in the range
427  /// [-2^Width .. 2^Width-1].
428  template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
429  if (auto *CN = dyn_cast<ConstantSDNode>(N))
430  return isInt<Width>(CN->getSExtValue());
431  return isSExtAbsoluteSymbolRef(Width, N);
432  }
433 
434  // Indicates we should prefer to use a non-temporal load for this load.
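  // (Non-temporal loads are selected as MOVNTDQA/VMOVNTDQA, which only exist
  // for 16-byte operands with SSE4.1, 32-byte with AVX2 and 64-byte with
  // AVX-512, hence the subtarget checks below.)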
435  bool useNonTemporalLoad(LoadSDNode *N) const {
436  if (!N->isNonTemporal())
437  return false;
438 
439  unsigned StoreSize = N->getMemoryVT().getStoreSize();
440 
441  if (N->getAlignment() < StoreSize)
442  return false;
443 
444  switch (StoreSize) {
445  default: llvm_unreachable("Unsupported store size");
446  case 16:
447  return Subtarget->hasSSE41();
448  case 32:
449  return Subtarget->hasAVX2();
450  case 64:
451  return Subtarget->hasAVX512();
452  }
453  }
454 
455  bool foldLoadStoreIntoMemOperand(SDNode *Node);
456  bool matchBEXTRFromAnd(SDNode *Node);
457  bool shrinkAndImmediate(SDNode *N);
458  bool isMaskZeroExtended(SDNode *N) const;
459 
460  MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
461  const SDLoc &dl, MVT VT, SDNode *Node);
462  MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
463  const SDLoc &dl, MVT VT, SDNode *Node,
464  SDValue &InFlag);
465  };
466 }
467 
468 
469 // Returns true if this masked compare can be implemented legally with this
470 // type.
471 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
472  unsigned Opcode = N->getOpcode();
473  if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
474  Opcode == X86ISD::CMPM_RND || Opcode == X86ISD::VFPCLASS) {
475  // We can get 256-bit 8 element types here without VLX being enabled. When
476  // this happens we will use 512-bit operations and the mask will not be
477  // zero extended.
478  EVT OpVT = N->getOperand(0).getValueType();
479  if (OpVT.is256BitVector() || OpVT.is128BitVector())
480  return Subtarget->hasVLX();
481 
482  return true;
483  }
484  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
485  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
486  Opcode == X86ISD::FSETCCM_RND)
487  return true;
488 
489  return false;
490 }
491 
492 // Returns true if we can assume the writer of the mask has zero extended it
493 // for us.
494 bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
495  // If this is an AND, check if we have a compare on either side. As long as
496  // one side guarantees the mask is zero extended, the AND will preserve those
497  // zeros.
498  if (N->getOpcode() == ISD::AND)
499  return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
500  isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
501 
502  return isLegalMaskCompare(N, Subtarget);
503 }
504 
505 bool
506 X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
507  if (OptLevel == CodeGenOpt::None) return false;
508 
509  if (!N.hasOneUse())
510  return false;
511 
512  if (N.getOpcode() != ISD::LOAD)
513  return true;
514 
515  // If N is a load, do additional profitability checks.
516  if (U == Root) {
517  switch (U->getOpcode()) {
518  default: break;
519  case X86ISD::ADD:
520  case X86ISD::SUB:
521  case X86ISD::AND:
522  case X86ISD::XOR:
523  case X86ISD::OR:
524  case ISD::ADD:
525  case ISD::ADDCARRY:
526  case ISD::AND:
527  case ISD::OR:
528  case ISD::XOR: {
529  SDValue Op1 = U->getOperand(1);
530 
 531  // If the other operand is an 8-bit immediate we should fold the immediate
532  // instead. This reduces code size.
533  // e.g.
534  // movl 4(%esp), %eax
535  // addl $4, %eax
536  // vs.
537  // movl $4, %eax
538  // addl 4(%esp), %eax
 539  // The former is 2 bytes shorter. In the case where the increment is 1,
 540  // the saving can be 4 bytes (by using incl %eax).
541  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
542  if (Imm->getAPIntValue().isSignedIntN(8))
543  return false;
544 
545  // If this is a 64-bit AND with an immediate that fits in 32-bits,
546  // prefer using the smaller and over folding the load. This is needed to
547  // make sure immediates created by shrinkAndImmediate are always folded.
548  // Ideally we would narrow the load during DAG combine and get the
549  // best of both worlds.
550  if (U->getOpcode() == ISD::AND &&
551  Imm->getAPIntValue().getBitWidth() == 64 &&
552  Imm->getAPIntValue().isIntN(32))
553  return false;
554  }
555 
556  // If the other operand is a TLS address, we should fold it instead.
557  // This produces
558  // movl %gs:0, %eax
559  // leal i@NTPOFF(%eax), %eax
560  // instead of
561  // movl $i@NTPOFF, %eax
562  // addl %gs:0, %eax
563  // if the block also has an access to a second TLS address this will save
564  // a load.
565  // FIXME: This is probably also true for non-TLS addresses.
566  if (Op1.getOpcode() == X86ISD::Wrapper) {
567  SDValue Val = Op1.getOperand(0);
 568  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
 569  return false;
570  }
571 
572  // Don't fold load if this matches the BTS/BTR/BTC patterns.
573  // BTS: (or X, (shl 1, n))
574  // BTR: (and X, (rotl -2, n))
575  // BTC: (xor X, (shl 1, n))
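 // (Folding the load here would leave a plain OR/XOR with a memory operand
 // and block selection of the single-bit BTS/BTR/BTC forms below.)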
576  if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
577  if (U->getOperand(0).getOpcode() == ISD::SHL &&
 578  isOneConstant(U->getOperand(0).getOperand(0)))
 579  return false;
580 
581  if (U->getOperand(1).getOpcode() == ISD::SHL &&
 582  isOneConstant(U->getOperand(1).getOperand(0)))
 583  return false;
584  }
585  if (U->getOpcode() == ISD::AND) {
586  SDValue U0 = U->getOperand(0);
587  SDValue U1 = U->getOperand(1);
588  if (U0.getOpcode() == ISD::ROTL) {
589  auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
590  if (C && C->getSExtValue() == -2)
591  return false;
592  }
593 
594  if (U1.getOpcode() == ISD::ROTL) {
595  auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
596  if (C && C->getSExtValue() == -2)
597  return false;
598  }
599  }
600 
601  break;
602  }
603  case ISD::SHL:
604  case ISD::SRA:
605  case ISD::SRL:
606  // Don't fold a load into a shift by immediate. The BMI2 instructions
607  // support folding a load, but not an immediate. The legacy instructions
608  // support folding an immediate, but can't fold a load. Folding an
609  // immediate is preferable to folding a load.
610  if (isa<ConstantSDNode>(U->getOperand(1)))
611  return false;
612 
613  break;
614  }
615  }
616 
 617  // Prevent folding a load if this can be implemented with an insert_subreg or
618  // a move that implicitly zeroes.
619  if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
620  isNullConstant(Root->getOperand(2)) &&
621  (Root->getOperand(0).isUndef() ||
 622  ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
 623  return false;
624 
625  return true;
626 }
627 
 628 /// Replace the original chain operand of the call with the load's chain
 629 /// operand and move the load below the call's chain operand.
 630 static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
 631  SDValue Call, SDValue OrigChain) {
 632  SmallVector<SDValue, 8> Ops;
 633  SDValue Chain = OrigChain.getOperand(0);
634  if (Chain.getNode() == Load.getNode())
635  Ops.push_back(Load.getOperand(0));
636  else {
637  assert(Chain.getOpcode() == ISD::TokenFactor &&
638  "Unexpected chain operand");
639  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
640  if (Chain.getOperand(i).getNode() == Load.getNode())
641  Ops.push_back(Load.getOperand(0));
642  else
643  Ops.push_back(Chain.getOperand(i));
644  SDValue NewChain =
645  CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
646  Ops.clear();
647  Ops.push_back(NewChain);
648  }
649  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
650  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
651  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
652  Load.getOperand(1), Load.getOperand(2));
653 
654  Ops.clear();
655  Ops.push_back(SDValue(Load.getNode(), 1));
656  Ops.append(Call->op_begin() + 1, Call->op_end());
657  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
658 }
659 
 660 /// Return true if the call address is a load and it can be
661 /// moved below CALLSEQ_START and the chains leading up to the call.
662 /// Return the CALLSEQ_START by reference as a second output.
663 /// In the case of a tail call, there isn't a callseq node between the call
664 /// chain and the load.
665 static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
666  // The transformation is somewhat dangerous if the call's chain was glued to
 667  // the call. After moveBelowOrigChain the load is moved between the call and
 668  // the chain; this can create a cycle if the load is not folded. So it is
669  // *really* important that we are sure the load will be folded.
670  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
671  return false;
672  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
673  if (!LD ||
674  LD->isVolatile() ||
 675  LD->getAddressingMode() != ISD::UNINDEXED ||
 676  LD->getExtensionType() != ISD::NON_EXTLOAD)
 677  return false;
678 
679  // Now let's find the callseq_start.
680  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
681  if (!Chain.hasOneUse())
682  return false;
683  Chain = Chain.getOperand(0);
684  }
685 
686  if (!Chain.getNumOperands())
687  return false;
688  // Since we are not checking for AA here, conservatively abort if the chain
689  // writes to memory. It's not safe to move the callee (a load) across a store.
690  if (isa<MemSDNode>(Chain.getNode()) &&
691  cast<MemSDNode>(Chain.getNode())->writeMem())
692  return false;
693  if (Chain.getOperand(0).getNode() == Callee.getNode())
694  return true;
695  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
696  Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
697  Callee.getValue(1).hasOneUse())
698  return true;
699  return false;
700 }
701 
702 void X86DAGToDAGISel::PreprocessISelDAG() {
703  // OptFor[Min]Size are used in pattern predicates that isel is matching.
704  OptForSize = MF->getFunction().optForSize();
705  OptForMinSize = MF->getFunction().optForMinSize();
706  assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
707 
708  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
709  E = CurDAG->allnodes_end(); I != E; ) {
710  SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
711 
712  // If this is a target specific AND node with no flag usages, turn it back
713  // into ISD::AND to enable test instruction matching.
714  if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
715  SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
716  N->getOperand(0), N->getOperand(1));
717  --I;
718  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
719  ++I;
720  CurDAG->DeleteNode(N);
721  continue;
722  }
723 
724  if (OptLevel != CodeGenOpt::None &&
725  // Only do this when the target can fold the load into the call or
726  // jmp.
727  !Subtarget->useRetpoline() &&
728  ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
729  (N->getOpcode() == X86ISD::TC_RETURN &&
730  (Subtarget->is64Bit() ||
731  !getTargetMachine().isPositionIndependent())))) {
732  /// Also try moving call address load from outside callseq_start to just
733  /// before the call to allow it to be folded.
734  ///
735  /// [Load chain]
736  /// ^
737  /// |
738  /// [Load]
739  /// ^ ^
740  /// | |
741  /// / \--
742  /// / |
743  ///[CALLSEQ_START] |
744  /// ^ |
745  /// | |
746  /// [LOAD/C2Reg] |
747  /// | |
748  /// \ /
749  /// \ /
750  /// [CALL]
751  bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
752  SDValue Chain = N->getOperand(0);
753  SDValue Load = N->getOperand(1);
754  if (!isCalleeLoad(Load, Chain, HasCallSeq))
755  continue;
756  moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
757  ++NumLoadMoved;
758  continue;
759  }
760 
 761  // Lower fpround and fpextend nodes that target the FP stack to be a store
 762  // and load to the stack. This is a gross hack. We would like to simply mark
763  // these as being illegal, but when we do that, legalize produces these when
764  // it expands calls, then expands these in the same legalize pass. We would
765  // like dag combine to be able to hack on these between the call expansion
766  // and the node legalization. As such this pass basically does "really
767  // late" legalization of these inline with the X86 isel pass.
768  // FIXME: This should only happen when not compiled with -O0.
769  if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
770  continue;
771 
772  MVT SrcVT = N->getOperand(0).getSimpleValueType();
773  MVT DstVT = N->getSimpleValueType(0);
774 
775  // If any of the sources are vectors, no fp stack involved.
776  if (SrcVT.isVector() || DstVT.isVector())
777  continue;
778 
779  // If the source and destination are SSE registers, then this is a legal
780  // conversion that should not be lowered.
781  const X86TargetLowering *X86Lowering =
782  static_cast<const X86TargetLowering *>(TLI);
783  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
784  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
785  if (SrcIsSSE && DstIsSSE)
786  continue;
787 
788  if (!SrcIsSSE && !DstIsSSE) {
789  // If this is an FPStack extension, it is a noop.
790  if (N->getOpcode() == ISD::FP_EXTEND)
791  continue;
792  // If this is a value-preserving FPStack truncation, it is a noop.
793  if (N->getConstantOperandVal(1))
794  continue;
795  }
796 
797  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
798  // FPStack has extload and truncstore. SSE can fold direct loads into other
799  // operations. Based on this, decide what we want to do.
800  MVT MemVT;
801  if (N->getOpcode() == ISD::FP_ROUND)
802  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
803  else
804  MemVT = SrcIsSSE ? SrcVT : DstVT;
805 
806  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
807  SDLoc dl(N);
808 
809  // FIXME: optimize the case where the src/dest is a load or store?
810  SDValue Store =
811  CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
812  MemTmp, MachinePointerInfo(), MemVT);
813  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
814  MachinePointerInfo(), MemVT);
815 
816  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
 817  // extload we created. This will cause general havoc on the DAG because
818  // anything below the conversion could be folded into other existing nodes.
819  // To avoid invalidating 'I', back it up to the convert node.
820  --I;
821  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
822 
823  // Now that we did that, the node is dead. Increment the iterator to the
824  // next node to process, then delete N.
825  ++I;
826  CurDAG->DeleteNode(N);
827  }
828 }
829 
830 
831 void X86DAGToDAGISel::PostprocessISelDAG() {
832  // Skip peepholes at -O0.
833  if (TM.getOptLevel() == CodeGenOpt::None)
834  return;
835 
 836  // Attempt to remove vector moves that were inserted to zero upper bits.
837 
838  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
839  ++Position;
840 
841  while (Position != CurDAG->allnodes_begin()) {
842  SDNode *N = &*--Position;
843  // Skip dead nodes and any non-machine opcodes.
844  if (N->use_empty() || !N->isMachineOpcode())
845  continue;
846 
847  if (N->getMachineOpcode() != TargetOpcode::SUBREG_TO_REG)
848  continue;
849 
850  unsigned SubRegIdx = N->getConstantOperandVal(2);
851  if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
852  continue;
853 
854  SDValue Move = N->getOperand(1);
855  if (!Move.isMachineOpcode())
856  continue;
857 
 858  // Make sure it's one of the move opcodes we recognize.
859  switch (Move.getMachineOpcode()) {
860  default:
861  continue;
862  case X86::VMOVAPDrr: case X86::VMOVUPDrr:
863  case X86::VMOVAPSrr: case X86::VMOVUPSrr:
864  case X86::VMOVDQArr: case X86::VMOVDQUrr:
865  case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
866  case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
867  case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
868  case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
869  case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
870  case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
871  case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
872  case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
873  case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
874  case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
875  case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
876  break;
877  }
878 
879  SDValue In = Move.getOperand(0);
880  if (!In.isMachineOpcode() ||
881  In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
882  continue;
883 
 884  // The producing instruction is another vector instruction, so we can drop
 885  // the move.
886  CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
887 
888  // If the move is now dead, delete it.
889  if (Move.getNode()->use_empty())
890  CurDAG->RemoveDeadNode(Move.getNode());
891  }
892 }
893 
894 
895 /// Emit any code that needs to be executed only in the main function.
896 void X86DAGToDAGISel::emitSpecialCodeForMain() {
 897  if (Subtarget->isTargetCygMing()) {
 898  TargetLowering::ArgListTy Args;
 899  auto &DL = CurDAG->getDataLayout();
 900 
 901  TargetLowering::CallLoweringInfo CLI(*CurDAG);
 902  CLI.setChain(CurDAG->getRoot())
903  .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
904  CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
905  std::move(Args));
906  const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
907  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
908  CurDAG->setRoot(Result.second);
909  }
910 }
911 
912 void X86DAGToDAGISel::EmitFunctionEntryCode() {
913  // If this is main, emit special code for main.
914  const Function &F = MF->getFunction();
915  if (F.hasExternalLinkage() && F.getName() == "main")
916  emitSpecialCodeForMain();
917 }
918 
919 static bool isDispSafeForFrameIndex(int64_t Val) {
920  // On 64-bit platforms, we can run into an issue where a frame index
921  // includes a displacement that, when added to the explicit displacement,
922  // will overflow the displacement field. Assuming that the frame index
923  // displacement fits into a 31-bit integer (which is only slightly more
924  // aggressive than the current fundamental assumption that it fits into
925  // a 32-bit integer), a 31-bit disp should always be safe.
926  return isInt<31>(Val);
927 }
928 
929 bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
930  X86ISelAddressMode &AM) {
931  // If there's no offset to fold, we don't need to do any work.
932  if (Offset == 0)
933  return false;
934 
935  // Cannot combine ExternalSymbol displacements with integer offsets.
936  if (AM.ES || AM.MCSym)
937  return true;
938 
939  int64_t Val = AM.Disp + Offset;
940  CodeModel::Model M = TM.getCodeModel();
941  if (Subtarget->is64Bit()) {
 942  if (!X86::isOffsetSuitableForCodeModel(Val, M,
 943  AM.hasSymbolicDisplacement()))
944  return true;
945  // In addition to the checks required for a register base, check that
946  // we do not try to use an unsafe Disp with a frame index.
947  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
 948  !isDispSafeForFrameIndex(Val))
 949  return true;
950  }
951  AM.Disp = Val;
952  return false;
953 
954 }
955 
956 bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
957  SDValue Address = N->getOperand(1);
958 
959  // load gs:0 -> GS segment register.
960  // load fs:0 -> FS segment register.
961  //
962  // This optimization is valid because the GNU TLS model defines that
963  // gs:0 (or fs:0 on X86-64) contains its own address.
964  // For more information see http://people.redhat.com/drepper/tls.pdf
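 // For example, on x86-64 Linux "movq %fs:0, %rax" simply loads the thread
 // pointer itself, so the segment register plus a zero displacement fully
 // describes the address.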
965  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
966  if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
967  (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
968  Subtarget->isTargetFuchsia()))
969  switch (N->getPointerInfo().getAddrSpace()) {
970  case 256:
971  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
972  return false;
973  case 257:
974  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
975  return false;
976  // Address space 258 is not handled here, because it is not used to
977  // address TLS areas.
978  }
979 
980  return true;
981 }
982 
983 /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
984 /// mode. These wrap things that will resolve down into a symbol reference.
985 /// If no match is possible, this returns true, otherwise it returns false.
986 bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
987  // If the addressing mode already has a symbol as the displacement, we can
988  // never match another symbol.
989  if (AM.hasSymbolicDisplacement())
990  return true;
991 
992  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
993 
994  // Only do this address mode folding for 64-bit if we're in the small code
995  // model.
996  // FIXME: But we can do GOTPCREL addressing in the medium code model.
997  CodeModel::Model M = TM.getCodeModel();
998  if (Subtarget->is64Bit() && M != CodeModel::Small && M != CodeModel::Kernel)
999  return true;
1000 
1001  // Base and index reg must be 0 in order to use %rip as base.
1002  if (IsRIPRel && AM.hasBaseOrIndexReg())
1003  return true;
1004 
1005  // Make a local copy in case we can't do this fold.
1006  X86ISelAddressMode Backup = AM;
1007 
1008  int64_t Offset = 0;
1009  SDValue N0 = N.getOperand(0);
1010  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1011  AM.GV = G->getGlobal();
1012  AM.SymbolFlags = G->getTargetFlags();
1013  Offset = G->getOffset();
1014  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1015  AM.CP = CP->getConstVal();
1016  AM.Align = CP->getAlignment();
1017  AM.SymbolFlags = CP->getTargetFlags();
1018  Offset = CP->getOffset();
1019  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1020  AM.ES = S->getSymbol();
1021  AM.SymbolFlags = S->getTargetFlags();
1022  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1023  AM.MCSym = S->getMCSymbol();
1024  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
1025  AM.JT = J->getIndex();
1026  AM.SymbolFlags = J->getTargetFlags();
1027  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1028  AM.BlockAddr = BA->getBlockAddress();
1029  AM.SymbolFlags = BA->getTargetFlags();
1030  Offset = BA->getOffset();
1031  } else
1032  llvm_unreachable("Unhandled symbol reference node.");
1033 
1034  if (foldOffsetIntoAddress(Offset, AM)) {
1035  AM = Backup;
1036  return true;
1037  }
1038 
1039  if (IsRIPRel)
1040  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1041 
1042  // Commit the changes now that we know this fold is safe.
1043  return false;
1044 }
1045 
1046 /// Add the specified node to the specified addressing mode, returning true if
1047 /// it cannot be done. This just pattern matches for the addressing mode.
1048 bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1049  if (matchAddressRecursively(N, AM, 0))
1050  return true;
1051 
1052  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1053  // a smaller encoding and avoids a scaled-index.
1054  if (AM.Scale == 2 &&
1055  AM.BaseType == X86ISelAddressMode::RegBase &&
1056  AM.Base_Reg.getNode() == nullptr) {
1057  AM.Base_Reg = AM.IndexReg;
1058  AM.Scale = 1;
1059  }
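 // (For example, "leal 0(,%rax,2), %ecx" needs a SIB byte with no base and
 // therefore a 4-byte displacement, while "leal (%rax,%rax), %ecx" does not.)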
1060 
1061  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1062  // because it has a smaller encoding.
1063  // TODO: Which other code models can use this?
1064  if (TM.getCodeModel() == CodeModel::Small &&
1065  Subtarget->is64Bit() &&
1066  AM.Scale == 1 &&
1067  AM.BaseType == X86ISelAddressMode::RegBase &&
1068  AM.Base_Reg.getNode() == nullptr &&
1069  AM.IndexReg.getNode() == nullptr &&
1070  AM.SymbolFlags == X86II::MO_NO_FLAG &&
1071  AM.hasSymbolicDisplacement())
1072  AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1073 
1074  return false;
1075 }
1076 
1077 bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
1078  unsigned Depth) {
1079  // Add an artificial use to this node so that we can keep track of
1080  // it if it gets CSE'd with a different node.
1081  HandleSDNode Handle(N);
1082 
1083  X86ISelAddressMode Backup = AM;
1084  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1085  !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1086  return false;
1087  AM = Backup;
1088 
1089  // Try again after commuting the operands.
1090  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
1091  !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1092  return false;
1093  AM = Backup;
1094 
1095  // If we couldn't fold both operands into the address at the same time,
1096  // see if we can just put each operand into a register and fold at least
1097  // the add.
1098  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1099  !AM.Base_Reg.getNode() &&
1100  !AM.IndexReg.getNode()) {
1101  N = Handle.getValue();
1102  AM.Base_Reg = N.getOperand(0);
1103  AM.IndexReg = N.getOperand(1);
1104  AM.Scale = 1;
1105  return false;
1106  }
1107  N = Handle.getValue();
1108  return true;
1109 }
1110 
1111 // Insert a node into the DAG at least before the Pos node's position. This
1112 // will reposition the node as needed, and will assign it a node ID that is <=
1113 // the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1114 // IDs! The selection DAG must no longer depend on their uniqueness when this
1115 // is used.
1116 static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
 1117  if (N->getNodeId() == -1 ||
 1118  (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
 1119  SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
1120  DAG.RepositionNode(Pos->getIterator(), N.getNode());
1121  // Mark Node as invalid for pruning as after this it may be a successor to a
1122  // selected node but otherwise be in the same position of Pos.
1123  // Conservatively mark it with the same -abs(Id) to assure node id
1124  // invariant is preserved.
 1125  N->setNodeId(Pos->getNodeId());
 1126  SelectionDAGISel::InvalidateNodeId(N.getNode());
 1127  }
1128 }
1129 
1130 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1131 // safe. This allows us to convert the shift and the AND into an h-register
1132 // extract and a scaled index. Returns false if the simplification is
1133 // performed.
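// For example, with C1 == 2 this rewrites "(x >> 6) & 0x3fc" as
// "((x >> 8) & 0xff) << 2": the inner "(x >> 8) & 0xff" can become an
// h-register extract and the final shift by 2 becomes a scale of 4.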
 1134 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
 1135  uint64_t Mask,
1136  SDValue Shift, SDValue X,
1137  X86ISelAddressMode &AM) {
1138  if (Shift.getOpcode() != ISD::SRL ||
1139  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1140  !Shift.hasOneUse())
1141  return true;
1142 
1143  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1144  if (ScaleLog <= 0 || ScaleLog >= 4 ||
1145  Mask != (0xffu << ScaleLog))
1146  return true;
1147 
1148  MVT VT = N.getSimpleValueType();
1149  SDLoc DL(N);
1150  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1151  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1152  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1153  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1154  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1155  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1156 
1157  // Insert the new nodes into the topological ordering. We must do this in
1158  // a valid topological ordering as nothing is going to go back and re-sort
1159  // these nodes. We continually insert before 'N' in sequence as this is
1160  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1161  // hierarchy left to express.
1162  insertDAGNode(DAG, N, Eight);
1163  insertDAGNode(DAG, N, Srl);
1164  insertDAGNode(DAG, N, NewMask);
1165  insertDAGNode(DAG, N, And);
1166  insertDAGNode(DAG, N, ShlCount);
1167  insertDAGNode(DAG, N, Shl);
1168  DAG.ReplaceAllUsesWith(N, Shl);
1169  AM.IndexReg = And;
1170  AM.Scale = (1 << ScaleLog);
1171  return false;
1172 }
1173 
1174 // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1175 // allows us to fold the shift into this addressing mode. Returns false if the
1176 // transform succeeded.
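// For example, "(x << 2) & 0x3fc" becomes "(x & 0xff) << 2", and the trailing
// shift is then absorbed as a scale of 4 in the addressing mode.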
 1177 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
 1178  uint64_t Mask,
1179  SDValue Shift, SDValue X,
1180  X86ISelAddressMode &AM) {
1181  if (Shift.getOpcode() != ISD::SHL ||
1182  !isa<ConstantSDNode>(Shift.getOperand(1)))
1183  return true;
1184 
1185  // Not likely to be profitable if either the AND or SHIFT node has more
1186  // than one use (unless all uses are for address computation). Besides,
 1187  // the isel mechanism requires their node ids to be reused.
1188  if (!N.hasOneUse() || !Shift.hasOneUse())
1189  return true;
1190 
1191  // Verify that the shift amount is something we can fold.
1192  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1193  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1194  return true;
1195 
1196  MVT VT = N.getSimpleValueType();
1197  SDLoc DL(N);
1198  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1199  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1200  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1201 
1202  // Insert the new nodes into the topological ordering. We must do this in
1203  // a valid topological ordering as nothing is going to go back and re-sort
1204  // these nodes. We continually insert before 'N' in sequence as this is
1205  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1206  // hierarchy left to express.
1207  insertDAGNode(DAG, N, NewMask);
1208  insertDAGNode(DAG, N, NewAnd);
1209  insertDAGNode(DAG, N, NewShift);
1210  DAG.ReplaceAllUsesWith(N, NewShift);
1211 
1212  AM.Scale = 1 << ShiftAmt;
1213  AM.IndexReg = NewAnd;
1214  return false;
1215 }
1216 
1217 // Implement some heroics to detect shifts of masked values where the mask can
1218 // be replaced by extending the shift and undoing that in the addressing mode
1219 // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1220 // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1221 // the addressing mode. This results in code such as:
1222 //
1223 // int f(short *y, int *lookup_table) {
1224 // ...
1225 // return *y + lookup_table[*y >> 11];
1226 // }
1227 //
1228 // Turning into:
1229 // movzwl (%rdi), %eax
1230 // movl %eax, %ecx
1231 // shrl $11, %ecx
1232 // addl (%rsi,%rcx,4), %eax
1233 //
1234 // Instead of:
1235 // movzwl (%rdi), %eax
1236 // movl %eax, %ecx
1237 // shrl $9, %ecx
1238 // andl $124, %rcx
1239 // addl (%rsi,%rcx), %eax
1240 //
1241 // Note that this function assumes the mask is provided as a mask *after* the
1242 // value is shifted. The input chain may or may not match that, but computing
1243 // such a mask is trivial.
 1244 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
 1245  uint64_t Mask,
1246  SDValue Shift, SDValue X,
1247  X86ISelAddressMode &AM) {
1248  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1249  !isa<ConstantSDNode>(Shift.getOperand(1)))
1250  return true;
1251 
1252  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1253  unsigned MaskLZ = countLeadingZeros(Mask);
1254  unsigned MaskTZ = countTrailingZeros(Mask);
1255 
1256  // The amount of shift we're trying to fit into the addressing mode is taken
1257  // from the trailing zeros of the mask.
1258  unsigned AMShiftAmt = MaskTZ;
1259 
1260  // There is nothing we can do here unless the mask is removing some bits.
1261  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1262  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1263 
1264  // We also need to ensure that mask is a continuous run of bits.
1265  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1266 
1267  // Scale the leading zero count down based on the actual size of the value.
1268  // Also scale it down based on the size of the shift.
1269  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1270  if (MaskLZ < ScaleDown)
1271  return true;
1272  MaskLZ -= ScaleDown;
1273 
1274  // The final check is to ensure that any masked out high bits of X are
1275  // already known to be zero. Otherwise, the mask has a semantic impact
1276  // other than masking out a couple of low bits. Unfortunately, because of
1277  // the mask, zero extensions will be removed from operands in some cases.
1278  // This code works extra hard to look through extensions because we can
1279  // replace them with zero extensions cheaply if necessary.
1280  bool ReplacingAnyExtend = false;
1281  if (X.getOpcode() == ISD::ANY_EXTEND) {
 1282  unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
 1283  X.getOperand(0).getSimpleValueType().getSizeInBits();
1284  // Assume that we'll replace the any-extend with a zero-extend, and
1285  // narrow the search to the extended value.
1286  X = X.getOperand(0);
1287  MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1288  ReplacingAnyExtend = true;
1289  }
 1290  APInt MaskedHighBits =
 1291  APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
1292  KnownBits Known;
1293  DAG.computeKnownBits(X, Known);
1294  if (MaskedHighBits != Known.Zero) return true;
1295 
1296  // We've identified a pattern that can be transformed into a single shift
1297  // and an addressing mode. Make it so.
1298  MVT VT = N.getSimpleValueType();
1299  if (ReplacingAnyExtend) {
1300  assert(X.getValueType() != VT);
1301  // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1302  SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1303  insertDAGNode(DAG, N, NewX);
1304  X = NewX;
1305  }
1306  SDLoc DL(N);
1307  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1308  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1309  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1310  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1311 
1312  // Insert the new nodes into the topological ordering. We must do this in
1313  // a valid topological ordering as nothing is going to go back and re-sort
1314  // these nodes. We continually insert before 'N' in sequence as this is
1315  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1316  // hierarchy left to express.
1317  insertDAGNode(DAG, N, NewSRLAmt);
1318  insertDAGNode(DAG, N, NewSRL);
1319  insertDAGNode(DAG, N, NewSHLAmt);
1320  insertDAGNode(DAG, N, NewSHL);
1321  DAG.ReplaceAllUsesWith(N, NewSHL);
1322 
1323  AM.Scale = 1 << AMShiftAmt;
1324  AM.IndexReg = NewSRL;
1325  return false;
1326 }
1327 
1328 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1329  unsigned Depth) {
1330  SDLoc dl(N);
1331  LLVM_DEBUG({
1332  dbgs() << "MatchAddress: ";
1333  AM.dump(CurDAG);
1334  });
1335  // Limit recursion.
1336  if (Depth > 5)
1337  return matchAddressBase(N, AM);
1338 
1339  // If this is already a %rip relative address, we can only merge immediates
1340  // into it. Instead of handling this in every case, we handle it here.
1341  // RIP relative addressing: %rip + 32-bit displacement!
1342  if (AM.isRIPRelative()) {
1343  // FIXME: JumpTable and ExternalSymbol address currently don't like
1344  // displacements. It isn't very important, but this should be fixed for
1345  // consistency.
1346  if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1347  return true;
1348 
1349  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1350  if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1351  return false;
1352  return true;
1353  }
1354 
1355  switch (N.getOpcode()) {
1356  default: break;
1357  case ISD::LOCAL_RECOVER: {
1358  if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1359  if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1360  // Use the symbol and don't prefix it.
1361  AM.MCSym = ESNode->getMCSymbol();
1362  return false;
1363  }
1364  break;
1365  }
1366  case ISD::Constant: {
1367  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1368  if (!foldOffsetIntoAddress(Val, AM))
1369  return false;
1370  break;
1371  }
1372 
1373  case X86ISD::Wrapper:
1374  case X86ISD::WrapperRIP:
1375  if (!matchWrapper(N, AM))
1376  return false;
1377  break;
1378 
1379  case ISD::LOAD:
1380  if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1381  return false;
1382  break;
1383 
1384  case ISD::FrameIndex:
1385  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1386  AM.Base_Reg.getNode() == nullptr &&
1387  (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1388  AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1389  AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1390  return false;
1391  }
1392  break;
1393 
1394  case ISD::SHL:
1395  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1396  break;
1397 
1398  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1399  unsigned Val = CN->getZExtValue();
1400  // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1401  // that the base operand remains free for further matching. If
1402  // the base doesn't end up getting used, a post-processing step
1403  // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
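 // For example, "y + (x << 1)" can still be matched as "(%y, %x, 2)" with
 // the base register left available for y.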
1404  if (Val == 1 || Val == 2 || Val == 3) {
1405  AM.Scale = 1 << Val;
1406  SDValue ShVal = N.getOperand(0);
1407 
1408  // Okay, we know that we have a scale by now. However, if the scaled
1409  // value is an add of something and a constant, we can fold the
1410  // constant into the disp field here.
1411  if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1412  AM.IndexReg = ShVal.getOperand(0);
1413  ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1414  uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1415  if (!foldOffsetIntoAddress(Disp, AM))
1416  return false;
1417  }
1418 
1419  AM.IndexReg = ShVal;
1420  return false;
1421  }
1422  }
1423  break;
1424 
1425  case ISD::SRL: {
1426  // Scale must not be used already.
1427  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1428 
1429  SDValue And = N.getOperand(0);
1430  if (And.getOpcode() != ISD::AND) break;
1431  SDValue X = And.getOperand(0);
1432 
1433  // We only handle up to 64-bit values here as those are what matter for
1434  // addressing mode optimizations.
1435  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1436 
1437  // The mask used for the transform is expected to be post-shift, but we
1438  // found the shift first so just apply the shift to the mask before passing
1439  // it down.
1440  if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1441  !isa<ConstantSDNode>(And.getOperand(1)))
1442  break;
1443  uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1444 
1445  // Try to fold the mask and shift into the scale, and return false if we
1446  // succeed.
1447  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1448  return false;
1449  break;
1450  }
1451 
1452  case ISD::SMUL_LOHI:
1453  case ISD::UMUL_LOHI:
1454  // A mul_lohi where we need the low part can be folded as a plain multiply.
1455  if (N.getResNo() != 0) break;
 1456  LLVM_FALLTHROUGH;
 1457  case ISD::MUL:
1458  case X86ISD::MUL_IMM:
1459  // X*[3,5,9] -> X+X*[2,4,8]
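 // For example, "x * 5" with x in %rax can be selected as
 // "lea (%rax,%rax,4), %rcx": base = x, index = x, scale = 4.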
1460  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1461  AM.Base_Reg.getNode() == nullptr &&
1462  AM.IndexReg.getNode() == nullptr) {
1463  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1464  if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1465  CN->getZExtValue() == 9) {
1466  AM.Scale = unsigned(CN->getZExtValue())-1;
1467 
1468  SDValue MulVal = N.getOperand(0);
1469  SDValue Reg;
1470 
1471  // Okay, we know that we have a scale by now. However, if the scaled
1472  // value is an add of something and a constant, we can fold the
1473  // constant into the disp field here.
1474  if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1475  isa<ConstantSDNode>(MulVal.getOperand(1))) {
1476  Reg = MulVal.getOperand(0);
1477  ConstantSDNode *AddVal =
1478  cast<ConstantSDNode>(MulVal.getOperand(1));
1479  uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1480  if (foldOffsetIntoAddress(Disp, AM))
1481  Reg = N.getOperand(0);
1482  } else {
1483  Reg = N.getOperand(0);
1484  }
1485 
1486  AM.IndexReg = AM.Base_Reg = Reg;
1487  return false;
1488  }
1489  }
1490  break;
1491 
1492  case ISD::SUB: {
 1493  // Given A-B, if A can be completely folded into the address (leaving the
 1494  // index field unused), use -B as the index.
 1495  // This is a win if A has multiple parts that can be folded into
1496  // the address. Also, this saves a mov if the base register has
1497  // other uses, since it avoids a two-address sub instruction, however
1498  // it costs an additional mov if the index register has other uses.
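 // In other words, A - B is rewritten as A + (0 - B), with the negated RHS
 // placed in the index register (see the Zero/Neg nodes created below).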
1499 
1500  // Add an artificial use to this node so that we can keep track of
1501  // it if it gets CSE'd with a different node.
1502  HandleSDNode Handle(N);
1503 
1504  // Test if the LHS of the sub can be folded.
1505  X86ISelAddressMode Backup = AM;
1506  if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
1507  AM = Backup;
1508  break;
1509  }
1510  // Test if the index field is free for use.
1511  if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
1512  AM = Backup;
1513  break;
1514  }
1515 
1516  int Cost = 0;
1517  SDValue RHS = Handle.getValue().getOperand(1);
1518  // If the RHS involves a register with multiple uses, this
1519  // transformation incurs an extra mov, due to the neg instruction
1520  // clobbering its operand.
1521  if (!RHS.getNode()->hasOneUse() ||
1522  RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
1523  RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
1524  RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
1525  (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
1526  RHS.getOperand(0).getValueType() == MVT::i32))
1527  ++Cost;
1528  // If the base is a register with multiple uses, this
1529  // transformation may save a mov.
1530  // FIXME: Don't rely on DELETED_NODEs.
1531  if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
1532  AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
1533  !AM.Base_Reg.getNode()->hasOneUse()) ||
1534  AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1535  --Cost;
1536  // If the folded LHS was interesting, this transformation saves
1537  // address arithmetic.
1538  if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
1539  ((AM.Disp != 0) && (Backup.Disp == 0)) +
1540  (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
1541  --Cost;
1542  // If it doesn't look like it may be an overall win, don't do it.
1543  if (Cost >= 0) {
1544  AM = Backup;
1545  break;
1546  }
1547 
1548  // Ok, the transformation is legal and appears profitable. Go for it.
1549  SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
1550  SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
1551  AM.IndexReg = Neg;
1552  AM.Scale = 1;
1553 
1554  // Insert the new nodes into the topological ordering.
1555  insertDAGNode(*CurDAG, Handle.getValue(), Zero);
1556  insertDAGNode(*CurDAG, Handle.getValue(), Neg);
1557  return false;
1558  }
1559 
1560  case ISD::ADD:
1561  if (!matchAdd(N, AM, Depth))
1562  return false;
1563  break;
1564 
1565  case ISD::OR:
1566  // We want to look through a transform in InstCombine and DAGCombiner that
1567  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
1568  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
1569  // An 'lea' can then be used to match the shift (multiply) and add:
1570  // and $1, %esi
1571  // lea (%rsi, %rdi, 8), %rax
1572  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
1573  !matchAdd(N, AM, Depth))
1574  return false;
1575  break;
1576 
1577  case ISD::AND: {
1578  // Perform some heroic transforms on an and of a constant-count shift
1579  // with a constant to enable use of the scaled offset field.
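  // For example (illustrative): (and (shl x, 2), 0x3FC) can be rewritten as
  // (shl (and x, 0xFF), 2); the shl by 2 is then absorbed as a scale of 4 in
  // the addressing mode and only the cheaper mask remains on x.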
1580 
1581  // Scale must not be used already.
1582  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1583 
1584  SDValue Shift = N.getOperand(0);
1585  if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
1586  SDValue X = Shift.getOperand(0);
1587 
1588  // We only handle up to 64-bit values here as those are what matter for
1589  // addressing mode optimizations.
1590  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1591 
1592  if (!isa<ConstantSDNode>(N.getOperand(1)))
1593  break;
1594  uint64_t Mask = N.getConstantOperandVal(1);
1595 
1596  // Try to fold the mask and shift into an extract and scale.
1597  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
1598  return false;
1599 
1600  // Try to fold the mask and shift directly into the scale.
1601  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
1602  return false;
1603 
1604  // Try to swap the mask and shift to place shifts which can be done as
1605  // a scale on the outside of the mask.
1606  if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
1607  return false;
1608  break;
1609  }
1610  }
1611 
1612  return matchAddressBase(N, AM);
1613 }
1614 
1615 /// Helper for MatchAddress. Add the specified node to the
1616 /// specified addressing mode without any further recursion.
1617 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1618  // Is the base register already occupied?
1619  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
1620  // If so, check to see if the scale index register is set.
1621  if (!AM.IndexReg.getNode()) {
1622  AM.IndexReg = N;
1623  AM.Scale = 1;
1624  return false;
1625  }
1626 
1627  // Otherwise, we cannot select it.
1628  return true;
1629  }
1630 
1631  // Default, generate it as a register.
1632  AM.BaseType = X86ISelAddressMode::RegBase;
1633  AM.Base_Reg = N;
1634  return false;
1635 }
1636 
1637 /// Helper for selectVectorAddr. Handles things that can be folded into a
1638 /// gather scatter address. The index register and scale should have already
1639 /// been handled.
1640 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
1641  // TODO: Support other operations.
1642  switch (N.getOpcode()) {
1643  case ISD::Constant: {
1644  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1645  if (!foldOffsetIntoAddress(Val, AM))
1646  return false;
1647  break;
1648  }
1649  case X86ISD::Wrapper:
1650  if (!matchWrapper(N, AM))
1651  return false;
1652  break;
1653  }
1654 
1655  return matchAddressBase(N, AM);
1656 }
1657 
1658 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
1659  SDValue &Scale, SDValue &Index,
1660  SDValue &Disp, SDValue &Segment) {
1661  X86ISelAddressMode AM;
1662  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
1663  AM.IndexReg = Mgs->getIndex();
1664  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
1665 
1666  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1667  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
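  // For example (illustrative IR): a gather whose pointer operand has type
  // "i32 addrspace(256)*" reaches here with AddrSpace == 256 and is selected
  // with a %gs segment override.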
1668  if (AddrSpace == 256)
1669  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1670  if (AddrSpace == 257)
1671  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1672  if (AddrSpace == 258)
1673  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1674 
1675  // Try to match into the base and displacement fields.
1676  if (matchVectorAddress(N, AM))
1677  return false;
1678 
1679  MVT VT = N.getSimpleValueType();
1680  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1681  if (!AM.Base_Reg.getNode())
1682  AM.Base_Reg = CurDAG->getRegister(0, VT);
1683  }
1684 
1685  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1686  return true;
1687 }
1688 
1689 /// Returns true if it is able to pattern match an addressing mode.
1690 /// It returns the operands which make up the maximal addressing mode it can
1691 /// match by reference.
1692 ///
1693 /// Parent is the parent node of the addr operand that is being matched. It
1694 /// is always a load, store, atomic node, or null. It is only null when
1695 /// checking memory operands for inline asm nodes.
1696 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
1697  SDValue &Scale, SDValue &Index,
1698  SDValue &Disp, SDValue &Segment) {
1699  X86ISelAddressMode AM;
1700 
1701  if (Parent &&
1702  // This list of opcodes are all the nodes that have an "addr:$ptr" operand
1703  // that are not a MemSDNode, and thus don't have proper addrspace info.
1704  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
1705  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
1706  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
1707  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
1708  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
1709  unsigned AddrSpace =
1710  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1711  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1712  if (AddrSpace == 256)
1713  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1714  if (AddrSpace == 257)
1715  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1716  if (AddrSpace == 258)
1717  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1718  }
1719 
1720  if (matchAddress(N, AM))
1721  return false;
1722 
1723  MVT VT = N.getSimpleValueType();
1724  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1725  if (!AM.Base_Reg.getNode())
1726  AM.Base_Reg = CurDAG->getRegister(0, VT);
1727  }
1728 
1729  if (!AM.IndexReg.getNode())
1730  AM.IndexReg = CurDAG->getRegister(0, VT);
1731 
1732  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1733  return true;
1734 }
1735 
1736 // We can only fold a load if all nodes between it and the root node have a
1737 // single use. If there are additional uses, we could end up duplicating the
1738 // load.
1739 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
1740  while (User != Root) {
1741  if (!User->hasOneUse())
1742  return false;
1743  User = *User->use_begin();
1744  }
1745 
1746  return true;
1747 }
1748 
1749 /// Match a scalar SSE load. In particular, we want to match a load whose top
1750 /// elements are either undef or zeros. The load flavor is derived from the
1751 /// type of N, which is either v4f32 or v2f64.
1752 ///
1753 /// We also return:
1754 /// PatternChainNode: this is the matched node that has a chain input and
1755 /// output.
1756 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
1757  SDValue N, SDValue &Base,
1758  SDValue &Scale, SDValue &Index,
1759  SDValue &Disp, SDValue &Segment,
1760  SDValue &PatternNodeWithChain) {
1761  if (!hasSingleUsesFromRoot(Root, Parent))
1762  return false;
1763 
1764  // We can allow a full vector load here since narrowing a load is ok.
1765  if (ISD::isNON_EXTLoad(N.getNode())) {
1766  PatternNodeWithChain = N;
1767  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1768  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
1769  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1770  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1771  Segment);
1772  }
1773  }
1774 
1775  // We can also match the special zero extended load opcode.
1776  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
1777  PatternNodeWithChain = N;
1778  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1779  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
1780  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
1781  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
1782  Segment);
1783  }
1784  }
1785 
1786  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
1787  // once. Otherwise the load might get duplicated and the chain output of the
1788  // duplicate load will not be observed by all dependencies.
1789  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
1790  PatternNodeWithChain = N.getOperand(0);
1791  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1792  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1793  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
1794  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1795  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1796  Segment);
1797  }
1798  }
1799 
1800  // Also handle the case where we explicitly require zeros in the top
1801  // elements. This is a vector shuffle from the zero vector.
1802  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1803  // Check to see if the top elements are all zeros (or bitcast of zeros).
1804  N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1805  N.getOperand(0).getNode()->hasOneUse()) {
1806  PatternNodeWithChain = N.getOperand(0).getOperand(0);
1807  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1808  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1809  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
1810  // Okay, this is a zero extending load. Fold it.
1811  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1812  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1813  Segment);
1814  }
1815  }
1816 
1817  return false;
1818 }
1819 
1820 
1821 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
1822  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1823  uint64_t ImmVal = CN->getZExtValue();
1824  if (!isUInt<32>(ImmVal))
1825  return false;
1826 
1827  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
1828  return true;
1829  }
1830 
1831  // In static codegen with small code model, we can get the address of a label
1832  // into a register with 'movl'
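  // For example (illustrative): "movl $foo, %eax" implicitly zero-extends
  // into %rax, so it is a valid way to materialize the 64-bit address of foo
  // whenever that address is known to fit in 32 bits.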
1833  if (N->getOpcode() != X86ISD::Wrapper)
1834  return false;
1835 
1836  N = N.getOperand(0);
1837 
1838  // At least GNU as does not accept 'movl' for TPOFF relocations.
1839  // FIXME: We could use 'movl' when we know we are targeting MC.
1840  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
1841  return false;
1842 
1843  Imm = N;
1844  if (N->getOpcode() != ISD::TargetGlobalAddress)
1845  return TM.getCodeModel() == CodeModel::Small;
1846 
1847  Optional<ConstantRange> CR =
1848  cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
1849  if (!CR)
1850  return TM.getCodeModel() == CodeModel::Small;
1851 
1852  return CR->getUnsignedMax().ult(1ull << 32);
1853 }
1854 
1855 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
1856  SDValue &Scale, SDValue &Index,
1857  SDValue &Disp, SDValue &Segment) {
1858  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
1859  SDLoc DL(N);
1860 
1861  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
1862  return false;
1863 
1864  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
1865  if (RN && RN->getReg() == 0)
1866  Base = CurDAG->getRegister(0, MVT::i64);
1867  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
1868  // Base could already be %rip, particularly in the x32 ABI.
1869  Base = SDValue(CurDAG->getMachineNode(
1870  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1871  CurDAG->getTargetConstant(0, DL, MVT::i64),
1872  Base,
1873  CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
1874  0);
1875  }
1876 
1877  RN = dyn_cast<RegisterSDNode>(Index);
1878  if (RN && RN->getReg() == 0)
1879  Index = CurDAG->getRegister(0, MVT::i64);
1880  else {
1881  assert(Index.getValueType() == MVT::i32 &&
1882  "Expect to be extending 32-bit registers for use in LEA");
1883  Index = SDValue(CurDAG->getMachineNode(
1884  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1885  CurDAG->getTargetConstant(0, DL, MVT::i64),
1886  Index,
1887  CurDAG->getTargetConstant(X86::sub_32bit, DL,
1888  MVT::i32)),
1889  0);
1890  }
1891 
1892  return true;
1893 }
1894 
1895 /// Calls SelectAddr and determines if the maximal addressing
1896 /// mode it matches can be cost effectively emitted as an LEA instruction.
1897 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
1898  SDValue &Base, SDValue &Scale,
1899  SDValue &Index, SDValue &Disp,
1900  SDValue &Segment) {
1901  X86ISelAddressMode AM;
1902 
1903  // Save the DL and VT before calling matchAddress, it can invalidate N.
1904  SDLoc DL(N);
1905  MVT VT = N.getSimpleValueType();
1906 
1907  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1908  // segments.
1909  SDValue Copy = AM.Segment;
1910  SDValue T = CurDAG->getRegister(0, MVT::i32);
1911  AM.Segment = T;
1912  if (matchAddress(N, AM))
1913  return false;
1914  assert (T == AM.Segment);
1915  AM.Segment = Copy;
1916 
1917  unsigned Complexity = 0;
1918  if (AM.BaseType == X86ISelAddressMode::RegBase)
1919  if (AM.Base_Reg.getNode())
1920  Complexity = 1;
1921  else
1922  AM.Base_Reg = CurDAG->getRegister(0, VT);
1923  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1924  Complexity = 4;
1925 
1926  if (AM.IndexReg.getNode())
1927  Complexity++;
1928  else
1929  AM.IndexReg = CurDAG->getRegister(0, VT);
1930 
1931  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1932  // a simple shift.
1933  if (AM.Scale > 1)
1934  Complexity++;
1935 
1936  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1937  // to a LEA. This is determined with some experimentation but is by no means
1938  // optimal (especially for code size consideration). LEA is nice because of
1939  // its three-address nature. Tweak the cost function again when we can run
1940  // convertToThreeAddress() at register allocation time.
1941  if (AM.hasSymbolicDisplacement()) {
1942  // For X86-64, always use LEA to materialize RIP-relative addresses.
1943  if (Subtarget->is64Bit())
1944  Complexity = 4;
1945  else
1946  Complexity += 2;
1947  }
1948 
1949  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
1950  Complexity++;
1951 
1952  // If it isn't worth using an LEA, reject it.
1953  if (Complexity <= 2)
1954  return false;
1955 
1956  getAddressOperands(AM, DL, Base, Scale, Index, Disp, Segment);
1957  return true;
1958 }
1959 
1960 /// This is only run on TargetGlobalTLSAddress nodes.
1961 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
1962  SDValue &Scale, SDValue &Index,
1963  SDValue &Disp, SDValue &Segment) {
1964  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1965  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1966 
1967  X86ISelAddressMode AM;
1968  AM.GV = GA->getGlobal();
1969  AM.Disp += GA->getOffset();
1970  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
1971  AM.SymbolFlags = GA->getTargetFlags();
1972 
1973  if (N.getValueType() == MVT::i32) {
1974  AM.Scale = 1;
1975  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1976  } else {
1977  AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1978  }
1979 
1980  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1981  return true;
1982 }
1983 
1984 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
1985  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
1986  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
1987  N.getValueType());
1988  return true;
1989  }
1990 
1991  // Keep track of the original value type and whether this value was
1992  // truncated. If we see a truncation from pointer type to VT that truncates
1993  // bits that are known to be zero, we can use a narrow reference.
1994  EVT VT = N.getValueType();
1995  bool WasTruncated = false;
1996  if (N.getOpcode() == ISD::TRUNCATE) {
1997  WasTruncated = true;
1998  N = N.getOperand(0);
1999  }
2000 
2001  if (N.getOpcode() != X86ISD::Wrapper)
2002  return false;
2003 
2004  // We can only use non-GlobalValues as immediates if they were not truncated,
2005  // as we do not have any range information. If we have a GlobalValue and the
2006  // address was not truncated, we can select it as an operand directly.
2007  unsigned Opc = N.getOperand(0)->getOpcode();
2008  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2009  Op = N.getOperand(0);
2010  // We can only select the operand directly if we didn't have to look past a
2011  // truncate.
2012  return !WasTruncated;
2013  }
2014 
2015  // Check that the global's range fits into VT.
2016  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2017  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2018  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2019  return false;
2020 
2021  // Okay, we can use a narrow reference.
2022  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2023  GA->getOffset(), GA->getTargetFlags());
2024  return true;
2025 }
2026 
2027 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2028  SDValue &Base, SDValue &Scale,
2029  SDValue &Index, SDValue &Disp,
2030  SDValue &Segment) {
2031  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2032  !IsProfitableToFold(N, P, Root) ||
2033  !IsLegalToFold(N, P, Root, OptLevel))
2034  return false;
2035 
2036  return selectAddr(N.getNode(),
2037  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2038 }
2039 
2040 bool X86DAGToDAGISel::tryFoldVecLoad(SDNode *Root, SDNode *P, SDValue N,
2041  SDValue &Base, SDValue &Scale,
2042  SDValue &Index, SDValue &Disp,
2043  SDValue &Segment) {
2044  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2045  useNonTemporalLoad(cast<LoadSDNode>(N)) ||
2046  !IsProfitableToFold(N, P, Root) ||
2047  !IsLegalToFold(N, P, Root, OptLevel))
2048  return false;
2049 
2050  return selectAddr(N.getNode(),
2051  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2052 }
2053 
2054 /// Return an SDNode that returns the value of the global base register.
2055 /// Output instructions required to initialize the global base register,
2056 /// if necessary.
2057 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2058  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2059  auto &DL = MF->getDataLayout();
2060  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2061 }
2062 
2063 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2064  if (N->getOpcode() == ISD::TRUNCATE)
2065  N = N->getOperand(0).getNode();
2066  if (N->getOpcode() != X86ISD::Wrapper)
2067  return false;
2068 
2069  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2070  if (!GA)
2071  return false;
2072 
2073  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2074  return CR && CR->getSignedMin().sge(-1ull << Width) &&
2075  CR->getSignedMax().slt(1ull << Width);
2076 }
2077 
2078 /// Test whether the given X86ISD::CMP node has any uses which require the SF
2079 /// or OF bits to be accurate.
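/// For example (illustrative): an unsigned-compare user such as SETA or JA
/// reads only CF and ZF, so it is on the safe list below, whereas a signed
/// user such as SETG reads SF and OF and makes this return false.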
2080 static bool hasNoSignedComparisonUses(SDNode *N) {
2081  // Examine each user of the node.
2082  for (SDNode::use_iterator UI = N->use_begin(),
2083  UE = N->use_end(); UI != UE; ++UI) {
2084  // Only examine CopyToReg uses.
2085  if (UI->getOpcode() != ISD::CopyToReg)
2086  return false;
2087  // Only examine CopyToReg uses that copy to EFLAGS.
2088  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
2089  X86::EFLAGS)
2090  return false;
2091  // Examine each user of the CopyToReg use.
2092  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2093  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2094  // Only examine the Flag result.
2095  if (FlagUI.getUse().getResNo() != 1) continue;
2096  // Anything unusual: assume conservatively.
2097  if (!FlagUI->isMachineOpcode()) return false;
2098  // Examine the opcode of the user.
2099  switch (FlagUI->getMachineOpcode()) {
2100  // These comparisons don't treat the most significant bit specially.
2101  case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
2102  case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
2103  case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
2104  case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
2105  case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
2106  case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
2107  case X86::CMOVA16rr: case X86::CMOVA16rm:
2108  case X86::CMOVA32rr: case X86::CMOVA32rm:
2109  case X86::CMOVA64rr: case X86::CMOVA64rm:
2110  case X86::CMOVAE16rr: case X86::CMOVAE16rm:
2111  case X86::CMOVAE32rr: case X86::CMOVAE32rm:
2112  case X86::CMOVAE64rr: case X86::CMOVAE64rm:
2113  case X86::CMOVB16rr: case X86::CMOVB16rm:
2114  case X86::CMOVB32rr: case X86::CMOVB32rm:
2115  case X86::CMOVB64rr: case X86::CMOVB64rm:
2116  case X86::CMOVBE16rr: case X86::CMOVBE16rm:
2117  case X86::CMOVBE32rr: case X86::CMOVBE32rm:
2118  case X86::CMOVBE64rr: case X86::CMOVBE64rm:
2119  case X86::CMOVE16rr: case X86::CMOVE16rm:
2120  case X86::CMOVE32rr: case X86::CMOVE32rm:
2121  case X86::CMOVE64rr: case X86::CMOVE64rm:
2122  case X86::CMOVNE16rr: case X86::CMOVNE16rm:
2123  case X86::CMOVNE32rr: case X86::CMOVNE32rm:
2124  case X86::CMOVNE64rr: case X86::CMOVNE64rm:
2125  case X86::CMOVNP16rr: case X86::CMOVNP16rm:
2126  case X86::CMOVNP32rr: case X86::CMOVNP32rm:
2127  case X86::CMOVNP64rr: case X86::CMOVNP64rm:
2128  case X86::CMOVP16rr: case X86::CMOVP16rm:
2129  case X86::CMOVP32rr: case X86::CMOVP32rm:
2130  case X86::CMOVP64rr: case X86::CMOVP64rm:
2131  continue;
2132  // Anything else: assume conservatively.
2133  default: return false;
2134  }
2135  }
2136  }
2137  return true;
2138 }
2139 
2140 /// Test whether the given node which sets flags has any uses which require the
2141 /// CF flag to be accurate.
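/// For example (illustrative): SETE reads only ZF and SETL reads SF and OF,
/// so both are on the safe list below, whereas a SETB user reads CF and
/// makes this return false.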
2142 static bool hasNoCarryFlagUses(SDNode *N) {
2143  // Examine each user of the node.
2144  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
2145  ++UI) {
2146  // Only check things that use the flags.
2147  if (UI.getUse().getResNo() != 1)
2148  continue;
2149  // Only examine CopyToReg uses.
2150  if (UI->getOpcode() != ISD::CopyToReg)
2151  return false;
2152  // Only examine CopyToReg uses that copy to EFLAGS.
2153  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2154  return false;
2155  // Examine each user of the CopyToReg use.
2156  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2157  FlagUI != FlagUE; ++FlagUI) {
2158  // Only examine the Flag result.
2159  if (FlagUI.getUse().getResNo() != 1)
2160  continue;
2161  // Anything unusual: assume conservatively.
2162  if (!FlagUI->isMachineOpcode())
2163  return false;
2164  // Examine the opcode of the user.
2165  switch (FlagUI->getMachineOpcode()) {
2166  // Comparisons which don't examine the CF flag.
2167  case X86::SETOr: case X86::SETNOr: case X86::SETEr: case X86::SETNEr:
2168  case X86::SETSr: case X86::SETNSr: case X86::SETPr: case X86::SETNPr:
2169  case X86::SETLr: case X86::SETGEr: case X86::SETLEr: case X86::SETGr:
2170  case X86::JO_1: case X86::JNO_1: case X86::JE_1: case X86::JNE_1:
2171  case X86::JS_1: case X86::JNS_1: case X86::JP_1: case X86::JNP_1:
2172  case X86::JL_1: case X86::JGE_1: case X86::JLE_1: case X86::JG_1:
2173  case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
2174  case X86::CMOVO16rm: case X86::CMOVO32rm: case X86::CMOVO64rm:
2175  case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr:
2176  case X86::CMOVNO16rm: case X86::CMOVNO32rm: case X86::CMOVNO64rm:
2177  case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
2178  case X86::CMOVE16rm: case X86::CMOVE32rm: case X86::CMOVE64rm:
2179  case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
2180  case X86::CMOVNE16rm: case X86::CMOVNE32rm: case X86::CMOVNE64rm:
2181  case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
2182  case X86::CMOVS16rm: case X86::CMOVS32rm: case X86::CMOVS64rm:
2183  case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
2184  case X86::CMOVNS16rm: case X86::CMOVNS32rm: case X86::CMOVNS64rm:
2185  case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
2186  case X86::CMOVP16rm: case X86::CMOVP32rm: case X86::CMOVP64rm:
2187  case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
2188  case X86::CMOVNP16rm: case X86::CMOVNP32rm: case X86::CMOVNP64rm:
2189  case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
2190  case X86::CMOVL16rm: case X86::CMOVL32rm: case X86::CMOVL64rm:
2191  case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
2192  case X86::CMOVGE16rm: case X86::CMOVGE32rm: case X86::CMOVGE64rm:
2193  case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
2194  case X86::CMOVLE16rm: case X86::CMOVLE32rm: case X86::CMOVLE64rm:
2195  case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
2196  case X86::CMOVG16rm: case X86::CMOVG32rm: case X86::CMOVG64rm:
2197  continue;
2198  // Anything else: assume conservatively.
2199  default:
2200  return false;
2201  }
2202  }
2203  }
2204  return true;
2205 }
2206 
2207 /// Check whether or not the chain ending in StoreNode is suitable for doing
2208 /// the {load; op; store} to modify transformation.
2209 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2210  SDValue StoredVal, SelectionDAG *CurDAG,
2211  LoadSDNode *&LoadNode,
2212  SDValue &InputChain) {
2213  // is the stored value result 0 of the load?
2214  if (StoredVal.getResNo() != 0) return false;
2215 
2216  // are there other uses of the loaded value than the inc or dec?
2217  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2218 
2219  // is the store non-extending and non-indexed?
2220  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2221  return false;
2222 
2223  SDValue Load = StoredVal->getOperand(0);
2224  // Is the stored value a non-extending and non-indexed load?
2225  if (!ISD::isNormalLoad(Load.getNode())) return false;
2226 
2227  // Return LoadNode by reference.
2228  LoadNode = cast<LoadSDNode>(Load);
2229 
2230  // Is store the only read of the loaded value?
2231  if (!Load.hasOneUse())
2232  return false;
2233 
2234  // Is the address of the store the same as the load?
2235  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2236  LoadNode->getOffset() != StoreNode->getOffset())
2237  return false;
2238 
2239  bool FoundLoad = false;
2240  SmallVector<SDValue, 4> ChainOps;
2241  SmallVector<const SDNode *, 4> LoopWorklist;
2242  SmallPtrSet<const SDNode *, 16> Visited;
2243  const unsigned int Max = 1024;
2244 
2245  // Visualization of Load-Op-Store fusion:
2246  // -------------------------
2247  // Legend:
2248  // *-lines = Chain operand dependencies.
2249  // |-lines = Normal operand dependencies.
2250  // Dependencies flow down and right. n-suffix references multiple nodes.
2251  //
2252  //        C                      Xn  C
2253  //        *                       *  *
2254  //        *                       *  *
2255  //  Xn  A-LD    Yn               TF         Yn
2256  //   *    *  \   |                *         |
2257  //    *   *   \  |                *         |
2258  //     *  *    \ |       =>       A--LD_OP_ST
2259  //      * *     \|                     \
2260  //       TF     OP                      \
2261  //        *      |  \                    Zn
2262  //         *     |   \
2263  //          A-ST      Zn
2264  //
2265 
2266  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
2267  //                                       #2: Yn -> LD
2268  //                                       #3: ST -> Zn
2269 
2270  // Ensure the transform is safe by checking for the dual
2271  // dependencies to make sure we do not induce a loop.
2272 
2273  // As LD is a predecessor to both OP and ST we can do this by checking:
2274  // a). if LD is a predecessor to a member of Xn or Yn.
2275  // b). if a Zn is a predecessor to ST.
2276 
2277  // However, (b) can only occur through being a chain predecessor to
2278  // ST, which is the same as Zn being a member or predecessor of Xn,
2279  // which is a subset of LD being a predecessor of Xn. So it's
2280  // subsumed by check (a).
2281 
2282  SDValue Chain = StoreNode->getChain();
2283 
2284  // Gather X elements in ChainOps.
2285  if (Chain == Load.getValue(1)) {
2286  FoundLoad = true;
2287  ChainOps.push_back(Load.getOperand(0));
2288  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2289  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2290  SDValue Op = Chain.getOperand(i);
2291  if (Op == Load.getValue(1)) {
2292  FoundLoad = true;
2293  // Drop Load, but keep its chain. No cycle check necessary.
2294  ChainOps.push_back(Load.getOperand(0));
2295  continue;
2296  }
2297  LoopWorklist.push_back(Op.getNode());
2298  ChainOps.push_back(Op);
2299  }
2300  }
2301 
2302  if (!FoundLoad)
2303  return false;
2304 
2305  // Worklist is currently Xn. Add Yn to worklist.
2306  for (SDValue Op : StoredVal->ops())
2307  if (Op.getNode() != LoadNode)
2308  LoopWorklist.push_back(Op.getNode());
2309 
2310  // Check (a) if Load is a predecessor to Xn + Yn
2311  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2312  true))
2313  return false;
2314 
2315  InputChain =
2316  CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2317  return true;
2318 }
2319 
2320 // Change a chain of {load; op; store} of the same value into a simple op
2321 // through memory of that value, if the uses of the modified value and its
2322 // address are suitable.
2323 //
2324 // The tablegen memory operand pattern is currently not able to match
2325 // the case where the EFLAGS on the original operation are used.
2326 //
2327 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2328 // be transferred from a node in the pattern to the result node, probably with
2329 // a new keyword. For example, we have this
2330 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2331 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2332 // (implicit EFLAGS)]>;
2333 // but maybe need something like this
2334 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2335 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2336 // (transferrable EFLAGS)]>;
2337 //
2338 // Until then, we manually fold these and instruction select the operation
2339 // here.
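// For example (an illustrative sketch): a DAG of the form
//   (store (add (load [Mem]), 42), [Mem])
// is selected below as a single read-modify-write instruction such as
//   addl $42, Mem
// instead of separate load, add and store instructions.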
2340 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2341  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2342  SDValue StoredVal = StoreNode->getOperand(1);
2343  unsigned Opc = StoredVal->getOpcode();
2344 
2345  // Before we try to select anything, make sure this is memory operand size
2346  // and opcode we can handle. Note that this must match the code below that
2347  // actually lowers the opcodes.
2348  EVT MemVT = StoreNode->getMemoryVT();
2349  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2350  MemVT != MVT::i8)
2351  return false;
2352  switch (Opc) {
2353  default:
2354  return false;
2355  case X86ISD::INC:
2356  case X86ISD::DEC:
2357  case X86ISD::ADD:
2358  case X86ISD::ADC:
2359  case X86ISD::SUB:
2360  case X86ISD::SBB:
2361  case X86ISD::AND:
2362  case X86ISD::OR:
2363  case X86ISD::XOR:
2364  break;
2365  }
2366 
2367  LoadSDNode *LoadNode = nullptr;
2368  SDValue InputChain;
2369  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode,
2370  InputChain))
2371  return false;
2372 
2373  SDValue Base, Scale, Index, Disp, Segment;
2374  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2375  Segment))
2376  return false;
2377 
2378  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2379  unsigned Opc8) {
2380  switch (MemVT.getSimpleVT().SimpleTy) {
2381  case MVT::i64:
2382  return Opc64;
2383  case MVT::i32:
2384  return Opc32;
2385  case MVT::i16:
2386  return Opc16;
2387  case MVT::i8:
2388  return Opc8;
2389  default:
2390  llvm_unreachable("Invalid size!");
2391  }
2392  };
2393 
2394  MachineSDNode *Result;
2395  switch (Opc) {
2396  case X86ISD::INC:
2397  case X86ISD::DEC: {
2398  unsigned NewOpc =
2399  Opc == X86ISD::INC
2400  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2401  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2402  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2403  Result =
2404  CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
2405  break;
2406  }
2407  case X86ISD::ADD:
2408  case X86ISD::ADC:
2409  case X86ISD::SUB:
2410  case X86ISD::SBB:
2411  case X86ISD::AND:
2412  case X86ISD::OR:
2413  case X86ISD::XOR: {
2414  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
2415  switch (Opc) {
2416  case X86ISD::ADD:
2417  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
2418  X86::ADD8mr);
2419  case X86ISD::ADC:
2420  return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
2421  X86::ADC8mr);
2422  case X86ISD::SUB:
2423  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
2424  X86::SUB8mr);
2425  case X86ISD::SBB:
2426  return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
2427  X86::SBB8mr);
2428  case X86ISD::AND:
2429  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
2430  X86::AND8mr);
2431  case X86ISD::OR:
2432  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
2433  case X86ISD::XOR:
2434  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
2435  X86::XOR8mr);
2436  default:
2437  llvm_unreachable("Invalid opcode!");
2438  }
2439  };
2440  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
2441  switch (Opc) {
2442  case X86ISD::ADD:
2443  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
2444  case X86ISD::ADC:
2445  return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
2446  case X86ISD::SUB:
2447  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
2448  case X86ISD::SBB:
2449  return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
2450  case X86ISD::AND:
2451  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
2452  case X86ISD::OR:
2453  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
2454  case X86ISD::XOR:
2455  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
2456  default:
2457  llvm_unreachable("Invalid opcode!");
2458  }
2459  };
2460  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
2461  switch (Opc) {
2462  case X86ISD::ADD:
2463  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
2464  X86::ADD8mi);
2465  case X86ISD::ADC:
2466  return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
2467  X86::ADC8mi);
2468  case X86ISD::SUB:
2469  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
2470  X86::SUB8mi);
2471  case X86ISD::SBB:
2472  return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
2473  X86::SBB8mi);
2474  case X86ISD::AND:
2475  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
2476  X86::AND8mi);
2477  case X86ISD::OR:
2478  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
2479  X86::OR8mi);
2480  case X86ISD::XOR:
2481  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
2482  X86::XOR8mi);
2483  default:
2484  llvm_unreachable("Invalid opcode!");
2485  }
2486  };
2487 
2488  unsigned NewOpc = SelectRegOpcode(Opc);
2489  SDValue Operand = StoredVal->getOperand(1);
2490 
2491  // See if the operand is a constant that we can fold into an immediate
2492  // operand.
2493  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
2494  auto OperandV = OperandC->getAPIntValue();
2495 
2496  // Check if we can shrink the operand enough to fit in an immediate (or
2497  // fit into a smaller immediate) by negating it and switching the
2498  // operation.
2499  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
2500  ((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 &&
2501  (-OperandV).getMinSignedBits() <= 8) ||
2502  (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
2503  (-OperandV).getMinSignedBits() <= 32)) &&
2504  hasNoCarryFlagUses(StoredVal.getNode())) {
2505  OperandV = -OperandV;
2506  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
2507  }
2508 
2509  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
2510  // the larger immediate operand.
2511  if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) {
2512  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2513  NewOpc = SelectImm8Opcode(Opc);
2514  } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() &&
2515  (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) {
2516  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2517  NewOpc = SelectImmOpcode(Opc);
2518  }
2519  }
2520 
2521  if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
2522  SDValue CopyTo =
2523  CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
2524  StoredVal.getOperand(2), SDValue());
2525 
2526  const SDValue Ops[] = {Base, Scale, Index, Disp,
2527  Segment, Operand, CopyTo, CopyTo.getValue(1)};
2528  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2529  Ops);
2530  } else {
2531  const SDValue Ops[] = {Base, Scale, Index, Disp,
2532  Segment, Operand, InputChain};
2533  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2534  Ops);
2535  }
2536  break;
2537  }
2538  default:
2539  llvm_unreachable("Invalid opcode!");
2540  }
2541 
2542  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
2543  MemOp[0] = StoreNode->getMemOperand();
2544  MemOp[1] = LoadNode->getMemOperand();
2545  Result->setMemRefs(MemOp, MemOp + 2);
2546 
2547  // Update Load Chain uses as well.
2548  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
2549  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2550  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2551  CurDAG->RemoveDeadNode(Node);
2552  return true;
2553 }
2554 
2555 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
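// For example (illustrative): (x >> 4) & 0xFFF extracts 12 bits starting at
// bit 4, so it maps to BEXTR with the control value (4 | (12 << 8)) = 0xC04
// built below.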
2556 bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
2557  MVT NVT = Node->getSimpleValueType(0);
2558  SDLoc dl(Node);
2559 
2560  SDValue N0 = Node->getOperand(0);
2561  SDValue N1 = Node->getOperand(1);
2562 
2563  if (!Subtarget->hasBMI() && !Subtarget->hasTBM())
2564  return false;
2565 
2566  // Must have a shift right.
2567  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
2568  return false;
2569 
2570  // Shift can't have additional users.
2571  if (!N0->hasOneUse())
2572  return false;
2573 
2574  // Only supported for 32 and 64 bits.
2575  if (NVT != MVT::i32 && NVT != MVT::i64)
2576  return false;
2577 
2578  // Shift amount and RHS of and must be constant.
2579  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
2580  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2581  if (!MaskCst || !ShiftCst)
2582  return false;
2583 
2584  // And RHS must be a mask.
2585  uint64_t Mask = MaskCst->getZExtValue();
2586  if (!isMask_64(Mask))
2587  return false;
2588 
2589  uint64_t Shift = ShiftCst->getZExtValue();
2590  uint64_t MaskSize = countPopulation(Mask);
2591 
2592  // Don't interfere with something that can be handled by extracting AH.
2593  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
2594  if (Shift == 8 && MaskSize == 8)
2595  return false;
2596 
2597  // Make sure we are only using bits that were in the original value, not
2598  // shifted in.
2599  if (Shift + MaskSize > NVT.getSizeInBits())
2600  return false;
2601 
2602  // Create a BEXTR node and run it through selection.
2603  SDValue C = CurDAG->getConstant(Shift | (MaskSize << 8), dl, NVT);
2604  SDValue New = CurDAG->getNode(X86ISD::BEXTR, dl, NVT,
2605  N0->getOperand(0), C);
2606  ReplaceNode(Node, New.getNode());
2607  SelectCode(New.getNode());
2608  return true;
2609 }
2610 
2611 // Emit a PCMPISTR(I/M) instruction.
2612 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
2613  bool MayFoldLoad, const SDLoc &dl,
2614  MVT VT, SDNode *Node) {
2615  SDValue N0 = Node->getOperand(0);
2616  SDValue N1 = Node->getOperand(1);
2617  SDValue Imm = Node->getOperand(2);
2618  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
2619  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
2620 
2621  // If there is a load, it will be behind a bitcast. We don't need to check
2622  // alignment on this load.
2623  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2624  if (MayFoldLoad && N1->getOpcode() == ISD::BITCAST && N1->hasOneUse() &&
2625  tryFoldVecLoad(Node, N1.getNode(), N1.getOperand(0), Tmp0, Tmp1, Tmp2,
2626  Tmp3, Tmp4)) {
2627  SDValue Load = N1.getOperand(0);
2628  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
2629  Load.getOperand(0) };
2630  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
2631  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2632  // Update the chain.
2633  ReplaceUses(Load.getValue(1), SDValue(CNode, 2));
2634  // Record the mem-refs
2635  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2636  MemOp[0] = cast<LoadSDNode>(Load)->getMemOperand();
2637  CNode->setMemRefs(MemOp, MemOp + 1);
2638  return CNode;
2639  }
2640 
2641  SDValue Ops[] = { N0, N1, Imm };
2642  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
2643  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
2644  return CNode;
2645 }
2646 
2647 // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
2648 // to emit a second instruction after this one. This is needed since we have two
2649 // copyToReg nodes glued before this and we need to continue that glue through.
2650 MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
2651  bool MayFoldLoad, const SDLoc &dl,
2652  MVT VT, SDNode *Node,
2653  SDValue &InFlag) {
2654  SDValue N0 = Node->getOperand(0);
2655  SDValue N2 = Node->getOperand(2);
2656  SDValue Imm = Node->getOperand(4);
2657  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
2658  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
2659 
2660  // If there is a load, it will be behind a bitcast. We don't need to check
2661  // alignment on this load.
2662  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2663  if (MayFoldLoad && N2->getOpcode() == ISD::BITCAST && N2->hasOneUse() &&
2664  tryFoldVecLoad(Node, N2.getNode(), N2.getOperand(0), Tmp0, Tmp1, Tmp2,
2665  Tmp3, Tmp4)) {
2666  SDValue Load = N2.getOperand(0);
2667  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
2668  Load.getOperand(0), InFlag };
2669  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
2670  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2671  InFlag = SDValue(CNode, 3);
2672  // Update the chain.
2673  ReplaceUses(Load.getValue(1), SDValue(CNode, 2));
2674  // Record the mem-refs
2675  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2676  MemOp[0] = cast<LoadSDNode>(Load)->getMemOperand();
2677  CNode->setMemRefs(MemOp, MemOp + 1);
2678  return CNode;
2679  }
2680 
2681  SDValue Ops[] = { N0, N2, Imm, InFlag };
2682  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
2683  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
2684  InFlag = SDValue(CNode, 2);
2685  return CNode;
2686 }
2687 
2688 /// If the high bits of an 'and' operand are known zero, try setting the
2689 /// high bits of an 'and' constant operand to produce a smaller encoding by
2690 /// creating a small, sign-extended negative immediate rather than a large
2691 /// positive one. This reverses a transform in SimplifyDemandedBits that
2692 /// shrinks mask constants by clearing bits. There is also a possibility that
2693 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
2694 /// case, just replace the 'and'. Return 'true' if the node is replaced.
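/// For example (illustrative): if bit 31 of the other operand is known to be
/// zero, "andl $0x7ffffff0, %eax" (imm32 encoding) can be emitted as
/// "andl $-16, %eax" (imm8 encoding) without changing the result.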
2695 bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
2696  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
2697  // have immediate operands.
2698  MVT VT = And->getSimpleValueType(0);
2699  if (VT != MVT::i32 && VT != MVT::i64)
2700  return false;
2701 
2702  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
2703  if (!And1C)
2704  return false;
2705 
2706  // Bail out if the mask constant is already negative. It can't shrink more.
2707  // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
2708  // patterns to use a 32-bit and instead of a 64-bit and by relying on the
2709  // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
2710  // are negative too.
2711  APInt MaskVal = And1C->getAPIntValue();
2712  unsigned MaskLZ = MaskVal.countLeadingZeros();
2713  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
2714  return false;
2715 
2716  // Don't extend into the upper 32 bits of a 64 bit mask.
2717  if (VT == MVT::i64 && MaskLZ >= 32) {
2718  MaskLZ -= 32;
2719  MaskVal = MaskVal.trunc(32);
2720  }
2721 
2722  SDValue And0 = And->getOperand(0);
2723  APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
2724  APInt NegMaskVal = MaskVal | HighZeros;
2725 
2726  // If a negative constant would not allow a smaller encoding, there's no need
2727  // to continue. Only change the constant when we know it's a win.
2728  unsigned MinWidth = NegMaskVal.getMinSignedBits();
2729  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
2730  return false;
2731 
2732  // Extend masks if we truncated above.
2733  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
2734  NegMaskVal = NegMaskVal.zext(64);
2735  HighZeros = HighZeros.zext(64);
2736  }
2737 
2738  // The variable operand must be all zeros in the top bits to allow using the
2739  // new, negative constant as the mask.
2740  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
2741  return false;
2742 
2743  // Check if the mask is -1. In that case, this is an unnecessary instruction
2744  // that escaped earlier analysis.
2745  if (NegMaskVal.isAllOnesValue()) {
2746  ReplaceNode(And, And0.getNode());
2747  return true;
2748  }
2749 
2750  // A negative mask allows a smaller encoding. Create a new 'and' node.
2751  SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
2752  SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
2753  ReplaceNode(And, NewAnd.getNode());
2754  SelectCode(NewAnd.getNode());
2755  return true;
2756 }
2757 
2758 void X86DAGToDAGISel::Select(SDNode *Node) {
2759  MVT NVT = Node->getSimpleValueType(0);
2760  unsigned Opcode = Node->getOpcode();
2761  SDLoc dl(Node);
2762 
2763  if (Node->isMachineOpcode()) {
2764  LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
2765  Node->setNodeId(-1);
2766  return; // Already selected.
2767  }
2768 
2769  switch (Opcode) {
2770  default: break;
2771  case ISD::BRIND: {
2772  if (Subtarget->isTargetNaCl())
2773  // NaCl has its own pass where jmp %r32 instructions are converted to
2774  // jmp %r64; we leave the instruction alone here.
2775  break;
2776  if (Subtarget->isTarget64BitILP32()) {
2777  // Converts a 32-bit register to a 64-bit, zero-extended version of
2778  // it. This is needed because x86-64 can do many things, but jmp %r32
2779  // ain't one of them.
2780  const SDValue &Target = Node->getOperand(1);
2781  assert(Target.getSimpleValueType() == llvm::MVT::i32);
2782  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
2783  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
2784  Node->getOperand(0), ZextTarget);
2785  ReplaceNode(Node, Brind.getNode());
2786  SelectCode(ZextTarget.getNode());
2787  SelectCode(Brind.getNode());
2788  return;
2789  }
2790  break;
2791  }
2792  case X86ISD::GlobalBaseReg:
2793  ReplaceNode(Node, getGlobalBaseReg());
2794  return;
2795 
2796  case X86ISD::SELECT:
2797  case X86ISD::SHRUNKBLEND: {
2798  // SHRUNKBLEND selects like a regular VSELECT. Same with X86ISD::SELECT.
2799  SDValue VSelect = CurDAG->getNode(
2800  ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
2801  Node->getOperand(1), Node->getOperand(2));
2802  ReplaceNode(Node, VSelect.getNode());
2803  SelectCode(VSelect.getNode());
2804  // We already called ReplaceUses.
2805  return;
2806  }
2807 
2808  case ISD::AND:
2809  if (matchBEXTRFromAnd(Node))
2810  return;
2811  if (shrinkAndImmediate(Node))
2812  return;
2813 
2814  LLVM_FALLTHROUGH;
2815  case ISD::OR:
2816  case ISD::XOR: {
2817 
2818  // For operations of the form (x << C1) op C2, check if we can use a smaller
2819  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
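  // For example (illustrative): ((x << 8) | 0x1200) becomes ((x | 0x12) << 8),
  // so the OR can use an 8-bit immediate encoding instead of a 32-bit one.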
2820  SDValue N0 = Node->getOperand(0);
2821  SDValue N1 = Node->getOperand(1);
2822 
2823  if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
2824  break;
2825 
2826  // i8 is unshrinkable, i16 should be promoted to i32.
2827  if (NVT != MVT::i32 && NVT != MVT::i64)
2828  break;
2829 
2830  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
2831  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2832  if (!Cst || !ShlCst)
2833  break;
2834 
2835  int64_t Val = Cst->getSExtValue();
2836  uint64_t ShlVal = ShlCst->getZExtValue();
2837 
2838  // Make sure that we don't change the operation by removing bits.
2839  // This only matters for OR and XOR, AND is unaffected.
2840  uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
2841  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
2842  break;
2843 
2844  unsigned ShlOp, AddOp, Op;
2845  MVT CstVT = NVT;
2846 
2847  // Check the minimum bitwidth for the new constant.
2848  // TODO: AND32ri is the same as AND64ri32 with zext imm.
2849  // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
2850  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
2851  if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
2852  CstVT = MVT::i8;
2853  else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
2854  CstVT = MVT::i32;
2855 
2856  // Bail if there is no smaller encoding.
2857  if (NVT == CstVT)
2858  break;
2859 
2860  switch (NVT.SimpleTy) {
2861  default: llvm_unreachable("Unsupported VT!");
2862  case MVT::i32:
2863  assert(CstVT == MVT::i8);
2864  ShlOp = X86::SHL32ri;
2865  AddOp = X86::ADD32rr;
2866 
2867  switch (Opcode) {
2868  default: llvm_unreachable("Impossible opcode");
2869  case ISD::AND: Op = X86::AND32ri8; break;
2870  case ISD::OR: Op = X86::OR32ri8; break;
2871  case ISD::XOR: Op = X86::XOR32ri8; break;
2872  }
2873  break;
2874  case MVT::i64:
2875  assert(CstVT == MVT::i8 || CstVT == MVT::i32);
2876  ShlOp = X86::SHL64ri;
2877  AddOp = X86::ADD64rr;
2878 
2879  switch (Opcode) {
2880  default: llvm_unreachable("Impossible opcode");
2881  case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
2882  case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
2883  case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
2884  }
2885  break;
2886  }
2887 
2888  // Emit the smaller op and the shift.
2889  SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
2890  SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
2891  if (ShlVal == 1)
2892  CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
2893  SDValue(New, 0));
2894  else
2895  CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
2896  getI8Imm(ShlVal, dl));
2897  return;
2898  }
2899  case X86ISD::UMUL8:
2900  case X86ISD::SMUL8: {
2901  SDValue N0 = Node->getOperand(0);
2902  SDValue N1 = Node->getOperand(1);
2903 
2904  unsigned Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
2905 
2906  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
2907  N0, SDValue()).getValue(1);
2908 
2909  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
2910  SDValue Ops[] = {N1, InFlag};
2911  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2912 
2913  ReplaceNode(Node, CNode);
2914  return;
2915  }
2916 
2917  case X86ISD::UMUL: {
2918  SDValue N0 = Node->getOperand(0);
2919  SDValue N1 = Node->getOperand(1);
2920 
2921  unsigned LoReg, Opc;
2922  switch (NVT.SimpleTy) {
2923  default: llvm_unreachable("Unsupported VT!");
2924  // MVT::i8 is handled by X86ISD::UMUL8.
2925  case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
2926  case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
2927  case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
2928  }
2929 
2930  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
2931  N0, SDValue()).getValue(1);
2932 
2933  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
2934  SDValue Ops[] = {N1, InFlag};
2935  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2936 
2937  ReplaceNode(Node, CNode);
2938  return;
2939  }
2940 
2941  case ISD::SMUL_LOHI:
2942  case ISD::UMUL_LOHI: {
2943  SDValue N0 = Node->getOperand(0);
2944  SDValue N1 = Node->getOperand(1);
2945 
2946  unsigned Opc, MOpc;
2947  bool isSigned = Opcode == ISD::SMUL_LOHI;
2948  bool hasBMI2 = Subtarget->hasBMI2();
2949  if (!isSigned) {
2950  switch (NVT.SimpleTy) {
2951  default: llvm_unreachable("Unsupported VT!");
2952  case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
2953  MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
2954  case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
2955  MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
2956  }
2957  } else {
2958  switch (NVT.SimpleTy) {
2959  default: llvm_unreachable("Unsupported VT!");
2960  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
2961  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
2962  }
2963  }
2964 
2965  unsigned SrcReg, LoReg, HiReg;
2966  switch (Opc) {
2967  default: llvm_unreachable("Unknown MUL opcode!");
2968  case X86::IMUL32r:
2969  case X86::MUL32r:
2970  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
2971  break;
2972  case X86::IMUL64r:
2973  case X86::MUL64r:
2974  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
2975  break;
2976  case X86::MULX32rr:
2977  SrcReg = X86::EDX; LoReg = HiReg = 0;
2978  break;
2979  case X86::MULX64rr:
2980  SrcReg = X86::RDX; LoReg = HiReg = 0;
2981  break;
2982  }
2983 
2984  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2985  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2986  // Multiply is commutative.
2987  if (!foldedLoad) {
2988  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2989  if (foldedLoad)
2990  std::swap(N0, N1);
2991  }
2992 
2993  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
2994  N0, SDValue()).getValue(1);
2995  SDValue ResHi, ResLo;
2996 
2997  if (foldedLoad) {
2998  SDValue Chain;
2999  MachineSDNode *CNode = nullptr;
3000  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3001  InFlag };
3002  if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
3003  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
3004  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3005  ResHi = SDValue(CNode, 0);
3006  ResLo = SDValue(CNode, 1);
3007  Chain = SDValue(CNode, 2);
3008  InFlag = SDValue(CNode, 3);
3009  } else {
3010  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3011  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3012  Chain = SDValue(CNode, 0);
3013  InFlag = SDValue(CNode, 1);
3014  }
3015 
3016  // Update the chain.
3017  ReplaceUses(N1.getValue(1), Chain);
3018  // Record the mem-refs
3019  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3020  MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
3021  CNode->setMemRefs(MemOp, MemOp + 1);
3022  } else {
3023  SDValue Ops[] = { N1, InFlag };
3024  if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
3025  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
3026  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3027  ResHi = SDValue(CNode, 0);
3028  ResLo = SDValue(CNode, 1);
3029  InFlag = SDValue(CNode, 2);
3030  } else {
3031  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
3032  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3033  InFlag = SDValue(CNode, 0);
3034  }
3035  }
3036 
3037  // Copy the low half of the result, if it is needed.
3038  if (!SDValue(Node, 0).use_empty()) {
3039  if (!ResLo.getNode()) {
3040  assert(LoReg && "Register for low half is not defined!");
3041  ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
3042  InFlag);
3043  InFlag = ResLo.getValue(2);
3044  }
3045  ReplaceUses(SDValue(Node, 0), ResLo);
3046  LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
3047  dbgs() << '\n');
3048  }
3049  // Copy the high half of the result, if it is needed.
3050  if (!SDValue(Node, 1).use_empty()) {
3051  if (!ResHi.getNode()) {
3052  assert(HiReg && "Register for high half is not defined!");
3053  ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
3054  InFlag);
3055  InFlag = ResHi.getValue(2);
3056  }
3057  ReplaceUses(SDValue(Node, 1), ResHi);
3058  LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
3059  dbgs() << '\n');
3060  }
3061 
3062  CurDAG->RemoveDeadNode(Node);
3063  return;
3064  }
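The SMUL_LOHI/UMUL_LOHI case above relies on the fixed-register conventions of MUL/IMUL (one factor and the low half of the product in EAX/RAX, the high half in EDX/RDX) versus BMI2 MULX (implicit factor read from EDX/RDX, both halves written to explicit destination registers, EFLAGS left untouched). A minimal standalone C++ sketch of the value being computed, with illustrative names that are not part of this file:

#include <cstdint>

// Widening 32x32->64 unsigned multiply split into the two results that
// ISD::UMUL_LOHI produces. After MUL32r the low half sits in EAX and the
// high half in EDX; after MULX32rr both halves land in explicit registers.
static inline void umul_lohi32(uint32_t A, uint32_t B,
                               uint32_t &Lo, uint32_t &Hi) {
  uint64_t Product = static_cast<uint64_t>(A) * B;
  Lo = static_cast<uint32_t>(Product);
  Hi = static_cast<uint32_t>(Product >> 32);
}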
3065 
3066  case ISD::SDIVREM:
3067  case ISD::UDIVREM:
3068  case X86ISD::SDIVREM8_SEXT_HREG:
3069  case X86ISD::UDIVREM8_ZEXT_HREG: {
3070  SDValue N0 = Node->getOperand(0);
3071  SDValue N1 = Node->getOperand(1);
3072 
3073  unsigned Opc, MOpc;
3074  bool isSigned = (Opcode == ISD::SDIVREM ||
3075  Opcode == X86ISD::SDIVREM8_SEXT_HREG);
3076  if (!isSigned) {
3077  switch (NVT.SimpleTy) {
3078  default: llvm_unreachable("Unsupported VT!");
3079  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
3080  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
3081  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
3082  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
3083  }
3084  } else {
3085  switch (NVT.SimpleTy) {
3086  default: llvm_unreachable("Unsupported VT!");
3087  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
3088  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
3089  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
3090  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
3091  }
3092  }
3093 
3094  unsigned LoReg, HiReg, ClrReg;
3095  unsigned SExtOpcode;
3096  switch (NVT.SimpleTy) {
3097  default: llvm_unreachable("Unsupported VT!");
3098  case MVT::i8:
3099  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
3100  SExtOpcode = X86::CBW;
3101  break;
3102  case MVT::i16:
3103  LoReg = X86::AX; HiReg = X86::DX;
3104  ClrReg = X86::DX;
3105  SExtOpcode = X86::CWD;
3106  break;
3107  case MVT::i32:
3108  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
3109  SExtOpcode = X86::CDQ;
3110  break;
3111  case MVT::i64:
3112  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
3113  SExtOpcode = X86::CQO;
3114  break;
3115  }
3116 
3117  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3118  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3119  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
3120 
3121  SDValue InFlag;
3122  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
3123  // Special case for div8, just use a move with zero extension to AX to
3124  // clear the upper 8 bits (AH).
3125  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
3126  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3127  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
3128  Move =
3129  SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
3130  MVT::Other, Ops), 0);
3131  Chain = Move.getValue(1);
3132  ReplaceUses(N0.getValue(1), Chain);
3133  } else {
3134  Move =
3135  SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
3136  Chain = CurDAG->getEntryNode();
3137  }
3138  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
3139  InFlag = Chain.getValue(1);
3140  } else {
3141  InFlag =
3142  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
3143  LoReg, N0, SDValue()).getValue(1);
3144  if (isSigned && !signBitIsZero) {
3145  // Sign extend the low part into the high part.
3146  InFlag =
3147  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
3148  } else {
3149  // Zero out the high part, effectively zero extending the input.
3150  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
3151  switch (NVT.SimpleTy) {
3152  case MVT::i16:
3153  ClrNode =
3154  SDValue(CurDAG->getMachineNode(
3155  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
3156  CurDAG->getTargetConstant(X86::sub_16bit, dl,
3157  MVT::i32)),
3158  0);
3159  break;
3160  case MVT::i32:
3161  break;
3162  case MVT::i64:
3163  ClrNode =
3164  SDValue(CurDAG->getMachineNode(
3165  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
3166  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
3167  CurDAG->getTargetConstant(X86::sub_32bit, dl,
3168  MVT::i32)),
3169  0);
3170  break;
3171  default:
3172  llvm_unreachable("Unexpected division source");
3173  }
3174 
3175  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
3176  ClrNode, InFlag).getValue(1);
3177  }
3178  }
3179 
3180  if (foldedLoad) {
3181  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3182  InFlag };
3183  MachineSDNode *CNode =
3184  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
3185  InFlag = SDValue(CNode, 1);
3186  // Update the chain.
3187  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
3188  // Record the mem-refs
3189  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3190  MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
3191  CNode->setMemRefs(MemOp, MemOp + 1);
3192  } else {
3193  InFlag =
3194  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
3195  }
3196 
3197  // Prevent use of AH in a REX instruction by explicitly copying it to
3198  // an ABCD_L register.
3199  //
3200  // The current assumption of the register allocator is that isel
3201  // won't generate explicit references to the GR8_ABCD_H registers. If
3202  // the allocator and/or the backend get enhanced to be more robust in
3203  // that regard, this can be, and should be, removed.
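  // (For instance, "movzbl %ah, %r8d" has no encoding: once a REX prefix is
  // present, the byte-register encodings that would otherwise name AH/CH/DH/BH
  // name SPL/BPL/SIL/DIL instead, so AH cannot be an operand of a REX-prefixed
  // instruction.)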
3204  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
3205  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
3206  unsigned AHExtOpcode =
3207  isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
3208 
3209  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
3210  MVT::Glue, AHCopy, InFlag);
3211  SDValue Result(RNode, 0);
3212  InFlag = SDValue(RNode, 1);
3213 
3214  if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
3215  Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
3216  assert(Node->getValueType(1) == MVT::i32 && "Unexpected result type!");
3217  } else {
3218  Result =
3219  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
3220  }
3221  ReplaceUses(SDValue(Node, 1), Result);
3222  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3223  dbgs() << '\n');
3224  }
3225  // Copy the division (low) result, if it is needed.
3226  if (!SDValue(Node, 0).use_empty()) {
3227  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
3228  LoReg, NVT, InFlag);
3229  InFlag = Result.getValue(2);
3230  ReplaceUses(SDValue(Node, 0), Result);
3231  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3232  dbgs() << '\n');
3233  }
3234  // Copy the remainder (high) result, if it is needed.
3235  if (!SDValue(Node, 1).use_empty()) {
3236  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
3237  HiReg, NVT, InFlag);
3238  InFlag = Result.getValue(2);
3239  ReplaceUses(SDValue(Node, 1), Result);
3240  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3241  dbgs() << '\n');
3242  }
3243  CurDAG->RemoveDeadNode(Node);
3244  return;
3245  }
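The selection above follows the hardware DIV/IDIV convention: the dividend occupies the HiReg:LoReg pair (AH:AL for i8, DX:AX for i16, EDX:EAX for i32, RDX:RAX for i64), the quotient is left in LoReg and the remainder in HiReg, which is why the high half is zeroed (via MOV32r0) for unsigned division and sign-extended (CBW/CWD/CDQ/CQO) for signed division before the divide is issued. A minimal standalone C++ sketch of the computation, with illustrative names that are not part of this file:

#include <cstdint>

// 32-bit unsigned divide as DIV32r performs it: EDX:EAX divided by the
// operand, quotient left in EAX and remainder in EDX. The isel code above
// prepares the EDX half separately (zero or sign extension) and then copies
// the two results back out of the fixed registers.
static inline void udivrem32(uint32_t Dividend, uint32_t Divisor,
                             uint32_t &Quotient, uint32_t &Remainder) {
  Quotient  = Dividend / Divisor; // read back from EAX after DIV32r
  Remainder = Dividend % Divisor; // read back from EDX after DIV32r
}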
3246 
3247  case X86ISD::CMP: {
3248  SDValue N0 = Node->getOperand(0);
3249  SDValue N1 = Node->getOperand(1);
3250 
3251  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
3252  hasNoSignedComparisonUses(Node))
3253  N0 = N0.getOperand(0);
3254 
3255  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
3256  // use a smaller encoding.
3257  // Look past the truncate if CMP is the only use of it.
3258  if (N0.getOpcode() == ISD::AND &&
3259  N0.getNode()->hasOneUse() &&
3260  N0.getValueType() != MVT::i8 &&
3261  X86::isZeroNode(N1)) {
3262  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3263  if (!C) break;
3264  uint64_t Mask = C->getZExtValue();
3265 
3266  MVT VT;
3267  int SubRegOp;
3268  unsigned Op;
3269 
3270  if (isUInt<8>(Mask) &&
3271  (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) {
3272  // For example, convert "testl %eax, $8" to "testb %al, $8"
3273  VT = MVT::i8;
3274  SubRegOp = X86::sub_8bit;
3275  Op = X86::TEST8ri;
3276  } else if (OptForMinSize && isUInt<16>(Mask) &&
3277  (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
3278  // For example, "testl %eax, $32776" to "testw %ax, $32776".
3279  // NOTE: We only want to form TESTW instructions if optimizing for
3280  // min size. Otherwise we only save one byte and possibly get a length
3281  // changing prefix penalty in the decoders.
3282  VT = MVT::i16;
3283  SubRegOp = X86::sub_16bit;
3284  Op = X86::TEST16ri;
3285  } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
3286  (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
3287  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
3288  // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
3289  // Otherwise, we find ourselves in a position where we have to do
3290  // promotion. If previous passes did not promote the and, we assume
3291  // they had a good reason not to and do not promote here.
3292  VT = MVT::i32;
3293  SubRegOp = X86::sub_32bit;
3294  Op = X86::TEST32ri;
3295  } else {
3296  // No eligible transformation was found.
3297  break;
3298  }
3299 
3300  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
3301  SDValue Reg = N0.getOperand(0);
3302 
3303  // Extract the subregister if necessary.
3304  if (N0.getValueType() != VT)
3305  Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
3306 
3307  // Emit a testb, testw, or testl.
3308  SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm);
3309  // Replace CMP with TEST.
3310  ReplaceNode(Node, NewNode);
3311  return;
3312  }
3313  break;
3314  }
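The case above shrinks (X86cmp (and X, Imm), 0) into a narrower TEST whenever the mask fits, subject to the sign-bit and minsize caveats spelled out in the comments. A simplified standalone sketch of just the width classification (illustrative names, the hasNoSignedComparisonUses checks are omitted, and it is not part of this file):

#include <cstdint>

enum class TestWidth { Byte, Word, Dword, None };

// Mirror of the immediate-width checks above: 8-bit masks shrink to testb;
// 16-bit masks are only used at minsize because the 0x66 operand-size prefix
// can be length-changing in the decoders; 32-bit masks let a 64-bit test
// shrink to testl.
static TestWidth classifyTestMask(uint64_t Mask, bool OptForMinSize) {
  if (Mask <= 0xFFu)
    return TestWidth::Byte;
  if (OptForMinSize && Mask <= 0xFFFFu)
    return TestWidth::Word;
  if (Mask <= 0xFFFFFFFFu)
    return TestWidth::Dword;
  return TestWidth::None;
}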
3315  case X86ISD::PCMPISTR: {
3316  if (!Subtarget->hasSSE42())
3317  break;
3318 
3319  bool NeedIndex = !SDValue(Node, 0).use_empty();
3320  bool NeedMask = !SDValue(Node, 1).use_empty();
3321  // We can't fold a load if we are going to make two instructions.
3322  bool MayFoldLoad = !NeedIndex || !NeedMask;
3323 
3324  MachineSDNode *CNode;
3325  if (NeedMask) {
3326  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
3327  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
3328  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
3329  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
3330  }
3331  if (NeedIndex || !NeedMask) {
3332  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
3333  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
3334  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
3335  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
3336  }
3337 
3338  // Connect the flag usage to the last instruction created.
3339  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
3340  CurDAG->RemoveDeadNode(Node);
3341  return;
3342  }
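PCMPISTRI and PCMPISTRM deliver their results in fixed locations (the index in ECX, the mask in XMM0), so when both results of this node are live two separate instructions are emitted, and the memory operand is only folded when a single instruction suffices. A small standalone sketch using the SSE4.2 intrinsics (illustrative helper name and flag choice, compile with SSE4.2 enabled; not part of this file):

#include <nmmintrin.h> // SSE4.2 string-compare intrinsics

// Obtaining both the index (PCMPISTRI) and the mask (PCMPISTRM) for the same
// operands necessarily takes two instructions, one per result register.
static inline int cmpistrBoth(__m128i A, __m128i B, __m128i &Mask) {
  Mask = _mm_cmpistrm(A, B, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
  return _mm_cmpistri(A, B, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
}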
3343  case X86ISD::PCMPESTR: {
3344  if (!Subtarget->hasSSE42())
3345  break;
3346 
3347  // Copy the two implicit register inputs.
3348  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
3349  Node->getOperand(1),
3350  SDValue()).getValue(1);
3351  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
3352  Node->getOperand(3), InFlag).getValue(1);
3353 
3354  bool NeedIndex = !SDValue(Node, 0).use_empty();
3355  bool NeedMask = !SDValue(Node, 1).use_empty();
3356  // We can't fold a load if we are going to make two instructions.
3357  bool MayFoldLoad = !NeedIndex || !NeedMask;
3358 
3359  MachineSDNode *CNode;
3360  if (NeedMask) {
3361  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
3362  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
3363  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
3364  InFlag);
3365  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
3366  }
3367  if (NeedIndex || !NeedMask) {
3368  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
3369  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
3370  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
3371  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
3372  }
3373  // Connect the flag usage to the last instruction created.
3374  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
3375  CurDAG->RemoveDeadNode(Node);
3376  return;
3377  }
3378 
3379  case ISD::STORE:
3380  if (foldLoadStoreIntoMemOperand(Node))
3381  return;
3382  break;
3383  }
3384 
3385  SelectCode(Node);
3386 }
3387 
3388 bool X86DAGToDAGISel::
3389 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
3390  std::vector<SDValue> &OutOps) {
3391  SDValue Op0, Op1, Op2, Op3, Op4;
3392  switch (ConstraintID) {
3393  default:
3394  llvm_unreachable("Unexpected asm memory constraint");
3395  case InlineAsm::Constraint_i:
3396  // FIXME: It seems strange that 'i' is needed here since it's supposed to
3397  // be an immediate and not a memory constraint.
3398  LLVM_FALLTHROUGH;
3399  case InlineAsm::Constraint_o: // offsetable ??
3400  case InlineAsm::Constraint_v: // not offsetable ??
3401  case InlineAsm::Constraint_m: // memory
3402  case InlineAsm::Constraint_X:
3403  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
3404  return true;
3405  break;
3406  }
3407 
3408  OutOps.push_back(Op0);
3409  OutOps.push_back(Op1);
3410  OutOps.push_back(Op2);
3411  OutOps.push_back(Op3);
3412  OutOps.push_back(Op4);
3413  return false;
3414 }
3415 
3416 /// This pass converts a legalized DAG into a X86-specific DAG,
3417 /// ready for instruction scheduling.
3418 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
3419  CodeGenOpt::Level OptLevel) {
3420  return new X86DAGToDAGISel(TM, OptLevel);
3421 }