LLVM 7.0.0svn
X86ISelDAGToDAG.cpp
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized DAG to an X86 DAG.
//
//===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86MachineFunctionInfo.h"
17 #include "X86RegisterInfo.h"
18 #include "X86Subtarget.h"
19 #include "X86TargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
24 #include "llvm/IR/ConstantRange.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/Type.h"
29 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/KnownBits.h"
36 #include <stdint.h>
using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
  /// numbers for the leaves of the matched tree.
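  /// (An x86 memory operand has the general form
  /// Segment:[Base_Reg + Scale*IndexReg + Disp]; e.g. "movl %gs:8(%rsi,%rdi,4),
  /// %eax" exercises the segment, base, index, scale and displacement fields.)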
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;
    int Base_FrameIndex;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    MCSymbol *MCSym;
    int JT;
    unsigned Align;             // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
        : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
          Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
          MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}

    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
    }

    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

    /// Return true if this addressing mode is already RIP-relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode())
        Base_Reg.getNode()->dump();
      else
        dbgs() << "nul\n";
      if (BaseType == FrameIndexBase)
        dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
      dbgs() << " Scale " << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode())
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul\n";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " MCSym ";
      if (MCSym)
        dbgs() << MCSym;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Align << '\n';
    }
#endif
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86-specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel final : public SelectionDAGISel {
    /// Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// If true, selector should try to optimize for code size instead of
    /// performance.
    bool OptForSize;

    /// If true, selector should try to optimize for minimum code size.
    bool OptForMinSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), OptForSize(false),
          OptForMinSize(false) {}

    StringRef getPassName() const override {
      return "X86 DAG->DAG Instruction Selection";
    }

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Reset the subtarget each time through.
      Subtarget = &MF.getSubtarget<X86Subtarget>();
      SelectionDAGISel::runOnMachineFunction(MF);
      return true;
    }

    void EmitFunctionEntryCode() override;

    bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;

    void PreprocessISelDAG() override;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    void Select(SDNode *N) override;

    bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
    bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool matchAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
    bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                          SDValue &Scale, SDValue &Index, SDValue &Disp,
                          SDValue &Segment);
    bool selectMOV64Imm32(SDValue N, SDValue &Imm);
    bool selectLEAAddr(SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool selectLEA64_32Addr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
    bool selectTLSADDRAddr(SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool selectScalarSSELoad(SDNode *Root, SDValue N,
                             SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &NodeWithChain);
    bool selectRelocImm(SDValue N, SDValue &Op);

    bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    // Convenience method where P is also root.
    bool tryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment) {
      return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
    }

    /// Implement addressing mode selection for inline asm expressions.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override;

    void emitSpecialCodeForMain();

    inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
                                   SDValue &Base, SDValue &Scale,
                                   SDValue &Index, SDValue &Disp,
                                   SDValue &Segment) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
                 ? CurDAG->getTargetFrameIndex(
                       AM.Base_FrameIndex,
                       TLI->getPointerTy(CurDAG->getDataLayout()))
                 : AM.Base_Reg;
      Scale = getI8Imm(AM.Scale, DL);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP-relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES) {
        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.MCSym) {
        assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
        assert(AM.SymbolFlags == 0 && "oo");
        Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    // Utility function to determine whether we should avoid selecting
    // immediate forms of instructions for better code size or not.
    // At a high level, we'd like to avoid such instructions when
    // we have similar constants used within the same basic block
    // that can be kept in a register.
    //
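    // (For example, if "addl $1234567, %eax" and "cmpl $1234567, (%rsi)"
    // appear in the same block, materializing 1234567 once and using the
    // register forms saves roughly four bytes per extra use.)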
    bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
      uint32_t UseCount = 0;

      // Do not want to hoist if we're not optimizing for size.
      // TODO: We'd like to remove this restriction.
      // See the comment in X86InstrInfo.td for more info.
      if (!OptForSize)
        return false;

      // Walk all the users of the immediate.
      for (SDNode::use_iterator UI = N->use_begin(),
           UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {

        SDNode *User = *UI;

        // This user is already selected. Count it as a legitimate use and
        // move on.
        if (User->isMachineOpcode()) {
          UseCount++;
          continue;
        }

        // We want to count stores of immediates as real uses.
        if (User->getOpcode() == ISD::STORE &&
            User->getOperand(1).getNode() == N) {
          UseCount++;
          continue;
        }
        // We don't currently match users that have more than two operands
        // (except for stores, which are handled above). Those instructions
        // won't match in ISel for now, and would be counted incorrectly.
        // This may change in the future as we add additional instruction
        // types.
        if (User->getNumOperands() != 2)
          continue;

        // Immediates that are used for offsets as part of stack
        // manipulation should be left alone. These are typically
        // used to indicate SP offsets for argument passing and
        // will get pulled into stores/pushes (implicitly).
        if (User->getOpcode() == X86ISD::ADD ||
            User->getOpcode() == ISD::ADD    ||
            User->getOpcode() == X86ISD::SUB ||
            User->getOpcode() == ISD::SUB) {

          // Find the other operand of the add/sub.
          SDValue OtherOp = User->getOperand(0);
          if (OtherOp.getNode() == N)
            OtherOp = User->getOperand(1);

          // Don't count if the other operand is SP.
          RegisterSDNode *RegNode;
          if (OtherOp->getOpcode() == ISD::CopyFromReg &&
              (RegNode = dyn_cast_or_null<RegisterSDNode>(
                   OtherOp->getOperand(1).getNode())))
            if ((RegNode->getReg() == X86::ESP) ||
                (RegNode->getReg() == X86::RSP))
              continue;
        }

        // ... otherwise, count this and move on.
        UseCount++;
      }

      // If the immediate has more than one use, recommend hoisting it.
      return (UseCount > 1);
    }

    /// Return a target constant with the specified value of type i8.
    inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
    }

    /// Return a target constant with the specified value, of type i32.
    inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
    }

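    /// (The two helpers below convert the element index of an extract/insert
    /// subvector node into the chunk immediate the VEXTRACT/VINSERT
    /// instructions expect: e.g. for a v8i32 source and VecWidth == 128,
    /// element index 4 maps to immediate 1 ((4 * 32) / 128), i.e. the upper
    /// 128-bit half.)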
    SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
                                        const SDLoc &DL) {
      assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
      uint64_t Index = N->getConstantOperandVal(1);
      MVT VecVT = N->getOperand(0).getSimpleValueType();
      return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
    }

    SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
                                      const SDLoc &DL) {
      assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
      uint64_t Index = N->getConstantOperandVal(2);
      MVT VecVT = N->getSimpleValueType(0);
      return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
    }

    /// Return an SDNode that returns the value of the global base register.
    /// Output instructions required to initialize the global base register,
    /// if necessary.
    SDNode *getGlobalBaseReg();

    /// Return a reference to the TargetMachine, casted to the target-specific
    /// type.
    const X86TargetMachine &getTargetMachine() const {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// Return a reference to the TargetInstrInfo, casted to the target-specific
    /// type.
    const X86InstrInfo *getInstrInfo() const {
      return Subtarget->getInstrInfo();
    }

    /// \brief Address-mode matching performs shift-of-and to and-of-shift
    /// reassociation in order to expose more scaled addressing
    /// opportunities.
    bool ComplexPatternFuncMutatesDAG() const override {
      return true;
    }

    bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;

    /// Returns whether this is a relocatable immediate in the range
    /// [-2^(Width-1) .. 2^(Width-1)-1].
    template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
      if (auto *CN = dyn_cast<ConstantSDNode>(N))
        return isInt<Width>(CN->getSExtValue());
      return isSExtAbsoluteSymbolRef(Width, N);
    }

    // Indicates we should prefer to use a non-temporal load for this load.
    bool useNonTemporalLoad(LoadSDNode *N) const {
      if (!N->isNonTemporal())
        return false;

      unsigned StoreSize = N->getMemoryVT().getStoreSize();

      if (N->getAlignment() < StoreSize)
        return false;

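      // (Non-temporal vector loads map to MOVNTDQA, which requires SSE4.1 for
      // 16-byte xmm, AVX2 for 32-byte ymm, and AVX-512 for 64-byte zmm
      // operands; smaller sizes have no non-temporal load form.)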
      switch (StoreSize) {
      default: llvm_unreachable("Unsupported store size");
      case 16:
        return Subtarget->hasSSE41();
      case 32:
        return Subtarget->hasAVX2();
      case 64:
        return Subtarget->hasAVX512();
      }
    }

    bool foldLoadStoreIntoMemOperand(SDNode *Node);

    bool matchBEXTRFromAnd(SDNode *Node);

    bool isMaskZeroExtended(SDNode *N) const;
  };
}

// Returns true if this masked compare can be implemented legally with this
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
  unsigned Opcode = N->getOpcode();
  if (Opcode == X86ISD::PCMPEQM || Opcode == X86ISD::PCMPGTM ||
      Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM ||
      Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU ||
      Opcode == X86ISD::CMPM_RND) {
    // We can get 256-bit 8 element types here without VLX being enabled. When
    // this happens we will use 512-bit operations and the mask will not be
    // zero extended.
    EVT OpVT = N->getOperand(0).getValueType();
    if (OpVT.is256BitVector() || OpVT.is128BitVector())
      return Subtarget->hasVLX();

    return true;
  }

  return false;
}

// Returns true if we can assume the writer of the mask has zero extended it
// for us.
bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
  // If this is an AND, check if we have a compare on either side. As long as
  // one side guarantees the mask is zero extended, the AND will preserve those
  // zeros.
  if (N->getOpcode() == ISD::AND)
    return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
           isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);

  return isLegalMaskCompare(N, Subtarget);
}

bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (!N.hasOneUse())
    return false;

  if (N.getOpcode() != ISD::LOAD)
    return true;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::SUB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDCARRY:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is an 8-bit immediate we should fold the
      // immediate instead. This reduces code size.
      // e.g.
      //   movl 4(%esp), %eax
      //   addl $4, %eax
      // vs.
      //   movl $4, %eax
      //   addl 4(%esp), %eax
      // The former is 2 bytes shorter. In the case where the increment is 1,
      // the saving can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      //   movl %gs:0, %eax
      //   leal i@NTPOFF(%eax), %eax
      // instead of
      //   movl $i@NTPOFF, %eax
      //   addl %gs:0, %eax
      // If the block also has an access to a second TLS address, this will
      // save a load.
      // FIXME: This is probably also true for non-TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }
  }

  return true;
}

/// Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
        CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
    Ops.clear();
    Ops.push_back(NewChain);
  }
  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  Ops.append(Call->op_begin() + 1, Call->op_end());
  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}

/// Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  // The transformation is somewhat dangerous if the call's chain was glued to
  // the call. After MoveBelowOrigChain the load is moved between the call and
  // the chain; this can create a cycle if the load is not folded. So it is
  // *really* important that we are sure the load will be folded.
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (!Chain.getNumOperands())
    return false;
  // Since we are not checking for AA here, conservatively abort if the chain
  // writes to memory. It's not safe to move the callee (a load) across a store.
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
    return false;
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}

void X86DAGToDAGISel::PreprocessISelDAG() {
  // OptFor[Min]Size are used in pattern predicates that isel is matching.
  OptForSize = MF->getFunction().optForSize();
  OptForMinSize = MF->getFunction().optForMinSize();
  assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
        // Only do this when the target doesn't favor register-indirect calls.
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          // Only do this if the load can be folded into TC_RETURN.
          (Subtarget->is64Bit() ||
           !getTargetMachine().isPositionIndependent())))) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to a store
    // and load through the stack. This is a gross hack. We would like to
    // simply mark these as being illegal, but when we do that, legalize
    // produces these when it expands calls, then expands these in the same
    // legalize pass. We would like dag combine to be able to hack on these
    // between the call expansion and the node legalization. As such this pass
    // basically does "really late" legalization of these inline with the
    // X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
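    // (Concretely, an f64-to-f32 FP_ROUND whose result must live on the FP
    // stack becomes, e.g., a 4-byte truncating store to a fresh stack slot
    // followed by a reload, letting the store perform the rounding.)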
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    MVT SrcVT = N->getOperand(0).getSimpleValueType();
    MVT DstVT = N->getSimpleValueType(0);

    // If any of the sources are vectors, no fp stack involved.
    if (SrcVT.isVector() || DstVT.isVector())
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    const X86TargetLowering *X86Lowering =
        static_cast<const X86TargetLowering *>(TLI);
    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore. SSE can fold direct loads into other
    // operations. Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    SDLoc dl(N);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store =
        CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
                              MemTmp, MachinePointerInfo(), MemVT);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        MachinePointerInfo(), MemVT);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created. This will cause general havoc on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}

/// Emit any code that needs to be executed only in the main function.
void X86DAGToDAGISel::emitSpecialCodeForMain() {
  if (Subtarget->isTargetCygMing()) {
    TargetLowering::ArgListTy Args;
    auto &DL = CurDAG->getDataLayout();

    TargetLowering::CallLoweringInfo CLI(*CurDAG);
    CLI.setChain(CurDAG->getRoot())
        .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
                   CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
                   std::move(Args));
    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
    CurDAG->setRoot(Result.second);
  }
}

void X86DAGToDAGISel::EmitFunctionEntryCode() {
  // If this is main, emit special code for main.
  const Function &F = MF->getFunction();
  if (F.hasExternalLinkage() && F.getName() == "main")
    emitSpecialCodeForMain();
}

static bool isDispSafeForFrameIndex(int64_t Val) {
  // On 64-bit platforms, we can run into an issue where a frame index
  // includes a displacement that, when added to the explicit displacement,
  // will overflow the displacement field. Assuming that the frame index
  // displacement fits into a 31-bit integer (which is only slightly more
  // aggressive than the current fundamental assumption that it fits into
  // a 32-bit integer), a 31-bit disp should always be safe.
  return isInt<31>(Val);
}

bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {
  // Cannot combine ExternalSymbol displacements with integer offsets.
  if (Offset != 0 && (AM.ES || AM.MCSym))
    return true;
  int64_t Val = AM.Disp + Offset;
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit()) {
    if (!X86::isOffsetSuitableForCodeModel(Val, M,
                                           AM.hasSymbolicDisplacement()))
      return true;
    // In addition to the checks required for a register base, check that
    // we do not try to use an unsafe Disp with a frame index.
    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
        !isDispSafeForFrameIndex(Val))
      return true;
  }
  AM.Disp = Val;
  return false;
}

bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
  SDValue Address = N->getOperand(1);

  // load gs:0 -> GS segment register.
  // load fs:0 -> FS segment register.
  //
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
        (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
         Subtarget->isTargetFuchsia()))
      switch (N->getPointerInfo().getAddrSpace()) {
      case 256:
        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
        return false;
      case 257:
        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
        return false;
      // Address space 258 is not handled here, because it is not used to
      // address TLS areas.
      }

  return true;
}
820 
821 /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
822 /// mode. These wrap things that will resolve down into a symbol reference.
823 /// If no match is possible, this returns true, otherwise it returns false.
824 bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
825  // If the addressing mode already has a symbol as the displacement, we can
826  // never match another symbol.
827  if (AM.hasSymbolicDisplacement())
828  return true;
829 
830  SDValue N0 = N.getOperand(0);
831  CodeModel::Model M = TM.getCodeModel();
832 
833  // Handle X86-64 rip-relative addresses. We check this before checking direct
834  // folding because RIP is preferable to non-RIP accesses.
835  if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
836  // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
837  // they cannot be folded into immediate fields.
838  // FIXME: This can be improved for kernel and other models?
839  (M == CodeModel::Small || M == CodeModel::Kernel)) {
840  // Base and index reg must be 0 in order to use %rip as base.
841  if (AM.hasBaseOrIndexReg())
842  return true;
843  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
844  X86ISelAddressMode Backup = AM;
845  AM.GV = G->getGlobal();
846  AM.SymbolFlags = G->getTargetFlags();
847  if (foldOffsetIntoAddress(G->getOffset(), AM)) {
848  AM = Backup;
849  return true;
850  }
851  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
852  X86ISelAddressMode Backup = AM;
853  AM.CP = CP->getConstVal();
854  AM.Align = CP->getAlignment();
855  AM.SymbolFlags = CP->getTargetFlags();
856  if (foldOffsetIntoAddress(CP->getOffset(), AM)) {
857  AM = Backup;
858  return true;
859  }
860  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
861  AM.ES = S->getSymbol();
862  AM.SymbolFlags = S->getTargetFlags();
863  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
864  AM.MCSym = S->getMCSymbol();
865  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
866  AM.JT = J->getIndex();
867  AM.SymbolFlags = J->getTargetFlags();
868  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
869  X86ISelAddressMode Backup = AM;
870  AM.BlockAddr = BA->getBlockAddress();
871  AM.SymbolFlags = BA->getTargetFlags();
872  if (foldOffsetIntoAddress(BA->getOffset(), AM)) {
873  AM = Backup;
874  return true;
875  }
876  } else
877  llvm_unreachable("Unhandled symbol reference node.");
878 
879  if (N.getOpcode() == X86ISD::WrapperRIP)
880  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
881  return false;
882  }
883 
884  // Handle the case when globals fit in our immediate field: This is true for
885  // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit
886  // mode, this only applies to a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      M == CodeModel::Small || M == CodeModel::Kernel) {
    assert(N.getOpcode() != X86ISD::WrapperRIP &&
           "RIP-relative addressing already handled");
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
      AM.MCSym = S->getMCSymbol();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      AM.BlockAddr = BA->getBlockAddress();
      AM.Disp += BA->getOffset();
      AM.SymbolFlags = BA->getTargetFlags();
    } else
      llvm_unreachable("Unhandled symbol reference node.");
    return false;
  }

  return true;
}

/// Add the specified node to the specified addressing mode, returning true if
/// it cannot be done. This just pattern matches for the addressing mode.
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (matchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr &&
      AM.IndexReg.getNode() == nullptr &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}

bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
                               unsigned Depth) {
  // Add an artificial use to this node so that we can keep track of
  // it if it gets CSE'd with a different node.
  HandleSDNode Handle(N);

  X86ISelAddressMode Backup = AM;
  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
    return false;
  AM = Backup;

  // Try again after commuting the operands.
  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
    return false;
  AM = Backup;

  // If we couldn't fold both operands into the address at the same time,
  // see if we can just put each operand into a register and fold at least
  // the add.
  if (AM.BaseType == X86ISelAddressMode::RegBase &&
      !AM.Base_Reg.getNode() &&
      !AM.IndexReg.getNode()) {
    N = Handle.getValue();
    AM.Base_Reg = N.getOperand(0);
    AM.IndexReg = N.getOperand(1);
    AM.Scale = 1;
    return false;
  }
  N = Handle.getValue();
  return true;
}

// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
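// (Newly created nodes have NodeId == -1; the fold* helpers below rely on
// this to splice the nodes they build into the topological order just before
// the node they are replacing.)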
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  if (N.getNode()->getNodeId() == -1 ||
      N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
    DAG.RepositionNode(Pos.getNode()->getIterator(), N.getNode());
    N.getNode()->setNodeId(Pos.getNode()->getNodeId());
  }
}

// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
// safe. This allows us to convert the shift and and into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed.
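// (For example, with C1 == 2: "(X >> 6) & 0x3fc" becomes
// "((X >> 8) & 0xff) << 2", i.e. an AH/BH/CH/DH extract plus a scale-by-4
// index.)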
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask,
                                      SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
      !Shift.hasOneUse())
    return true;

  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, Eight);
  insertDAGNode(DAG, N, Srl);
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, And);
  insertDAGNode(DAG, N, ShlCount);
  insertDAGNode(DAG, N, Shl);
  DAG.ReplaceAllUsesWith(N, Shl);
  AM.IndexReg = And;
  AM.Scale = (1 << ScaleLog);
  return false;
}

// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
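// (For example, "(X << 1) & 0xff00" becomes "(X & 0x7f80) << 1"; the
// remaining left shift is then absorbed as a scale-by-2 in the address.)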
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        uint64_t Mask,
                                        SDValue Shift, SDValue X,
                                        X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SHL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  // Not likely to be profitable if either the AND or SHIFT node has more
  // than one use (unless all uses are for address computation). Besides,
  // the isel mechanism requires their node ids to be reused.
  if (!N.hasOneUse() || !Shift.hasOneUse())
    return true;

  // Verify that the shift amount is something we can fold.
  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, NewAnd);
  insertDAGNode(DAG, N, NewShift);
  DAG.ReplaceAllUsesWith(N, NewShift);

  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;
  return false;
}

// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %rcx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask,
                                    SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  unsigned MaskLZ = countLeadingZeros(Mask);
  unsigned MaskTZ = countTrailingZeros(Mask);

  // The amount of shift we're trying to fit into the addressing mode is taken
  // from the trailing zeros of the mask.
  unsigned AMShiftAmt = MaskTZ;

  // There is nothing we can do here unless the mask is removing some bits.
  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;

  // We also need to ensure that mask is a continuous run of bits.
  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
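  // (E.g. a 64-bit Mask of 0x1fe passes: 8 trailing ones after shifting out
  // 1 trailing zero, plus 55 leading zeros, sums to 64, so the run of set
  // bits is contiguous.)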

  // Scale the leading zero count down based on the actual size of the value.
  // Also scale it down based on the size of the shift.
  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
  if (MaskLZ < ScaleDown)
    return true;
  MaskLZ -= ScaleDown;

  // The final check is to ensure that any masked out high bits of X are
  // already known to be zero. Otherwise, the mask has a semantic impact
  // other than masking out a couple of low bits. Unfortunately, because of
  // the mask, zero extensions will be removed from operands in some cases.
  // This code works extra hard to look through extensions because we can
  // replace them with zero extensions cheaply if necessary.
  bool ReplacingAnyExtend = false;
  if (X.getOpcode() == ISD::ANY_EXTEND) {
    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();
    // Assume that we'll replace the any-extend with a zero-extend, and
    // narrow the search to the extended value.
    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;
  }
  APInt MaskedHighBits =
      APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
  KnownBits Known;
  DAG.computeKnownBits(X, Known);
  if (MaskedHighBits != Known.Zero) return true;

  // We've identified a pattern that can be transformed into a single shift
  // and an addressing mode. Make it so.
  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);
    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
    insertDAGNode(DAG, N, NewX);
    X = NewX;
  }
  SDLoc DL(N);
  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewSRLAmt);
  insertDAGNode(DAG, N, NewSRL);
  insertDAGNode(DAG, N, NewSHLAmt);
  insertDAGNode(DAG, N, NewSHL);
  DAG.ReplaceAllUsesWith(N, NewSHL);

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewSRL;
  return false;
}

bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  SDLoc dl(N);
  DEBUG({
    dbgs() << "MatchAddress: ";
    AM.dump();
  });
  // Limit recursion.
  if (Depth > 5)
    return matchAddressBase(N, AM);

  // If this is already a %rip relative address, we can only merge immediates
  // into it. Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements. It isn't very important, but this should be fixed for
    // consistency.
    if (!(AM.ES || AM.MCSym) && AM.JT != -1)
      return true;

    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
      if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
        return false;
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::LOCAL_RECOVER: {
    if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
      if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
        // Use the symbol and don't prefix it.
        AM.MCSym = ESNode->getMCSymbol();
        return false;
      }
    break;
  }
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!foldOffsetIntoAddress(Val, AM))
      return false;
    break;
  }

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    if (!matchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
      return false;
    break;

  case ISD::FrameIndex:
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;

    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getOperand(0);

        // Okay, we know that we have a scale by now. However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
          AM.IndexReg = ShVal.getOperand(0);
          ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
          uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
          if (!foldOffsetIntoAddress(Disp, AM))
            return false;
        }

        AM.IndexReg = ShVal;
        return false;
      }
    }
    break;

  case ISD::SRL: {
    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue And = N.getOperand(0);
    if (And.getOpcode() != ISD::AND) break;
    SDValue X = And.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    // The mask used for the transform is expected to be post-shift, but we
    // found the shift first so just apply the shift to the mask before passing
    // it down.
    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
        !isa<ConstantSDNode>(And.getOperand(1)))
      break;
    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);

    // Try to fold the mask and shift into the scale, and return false if we
    // succeed.
    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
      return false;
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    LLVM_FALLTHROUGH;
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr) {
      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now. However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getOperand(1))) {
            Reg = MulVal.getOperand(0);
            ConstantSDNode *AddVal =
                cast<ConstantSDNode>(MulVal.getOperand(1));
            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
            if (foldOffsetIntoAddress(Disp, AM))
              Reg = N.getOperand(0);
          } else {
            Reg = N.getOperand(0);
          }

          AM.IndexReg = AM.Base_Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address with the
    // index field left unused, use -B as the index. This is a win if A has
    // multiple parts that can be folded into the address. It also saves a
    // mov if the base register has other uses, since it avoids a two-address
    // sub instruction; however, it costs an additional mov if the index
    // register has other uses.

    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }

    int Cost = 0;
    SDValue RHS = Handle.getValue().getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    // FIXME: Don't rely on DELETED_NODEs.
    if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
         AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;
    AM.Scale = 1;

    // Insert the new nodes into the topological ordering.
    insertDAGNode(*CurDAG, Handle.getValue(), Zero);
    insertDAGNode(*CurDAG, Handle.getValue(), Neg);
    return false;
  }
1427 
1428  case ISD::ADD:
1429  if (!matchAdd(N, AM, Depth))
1430  return false;
1431  break;
1432 
1433  case ISD::OR:
1434  // We want to look through a transform in InstCombine and DAGCombiner that
1435  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
1436  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
1437  // An 'lea' can then be used to match the shift (multiply) and add:
1438  // and $1, %esi
1439  // lea (%rsi, %rdi, 8), %rax
1440  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
1441  !matchAdd(N, AM, Depth))
1442  return false;
1443  break;
1444 
1445  case ISD::AND: {
1446  // Perform some heroic transforms on an and of a constant-count shift
1447  // with a constant to enable use of the scaled offset field.
1448 
1449  // Scale must not be used already.
1450  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1451 
1452  SDValue Shift = N.getOperand(0);
1453  if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
1454  SDValue X = Shift.getOperand(0);
1455 
1456  // We only handle up to 64-bit values here as those are what matter for
1457  // addressing mode optimizations.
1458  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1459 
1460  if (!isa<ConstantSDNode>(N.getOperand(1)))
1461  break;
1462  uint64_t Mask = N.getConstantOperandVal(1);
1463 
1464  // Try to fold the mask and shift into an extract and scale.
1465  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
1466  return false;
1467 
1468  // Try to fold the mask and shift directly into the scale.
1469  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
1470  return false;
1471 
1472  // Try to swap the mask and shift to place shifts which can be done as
1473  // a scale on the outside of the mask.
1474  if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
1475  return false;
1476  break;
1477  }
1478  }
1479 
1480  return matchAddressBase(N, AM);
1481 }

/// Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
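/// (E.g. if the base register is already taken, the unmatched operand is
/// placed in the index register with scale 1; if both base and index are
/// taken, matching fails and the caller backs out.)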
bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (!AM.IndexReg.getNode()) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base_Reg = N;
  return false;
}
1504 
1505 /// Helper for selectVectorAddr. Handles things that can be folded into a
1506 /// gather scatter address. The index register and scale should have already
1507 /// been handled.
1508 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
1509  // TODO: Support other operations.
1510  switch (N.getOpcode()) {
1511  case ISD::Constant: {
1512  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1513  if (!foldOffsetIntoAddress(Val, AM))
1514  return false;
1515  break;
1516  }
1517  case X86ISD::Wrapper:
1518  if (!matchWrapper(N, AM))
1519  return false;
1520  break;
1521  }
1522 
1523  return matchAddressBase(N, AM);
1524 }
1525 
1526 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
1527  SDValue &Scale, SDValue &Index,
1528  SDValue &Disp, SDValue &Segment) {
1529  X86ISelAddressMode AM;
1530  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
1531  AM.IndexReg = Mgs->getIndex();
1532  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
1533 
1534  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1535  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1536  if (AddrSpace == 256)
1537  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1538  if (AddrSpace == 257)
1539  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1540  if (AddrSpace == 258)
1541  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1542 
1543  // Try to match into the base and displacement fields.
1544  if (matchVectorAddress(N, AM))
1545  return false;
1546 
1547  MVT VT = N.getSimpleValueType();
1548  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1549  if (!AM.Base_Reg.getNode())
1550  AM.Base_Reg = CurDAG->getRegister(0, VT);
1551  }
1552 
1553  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1554  return true;
1555 }
1556 
1557 /// Returns true if it is able to pattern match an addressing mode.
1558  /// It returns, by reference, the operands which make up the maximal
1559  /// addressing mode it can match.
1560 ///
1561 /// Parent is the parent node of the addr operand that is being matched. It
1562 /// is always a load, store, atomic node, or null. It is only null when
1563 /// checking memory operands for inline asm nodes.
1564 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
1565  SDValue &Scale, SDValue &Index,
1566  SDValue &Disp, SDValue &Segment) {
1567  X86ISelAddressMode AM;
1568 
1569  if (Parent &&
1570  // This list of opcodes covers all the nodes that have an "addr:$ptr" operand
1571  // that are not a MemSDNode, and thus don't have proper addrspace info.
1572  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
1573  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
1574  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
1575  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
1576  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
1577  unsigned AddrSpace =
1578  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1579  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1580  if (AddrSpace == 256)
1581  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1582  if (AddrSpace == 257)
1583  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1584  if (AddrSpace == 258)
1585  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1586  }
1587 
1588  if (matchAddress(N, AM))
1589  return false;
1590 
1591  MVT VT = N.getSimpleValueType();
1592  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1593  if (!AM.Base_Reg.getNode())
1594  AM.Base_Reg = CurDAG->getRegister(0, VT);
1595  }
1596 
1597  if (!AM.IndexReg.getNode())
1598  AM.IndexReg = CurDAG->getRegister(0, VT);
1599 
1600  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1601  return true;
1602 }
1603 
1604 // We can only fold a load if all nodes between it and the root node have a
1605 // single use. If there are additional uses, we could end up duplicating the
1606 // load.
1607 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *N) {
1608  SDNode *User = *N->use_begin();
1609  while (User != Root) {
1610  if (!User->hasOneUse())
1611  return false;
1612  User = *User->use_begin();
1613  }
1614 
1615  return true;
1616 }
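// ---- Illustrative sketch (not part of this file) ----
// The same single-use walk over a toy node type (hypothetical, not SDNode):
// starting from N, follow each node's unique user until Root is reached.
// Following only the first user is safe because the walk bails out as soon
// as any intermediate node has more than one user.
#include <vector>

struct ToyNode {
  std::vector<ToyNode *> Users;
};

bool toyHasSingleUsesFromRoot(const ToyNode *Root, const ToyNode *N) {
  const ToyNode *User = N->Users.front(); // Assumes N has at least one user.
  while (User != Root) {
    if (User->Users.size() != 1)
      return false;
    User = User->Users.front();
  }
  return true;
}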
1617 
1618 /// Match a scalar SSE load. In particular, we want to match a load whose top
1619 /// elements are either undef or zeros. The load flavor is derived from the
1620 /// type of N, which is either v4f32 or v2f64.
1621 ///
1622 /// We also return:
1623 /// PatternChainNode: this is the matched node that has a chain input and
1624 /// output.
1625 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
1626  SDValue N, SDValue &Base,
1627  SDValue &Scale, SDValue &Index,
1628  SDValue &Disp, SDValue &Segment,
1629  SDValue &PatternNodeWithChain) {
1630  // We can allow a full vector load here since narrowing a load is ok.
1631  if (ISD::isNON_EXTLoad(N.getNode())) {
1632  PatternNodeWithChain = N;
1633  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1634  IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
1635  hasSingleUsesFromRoot(Root, N.getNode())) {
1636  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1637  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1638  Segment);
1639  }
1640  }
1641 
1642  // We can also match the special zero extended load opcode.
1643  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
1644  PatternNodeWithChain = N;
1645  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1646  IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
1647  hasSingleUsesFromRoot(Root, N.getNode())) {
1648  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
1649  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
1650  Segment);
1651  }
1652  }
1653 
1654  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
1655  // once. Otherwise the load might get duplicated and the chain output of the
1656  // duplicate load will not be observed by all dependencies.
1657  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
1658  PatternNodeWithChain = N.getOperand(0);
1659  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1660  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1661  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
1662  hasSingleUsesFromRoot(Root, N.getNode())) {
1663  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1664  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1665  Segment);
1666  }
1667  }
1668 
1669  // Also handle the case where we explicitly require zeros in the top
1670  // elements. This is a vector shuffle from the zero vector.
1671  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1672  // Check to see if the top elements are all zeros (or bitcast of zeros).
1673  N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1674  N.getOperand(0).getNode()->hasOneUse()) {
1675  PatternNodeWithChain = N.getOperand(0).getOperand(0);
1676  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1677  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1678  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
1679  hasSingleUsesFromRoot(Root, N.getNode())) {
1680  // Okay, this is a zero extending load. Fold it.
1681  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1682  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1683  Segment);
1684  }
1685  }
1686 
1687  return false;
1688 }
1689 
1690 
1691 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
1692  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1693  uint64_t ImmVal = CN->getZExtValue();
1694  if (!isUInt<32>(ImmVal))
1695  return false;
1696 
1697  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
1698  return true;
1699  }
1700 
1701  // In static codegen with small code model, we can get the address of a label
1702  // into a register with 'movl'. TableGen has already made sure we're looking
1703  // at a label of some kind.
1704  assert(N->getOpcode() == X86ISD::Wrapper &&
1705  "Unexpected node type for MOV32ri64");
1706  N = N.getOperand(0);
1707 
1708  // At least GNU as does not accept 'movl' for TPOFF relocations.
1709  // FIXME: We could use 'movl' when we know we are targeting MC.
1710  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
1711  return false;
1712 
1713  Imm = N;
1714  if (N->getOpcode() != ISD::TargetGlobalAddress)
1715  return TM.getCodeModel() == CodeModel::Small;
1716 
1717  Optional<ConstantRange> CR =
1718  cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
1719  if (!CR)
1720  return TM.getCodeModel() == CodeModel::Small;
1721 
1722  return CR->getUnsignedMax().ult(1ull << 32);
1723 }
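// ---- Illustrative sketch (not part of this file) ----
// The check above works because movl to a 32-bit register implicitly
// zeroes the upper 32 bits, so a 64-bit constant can use the short
// encoding exactly when it is an unsigned 32-bit value. Hypothetical
// standalone version of the predicate:
#include <cstdint>

bool fitsInMov32(uint64_t Imm) {
  return Imm <= 0xFFFFFFFFull; // Same test as isUInt<32>(ImmVal).
}
// fitsInMov32(0xDEADBEEF)  -> true  (movl $0xDEADBEEF, %eax)
// fitsInMov32(0x1DEADBEEF) -> false (needs movabsq)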
1724 
1725 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
1726  SDValue &Scale, SDValue &Index,
1727  SDValue &Disp, SDValue &Segment) {
1728  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
1729  SDLoc DL(N);
1730 
1731  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
1732  return false;
1733 
1734  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
1735  if (RN && RN->getReg() == 0)
1736  Base = CurDAG->getRegister(0, MVT::i64);
1737  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
1738  // Base could already be %rip, particularly in the x32 ABI.
1739  Base = SDValue(CurDAG->getMachineNode(
1740  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1741  CurDAG->getTargetConstant(0, DL, MVT::i64),
1742  Base,
1743  CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
1744  0);
1745  }
1746 
1747  RN = dyn_cast<RegisterSDNode>(Index);
1748  if (RN && RN->getReg() == 0)
1749  Index = CurDAG->getRegister(0, MVT::i64);
1750  else {
1751  assert(Index.getValueType() == MVT::i32 &&
1752  "Expect to be extending 32-bit registers for use in LEA");
1753  Index = SDValue(CurDAG->getMachineNode(
1754  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1755  CurDAG->getTargetConstant(0, DL, MVT::i64),
1756  Index,
1757  CurDAG->getTargetConstant(X86::sub_32bit, DL,
1758  MVT::i32)),
1759  0);
1760  }
1761 
1762  return true;
1763 }
1764 
1765 /// Calls SelectAddr and determines if the maximal addressing
1766 /// mode it matches can be cost effectively emitted as an LEA instruction.
1767 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
1768  SDValue &Base, SDValue &Scale,
1769  SDValue &Index, SDValue &Disp,
1770  SDValue &Segment) {
1771  X86ISelAddressMode AM;
1772 
1773  // Save the DL and VT before calling matchAddress, it can invalidate N.
1774  SDLoc DL(N);
1775  MVT VT = N.getSimpleValueType();
1776 
1777  // Set AM.Segment to prevent matchAddress from using one. LEA doesn't
1778  // support segments.
1779  SDValue Copy = AM.Segment;
1780  SDValue T = CurDAG->getRegister(0, MVT::i32);
1781  AM.Segment = T;
1782  if (matchAddress(N, AM))
1783  return false;
1784  assert(T == AM.Segment);
1785  AM.Segment = Copy;
1786 
1787  unsigned Complexity = 0;
1788  if (AM.BaseType == X86ISelAddressMode::RegBase)
1789  if (AM.Base_Reg.getNode())
1790  Complexity = 1;
1791  else
1792  AM.Base_Reg = CurDAG->getRegister(0, VT);
1793  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1794  Complexity = 4;
1795 
1796  if (AM.IndexReg.getNode())
1797  Complexity++;
1798  else
1799  AM.IndexReg = CurDAG->getRegister(0, VT);
1800 
1801  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1802  // a simple shift.
1803  if (AM.Scale > 1)
1804  Complexity++;
1805 
1806  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1807  // to a LEA. This is determined with some experimentation but is by no means
1808  // optimal (especially for code size consideration). LEA is nice because of
1809  // its three-address nature. Tweak the cost function again when we can run
1810  // convertToThreeAddress() at register allocation time.
1811  if (AM.hasSymbolicDisplacement()) {
1812  // For X86-64, always use LEA to materialize RIP-relative addresses.
1813  if (Subtarget->is64Bit())
1814  Complexity = 4;
1815  else
1816  Complexity += 2;
1817  }
1818 
1819  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
1820  Complexity++;
1821 
1822  // If it isn't worth using an LEA, reject it.
1823  if (Complexity <= 2)
1824  return false;
1825 
1826  getAddressOperands(AM, DL, Base, Scale, Index, Disp, Segment);
1827  return true;
1828 }
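// ---- Illustrative sketch (not part of this file) ----
// The cost heuristic above, restated as a standalone function with
// hypothetical, simplified inputs. LEA is emitted only when the matched
// mode scores more than 2, i.e. when it folds enough work to beat
// separate adds and shifts:
struct LeaParts {
  bool IsFrameIndex, HasBaseReg, HasIndexReg, ScaleGreaterThanOne;
  bool HasSymbol, Is64Bit, HasDispWithReg;
};

unsigned leaComplexity(const LeaParts &P) {
  unsigned C = 0;
  if (P.IsFrameIndex)
    C = 4; // A frame index is always worth an LEA.
  else if (P.HasBaseReg)
    C = 1;
  if (P.HasIndexReg)
    ++C;
  if (P.ScaleGreaterThanOne)
    ++C; // leal (,%reg,2) alone still scores only 2 and is rejected.
  if (P.HasSymbol)
    C = P.Is64Bit ? 4 : C + 2; // RIP-relative addresses always use LEA.
  if (P.HasDispWithReg)
    ++C;
  return C; // LEA is selected iff C > 2.
}
// e.g. base + index*4 + 8 scores 1+1+1+1 = 4: emit leaq 8(%rbx,%rcx,4), %rax.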
1829 
1830 /// This is only run on TargetGlobalTLSAddress nodes.
1831 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
1832  SDValue &Scale, SDValue &Index,
1833  SDValue &Disp, SDValue &Segment) {
1834  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1835  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1836 
1837  X86ISelAddressMode AM;
1838  AM.GV = GA->getGlobal();
1839  AM.Disp += GA->getOffset();
1840  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
1841  AM.SymbolFlags = GA->getTargetFlags();
1842 
1843  if (N.getValueType() == MVT::i32) {
1844  AM.Scale = 1;
1845  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1846  } else {
1847  AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1848  }
1849 
1850  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1851  return true;
1852 }
1853 
1854 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
1855  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
1856  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
1857  N.getValueType());
1858  return true;
1859  }
1860 
1861  // Keep track of the original value type and whether this value was
1862  // truncated. If we see a truncation from pointer type to VT that truncates
1863  // bits that are known to be zero, we can use a narrow reference.
1864  EVT VT = N.getValueType();
1865  bool WasTruncated = false;
1866  if (N.getOpcode() == ISD::TRUNCATE) {
1867  WasTruncated = true;
1868  N = N.getOperand(0);
1869  }
1870 
1871  if (N.getOpcode() != X86ISD::Wrapper)
1872  return false;
1873 
1874  // We can only use non-GlobalValues as immediates if they were not truncated,
1875  // as we do not have any range information. If we have a GlobalValue and the
1876  // address was not truncated, we can select it as an operand directly.
1877  unsigned Opc = N.getOperand(0)->getOpcode();
1878  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
1879  Op = N.getOperand(0);
1880  // We can only select the operand directly if we didn't have to look past a
1881  // truncate.
1882  return !WasTruncated;
1883  }
1884 
1885  // Check that the global's range fits into VT.
1886  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
1887  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
1888  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
1889  return false;
1890 
1891  // Okay, we can use a narrow reference.
1892  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
1893  GA->getOffset(), GA->getTargetFlags());
1894  return true;
1895 }
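// ---- Illustrative sketch (not part of this file) ----
// The narrowing above is driven by !absolute_symbol metadata. With IR like
//   @foo = external global i8, !absolute_symbol !0
//   !0 = !{i64 0, i64 256}
// the symbol's range is [0, 256), getUnsignedMax() is 255, and a truncated
// i8 reference is legal since 255 < 2^8. A standalone form of the check
// (hypothetical helper, guarding the shift for VTs of 64+ bits):
#include <cstdint>

bool rangeFitsInVT(uint64_t UnsignedMax, unsigned VTBits) {
  return VTBits >= 64 || UnsignedMax < (1ull << VTBits);
}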
1896 
1897 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
1898  SDValue &Base, SDValue &Scale,
1899  SDValue &Index, SDValue &Disp,
1900  SDValue &Segment) {
1901  if (!ISD::isNON_EXTLoad(N.getNode()) ||
1902  !IsProfitableToFold(N, P, Root) ||
1903  !IsLegalToFold(N, P, Root, OptLevel))
1904  return false;
1905 
1906  return selectAddr(N.getNode(),
1907  N.getOperand(1), Base, Scale, Index, Disp, Segment);
1908 }
1909 
1910 /// Return an SDNode that returns the value of the global base register.
1911 /// Output instructions required to initialize the global base register,
1912 /// if necessary.
1913 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1914  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1915  auto &DL = MF->getDataLayout();
1916  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
1917 }
1918 
1919 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
1920  if (N->getOpcode() == ISD::TRUNCATE)
1921  N = N->getOperand(0).getNode();
1922  if (N->getOpcode() != X86ISD::Wrapper)
1923  return false;
1924 
1925  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
1926  if (!GA)
1927  return false;
1928 
1929  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
1930  return CR && CR->getSignedMin().sge(-1ull << Width) &&
1931  CR->getSignedMax().slt(1ull << Width);
1932 }
1933 
1934 /// Test whether the given X86ISD::CMP node has any uses which require the SF
1935 /// or OF bits to be accurate.
1936 static bool hasNoSignedComparisonUses(SDNode *N) {
1937  // Examine each user of the node.
1938  for (SDNode::use_iterator UI = N->use_begin(),
1939  UE = N->use_end(); UI != UE; ++UI) {
1940  // Only examine CopyToReg uses.
1941  if (UI->getOpcode() != ISD::CopyToReg)
1942  return false;
1943  // Only examine CopyToReg uses that copy to EFLAGS.
1944  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
1945  X86::EFLAGS)
1946  return false;
1947  // Examine each user of the CopyToReg use.
1948  for (SDNode::use_iterator FlagUI = UI->use_begin(),
1949  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
1950  // Only examine the Flag result.
1951  if (FlagUI.getUse().getResNo() != 1) continue;
1952  // Anything unusual: assume conservatively.
1953  if (!FlagUI->isMachineOpcode()) return false;
1954  // Examine the opcode of the user.
1955  switch (FlagUI->getMachineOpcode()) {
1956  // These comparisons don't treat the most significant bit specially.
1957  case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
1958  case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
1959  case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
1960  case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
1961  case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
1962  case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
1963  case X86::CMOVA16rr: case X86::CMOVA16rm:
1964  case X86::CMOVA32rr: case X86::CMOVA32rm:
1965  case X86::CMOVA64rr: case X86::CMOVA64rm:
1966  case X86::CMOVAE16rr: case X86::CMOVAE16rm:
1967  case X86::CMOVAE32rr: case X86::CMOVAE32rm:
1968  case X86::CMOVAE64rr: case X86::CMOVAE64rm:
1969  case X86::CMOVB16rr: case X86::CMOVB16rm:
1970  case X86::CMOVB32rr: case X86::CMOVB32rm:
1971  case X86::CMOVB64rr: case X86::CMOVB64rm:
1972  case X86::CMOVBE16rr: case X86::CMOVBE16rm:
1973  case X86::CMOVBE32rr: case X86::CMOVBE32rm:
1974  case X86::CMOVBE64rr: case X86::CMOVBE64rm:
1975  case X86::CMOVE16rr: case X86::CMOVE16rm:
1976  case X86::CMOVE32rr: case X86::CMOVE32rm:
1977  case X86::CMOVE64rr: case X86::CMOVE64rm:
1978  case X86::CMOVNE16rr: case X86::CMOVNE16rm:
1979  case X86::CMOVNE32rr: case X86::CMOVNE32rm:
1980  case X86::CMOVNE64rr: case X86::CMOVNE64rm:
1981  case X86::CMOVNP16rr: case X86::CMOVNP16rm:
1982  case X86::CMOVNP32rr: case X86::CMOVNP32rm:
1983  case X86::CMOVNP64rr: case X86::CMOVNP64rm:
1984  case X86::CMOVP16rr: case X86::CMOVP16rm:
1985  case X86::CMOVP32rr: case X86::CMOVP32rm:
1986  case X86::CMOVP64rr: case X86::CMOVP64rm:
1987  continue;
1988  // Anything else: assume conservatively.
1989  default: return false;
1990  }
1991  }
1992  }
1993  return true;
1994 }
1995 
1996 /// Test whether the given node which sets flags has any uses which require the
1997 /// CF flag to be accurate.
1998 static bool hasNoCarryFlagUses(SDNode *N) {
1999  // Examine each user of the node.
2000  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
2001  ++UI) {
2002  // Only check things that use the flags.
2003  if (UI.getUse().getResNo() != 1)
2004  continue;
2005  // Only examine CopyToReg uses.
2006  if (UI->getOpcode() != ISD::CopyToReg)
2007  return false;
2008  // Only examine CopyToReg uses that copy to EFLAGS.
2009  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2010  return false;
2011  // Examine each user of the CopyToReg use.
2012  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2013  FlagUI != FlagUE; ++FlagUI) {
2014  // Only examine the Flag result.
2015  if (FlagUI.getUse().getResNo() != 1)
2016  continue;
2017  // Anything unusual: assume conservatively.
2018  if (!FlagUI->isMachineOpcode())
2019  return false;
2020  // Examine the opcode of the user.
2021  switch (FlagUI->getMachineOpcode()) {
2022  // Comparisons which don't examine the CF flag.
2023  case X86::SETOr: case X86::SETNOr: case X86::SETEr: case X86::SETNEr:
2024  case X86::SETSr: case X86::SETNSr: case X86::SETPr: case X86::SETNPr:
2025  case X86::SETLr: case X86::SETGEr: case X86::SETLEr: case X86::SETGr:
2026  case X86::JO_1: case X86::JNO_1: case X86::JE_1: case X86::JNE_1:
2027  case X86::JS_1: case X86::JNS_1: case X86::JP_1: case X86::JNP_1:
2028  case X86::JL_1: case X86::JGE_1: case X86::JLE_1: case X86::JG_1:
2029  case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
2030  case X86::CMOVO16rm: case X86::CMOVO32rm: case X86::CMOVO64rm:
2031  case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr:
2032  case X86::CMOVNO16rm: case X86::CMOVNO32rm: case X86::CMOVNO64rm:
2033  case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
2034  case X86::CMOVE16rm: case X86::CMOVE32rm: case X86::CMOVE64rm:
2035  case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
2036  case X86::CMOVNE16rm: case X86::CMOVNE32rm: case X86::CMOVNE64rm:
2037  case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
2038  case X86::CMOVS16rm: case X86::CMOVS32rm: case X86::CMOVS64rm:
2039  case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
2040  case X86::CMOVNS16rm: case X86::CMOVNS32rm: case X86::CMOVNS64rm:
2041  case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
2042  case X86::CMOVP16rm: case X86::CMOVP32rm: case X86::CMOVP64rm:
2043  case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
2044  case X86::CMOVNP16rm: case X86::CMOVNP32rm: case X86::CMOVNP64rm:
2045  case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
2046  case X86::CMOVL16rm: case X86::CMOVL32rm: case X86::CMOVL64rm:
2047  case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
2048  case X86::CMOVGE16rm: case X86::CMOVGE32rm: case X86::CMOVGE64rm:
2049  case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
2050  case X86::CMOVLE16rm: case X86::CMOVLE32rm: case X86::CMOVLE64rm:
2051  case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
2052  case X86::CMOVG16rm: case X86::CMOVG32rm: case X86::CMOVG64rm:
2053  continue;
2054  // Anything else: assume conservatively.
2055  default:
2056  return false;
2057  }
2058  }
2059  }
2060  return true;
2061 }
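// ---- Illustrative sketch (not part of this file) ----
// Why callers care: foldLoadStoreIntoMemOperand below rewrites an ADD as a
// SUB of the negated immediate (and vice versa) to shrink the encoding.
// Adding 0xFF and subtracting 1 produce the same 8-bit result, but ADD
// sets CF on unsigned carry-out while SUB sets it on borrow, so the
// rewrite is only legal when no user reads CF:
#include <cassert>
#include <cstdint>

bool addCarry(uint8_t A, uint8_t B) { return (uint8_t)(A + B) < A; }
bool subBorrow(uint8_t A, uint8_t B) { return A < B; }

int main() {
  assert((uint8_t)(5 + 0xFF) == (uint8_t)(5 - 1)); // Same 8-bit result: 4.
  assert(addCarry(5, 0xFF) && !subBorrow(5, 1));   // But CF differs.
}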
2062 
2063 /// Check whether or not the chain ending in StoreNode is suitable for doing
2064 /// the {load; op; store} to modify transformation.
2065 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2066  SDValue StoredVal, SelectionDAG *CurDAG,
2067  LoadSDNode *&LoadNode,
2068  SDValue &InputChain) {
2069  // Is the stored value result 0 of the operation?
2070  if (StoredVal.getResNo() != 0) return false;
2071 
2072  // Are there other uses of the operation's result besides the store?
2073  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2074 
2075  // Is the store non-extending and non-indexed?
2076  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2077  return false;
2078 
2079  SDValue Load = StoredVal->getOperand(0);
2080  // Is the stored value a non-extending and non-indexed load?
2081  if (!ISD::isNormalLoad(Load.getNode())) return false;
2082 
2083  // Return LoadNode by reference.
2084  LoadNode = cast<LoadSDNode>(Load);
2085 
2086  // Is this operation the only use of the loaded value?
2087  if (!Load.hasOneUse())
2088  return false;
2089 
2090  // Is the address of the store the same as the load?
2091  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2092  LoadNode->getOffset() != StoreNode->getOffset())
2093  return false;
2094 
2095  // Check if the chain is produced by the load or is a TokenFactor with
2096  // the load output chain as an operand. Return InputChain by reference.
2097  SDValue Chain = StoreNode->getChain();
2098 
2099  bool ChainCheck = false;
2100  if (Chain == Load.getValue(1)) {
2101  ChainCheck = true;
2102  InputChain = LoadNode->getChain();
2103  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2104  SmallVector<SDValue, 4> ChainOps;
2105  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2106  SDValue Op = Chain.getOperand(i);
2107  if (Op == Load.getValue(1)) {
2108  ChainCheck = true;
2109  // Drop Load, but keep its chain. No cycle check necessary.
2110  ChainOps.push_back(Load.getOperand(0));
2111  continue;
2112  }
2113 
2114  // Make sure using Op as part of the chain would not cause a cycle here.
2115  // In theory, we could check whether the chain node is a predecessor of
2116  // the load. But that can be very expensive. Instead visit the uses and
2117  // make sure they all have smaller node id than the load.
2118  int LoadId = LoadNode->getNodeId();
2119  for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
2120  UE = UI->use_end(); UI != UE; ++UI) {
2121  if (UI.getUse().getResNo() != 0)
2122  continue;
2123  if (UI->getNodeId() > LoadId)
2124  return false;
2125  }
2126 
2127  ChainOps.push_back(Op);
2128  }
2129 
2130  if (ChainCheck)
2131  // Make a new TokenFactor with all the other input chains except
2132  // for the load.
2133  InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
2134  MVT::Other, ChainOps);
2135  }
2136  if (!ChainCheck)
2137  return false;
2138 
2139  return true;
2140 }
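// ---- Illustrative sketch (not part of this file) ----
// The exact shape this predicate accepts, in pseudo-IR:
//   %v  = load i64, i64* %p     ; LoadNode
//   %v1 = add i64 %v, -1        ; StoredVal, sole user of %v
//   store i64 %v1, i64* %p      ; StoreNode, same base and offset
// foldLoadStoreIntoMemOperand below then emits a single
//   decq (%rdi)
// in place of a separate load, dec and store.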
2141 
2142 // Change a chain of {load; op; store} of the same value into a simple op
2143 // through memory of that value, if the uses of the modified value and its
2144 // address are suitable.
2145 //
2146 // The tablegen memory operand pattern is currently not able to match
2147 // the case where the EFLAGS produced by the original operation are used.
2148 //
2149 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2150 // be transferred from a node in the pattern to the result node, probably with
2151 // a new keyword. For example, we have this
2152 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2153 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2154 // (implicit EFLAGS)]>;
2155 // but maybe need something like this
2156 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2157 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2158 // (transferrable EFLAGS)]>;
2159 //
2160 // Until then, we manually fold these and instruction select the operation
2161 // here.
2162 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2163  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2164  SDValue StoredVal = StoreNode->getOperand(1);
2165  unsigned Opc = StoredVal->getOpcode();
2166 
2167  // Before we try to select anything, make sure the memory operand size
2168  // and opcode are ones we can handle. Note that this must match the code
2169  // below that actually lowers the opcodes.
2170  EVT MemVT = StoreNode->getMemoryVT();
2171  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2172  MemVT != MVT::i8)
2173  return false;
2174  switch (Opc) {
2175  default:
2176  return false;
2177  case X86ISD::INC:
2178  case X86ISD::DEC:
2179  case X86ISD::ADD:
2180  case X86ISD::SUB:
2181  case X86ISD::AND:
2182  case X86ISD::OR:
2183  case X86ISD::XOR:
2184  break;
2185  }
2186 
2187  LoadSDNode *LoadNode = nullptr;
2188  SDValue InputChain;
2189  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode,
2190  InputChain))
2191  return false;
2192 
2193  SDValue Base, Scale, Index, Disp, Segment;
2194  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2195  Segment))
2196  return false;
2197 
2198  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2199  unsigned Opc8) {
2200  switch (MemVT.getSimpleVT().SimpleTy) {
2201  case MVT::i64:
2202  return Opc64;
2203  case MVT::i32:
2204  return Opc32;
2205  case MVT::i16:
2206  return Opc16;
2207  case MVT::i8:
2208  return Opc8;
2209  default:
2210  llvm_unreachable("Invalid size!");
2211  }
2212  };
2213 
2214  MachineSDNode *Result;
2215  switch (Opc) {
2216  case X86ISD::INC:
2217  case X86ISD::DEC: {
2218  unsigned NewOpc =
2219  Opc == X86ISD::INC
2220  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2221  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2222  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2223  Result =
2224  CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
2225  break;
2226  }
2227  case X86ISD::ADD:
2228  case X86ISD::SUB:
2229  case X86ISD::AND:
2230  case X86ISD::OR:
2231  case X86ISD::XOR: {
2232  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
2233  switch (Opc) {
2234  case X86ISD::ADD:
2235  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
2236  X86::ADD8mr);
2237  case X86ISD::SUB:
2238  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
2239  X86::SUB8mr);
2240  case X86ISD::AND:
2241  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
2242  X86::AND8mr);
2243  case X86ISD::OR:
2244  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
2245  case X86ISD::XOR:
2246  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
2247  X86::XOR8mr);
2248  default:
2249  llvm_unreachable("Invalid opcode!");
2250  }
2251  };
2252  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
2253  switch (Opc) {
2254  case X86ISD::ADD:
2255  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
2256  case X86ISD::SUB:
2257  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
2258  case X86ISD::AND:
2259  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
2260  case X86ISD::OR:
2261  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
2262  case X86ISD::XOR:
2263  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
2264  default:
2265  llvm_unreachable("Invalid opcode!");
2266  }
2267  };
2268  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
2269  switch (Opc) {
2270  case X86ISD::ADD:
2271  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
2272  X86::ADD8mi);
2273  case X86ISD::SUB:
2274  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
2275  X86::SUB8mi);
2276  case X86ISD::AND:
2277  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
2278  X86::AND8mi);
2279  case X86ISD::OR:
2280  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
2281  X86::OR8mi);
2282  case X86ISD::XOR:
2283  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
2284  X86::XOR8mi);
2285  default:
2286  llvm_unreachable("Invalid opcode!");
2287  }
2288  };
2289 
2290  unsigned NewOpc = SelectRegOpcode(Opc);
2291  SDValue Operand = StoredVal->getOperand(1);
2292 
2293  // See if the operand is a constant that we can fold into an immediate
2294  // operand.
2295  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
2296  auto OperandV = OperandC->getAPIntValue();
2297 
2298  // Check if we can shrink the operand enough to fit in an immediate (or
2299  // fit into a smaller immediate) by negating it and switching the
2300  // operation.
2301  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
2302  ((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 &&
2303  (-OperandV).getMinSignedBits() <= 8) ||
2304  (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
2305  (-OperandV).getMinSignedBits() <= 32)) &&
2306  hasNoCarryFlagUses(StoredVal.getNode())) {
2307  OperandV = -OperandV;
2308  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
2309  }
2310 
2311  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
2312  // the larger immediate operand.
2313  if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) {
2314  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2315  NewOpc = SelectImm8Opcode(Opc);
2316  } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() &&
2317  (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) {
2318  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2319  NewOpc = SelectImmOpcode(Opc);
2320  }
2321  }
2322 
2323  const SDValue Ops[] = {Base, Scale, Index, Disp,
2324  Segment, Operand, InputChain};
2325  Result =
2326  CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
2327  break;
2328  }
2329  default:
2330  llvm_unreachable("Invalid opcode!");
2331  }
2332 
2333  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
2334  MemOp[0] = StoreNode->getMemOperand();
2335  MemOp[1] = LoadNode->getMemOperand();
2336  Result->setMemRefs(MemOp, MemOp + 2);
2337 
2338  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2339  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2340  CurDAG->RemoveDeadNode(Node);
2341  return true;
2342 }
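// ---- Illustrative sketch (not part of this file) ----
// Worked example of the immediate shrinking above: 128 does not fit in a
// signed 8-bit immediate but -128 does, so when CF is dead
//   addq $128, (%rdi)   ; 4-byte imm32
// is re-encoded as
//   subq $-128, (%rdi)  ; 1-byte imm8
#include <cstdint>

bool fitsInImm8(int64_t V) { return V >= -128 && V <= 127; }
// fitsInImm8(128) == false, fitsInImm8(-128) == true -> flip ADD <-> SUB.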
2343 
2344 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
2345 bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
2346  MVT NVT = Node->getSimpleValueType(0);
2347  SDLoc dl(Node);
2348 
2349  SDValue N0 = Node->getOperand(0);
2350  SDValue N1 = Node->getOperand(1);
2351 
2352  if (!Subtarget->hasBMI() && !Subtarget->hasTBM())
2353  return false;
2354 
2355  // Must have a shift right.
2356  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
2357  return false;
2358 
2359  // Shift can't have additional users.
2360  if (!N0->hasOneUse())
2361  return false;
2362 
2363  // Only supported for 32 and 64 bits.
2364  if (NVT != MVT::i32 && NVT != MVT::i64)
2365  return false;
2366 
2367  // Shift amount and RHS of and must be constant.
2368  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
2369  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2370  if (!MaskCst || !ShiftCst)
2371  return false;
2372 
2373  // And RHS must be a mask.
2374  uint64_t Mask = MaskCst->getZExtValue();
2375  if (!isMask_64(Mask))
2376  return false;
2377 
2378  uint64_t Shift = ShiftCst->getZExtValue();
2379  uint64_t MaskSize = countPopulation(Mask);
2380 
2381  // Don't interfere with something that can be handled by extracting AH.
2382  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
2383  if (Shift == 8 && MaskSize == 8)
2384  return false;
2385 
2386  // Make sure we are only using bits that were in the original value, not
2387  // shifted in.
2388  if (Shift + MaskSize > NVT.getSizeInBits())
2389  return false;
2390 
2391  SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
2392  unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
2393  unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
2394 
2395  // BMI requires the immediate to be placed in a register.
2396  if (!Subtarget->hasTBM()) {
2397  ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
2398  MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
2399  New = SDValue(CurDAG->getMachineNode(X86::MOV32ri, dl, NVT, New), 0);
2400  if (NVT == MVT::i64) {
2401  New =
2402  SDValue(CurDAG->getMachineNode(
2403  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
2404  CurDAG->getTargetConstant(0, dl, MVT::i64), New,
2405  CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
2406  0);
2407  }
2408  }
2409 
2410  MachineSDNode *NewNode;
2411  SDValue Input = N0->getOperand(0);
2412  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2413  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
2414  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
2415  SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
2416  NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2417  // Update the chain.
2418  ReplaceUses(Input.getValue(1), SDValue(NewNode, 1));
2419  // Record the mem-refs
2420  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2421  MemOp[0] = cast<LoadSDNode>(Input)->getMemOperand();
2422  NewNode->setMemRefs(MemOp, MemOp + 1);
2423  } else {
2424  NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New);
2425  }
2426 
2427  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
2428  CurDAG->RemoveDeadNode(Node);
2429  return true;
2430 }
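// ---- Illustrative sketch (not part of this file) ----
// BEXTR takes a control value with the start bit in bits 7:0 and the
// length in bits 15:8, matching the "Shift | (MaskSize << 8)" packing
// above. For (x >> 4) & 0xFFF (start 4, length 12):
#include <cstdint>

uint32_t bextrControl(uint32_t Shift, uint32_t MaskSize) {
  return Shift | (MaskSize << 8);
}
// bextrControl(4, 12) == 0x0C04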
2431 
2432 void X86DAGToDAGISel::Select(SDNode *Node) {
2433  MVT NVT = Node->getSimpleValueType(0);
2434  unsigned Opc, MOpc;
2435  unsigned Opcode = Node->getOpcode();
2436  SDLoc dl(Node);
2437 
2438  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
2439 
2440  if (Node->isMachineOpcode()) {
2441  DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
2442  Node->setNodeId(-1);
2443  return; // Already selected.
2444  }
2445 
2446  switch (Opcode) {
2447  default: break;
2448  case ISD::BRIND: {
2449  if (Subtarget->isTargetNaCl())
2450  // NaCl has its own pass where jmp %r32 instructions are converted to
2451  // jmp %r64, so we leave the instruction alone.
2452  break;
2453  if (Subtarget->isTarget64BitILP32()) {
2454  // Converts a 32-bit register to a 64-bit, zero-extended version of
2455  // it. This is needed because x86-64 can do many things, but jmp %r32
2456  // ain't one of them.
2457  const SDValue &Target = Node->getOperand(1);
2458  assert(Target.getSimpleValueType() == llvm::MVT::i32);
2459  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
2460  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
2461  Node->getOperand(0), ZextTarget);
2462  ReplaceNode(Node, Brind.getNode());
2463  SelectCode(ZextTarget.getNode());
2464  SelectCode(Brind.getNode());
2465  return;
2466  }
2467  break;
2468  }
2469  case X86ISD::GlobalBaseReg:
2470  ReplaceNode(Node, getGlobalBaseReg());
2471  return;
2472 
2473  case X86ISD::SELECT:
2474  case X86ISD::SHRUNKBLEND: {
2475  // SHRUNKBLEND selects like a regular VSELECT. Same with X86ISD::SELECT.
2476  SDValue VSelect = CurDAG->getNode(
2477  ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
2478  Node->getOperand(1), Node->getOperand(2));
2479  ReplaceNode(Node, VSelect.getNode());
2480  SelectCode(VSelect.getNode());
2481  // We already called ReplaceUses.
2482  return;
2483  }
2484 
2485  case ISD::AND:
2486  // Try to match BEXTR/BEXTRI instruction.
2487  if (matchBEXTRFromAnd(Node))
2488  return;
2489 
2490  LLVM_FALLTHROUGH;
2491  case ISD::OR:
2492  case ISD::XOR: {
2493 
2494  // For operations of the form (x << C1) op C2, check if we can use a smaller
2495  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
2496  SDValue N0 = Node->getOperand(0);
2497  SDValue N1 = Node->getOperand(1);
2498 
2499  if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
2500  break;
2501 
2502  // i8 is unshrinkable, i16 should be promoted to i32.
2503  if (NVT != MVT::i32 && NVT != MVT::i64)
2504  break;
2505 
2507  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2508  if (!Cst || !ShlCst)
2509  break;
2510 
2511  int64_t Val = Cst->getSExtValue();
2512  uint64_t ShlVal = ShlCst->getZExtValue();
2513 
2514  // Make sure that we don't change the operation by removing bits.
2515  // This only matters for OR and XOR, AND is unaffected.
2516  uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
2517  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
2518  break;
2519 
2520  unsigned ShlOp, AddOp, Op;
2521  MVT CstVT = NVT;
2522 
2523  // Check the minimum bitwidth for the new constant.
2524  // TODO: AND32ri is the same as AND64ri32 with zext imm.
2525  // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
2526  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
2527  if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
2528  CstVT = MVT::i8;
2529  else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
2530  CstVT = MVT::i32;
2531 
2532  // Bail if there is no smaller encoding.
2533  if (NVT == CstVT)
2534  break;
2535 
2536  switch (NVT.SimpleTy) {
2537  default: llvm_unreachable("Unsupported VT!");
2538  case MVT::i32:
2539  assert(CstVT == MVT::i8);
2540  ShlOp = X86::SHL32ri;
2541  AddOp = X86::ADD32rr;
2542 
2543  switch (Opcode) {
2544  default: llvm_unreachable("Impossible opcode");
2545  case ISD::AND: Op = X86::AND32ri8; break;
2546  case ISD::OR: Op = X86::OR32ri8; break;
2547  case ISD::XOR: Op = X86::XOR32ri8; break;
2548  }
2549  break;
2550  case MVT::i64:
2551  assert(CstVT == MVT::i8 || CstVT == MVT::i32);
2552  ShlOp = X86::SHL64ri;
2553  AddOp = X86::ADD64rr;
2554 
2555  switch (Opcode) {
2556  default: llvm_unreachable("Impossible opcode");
2557  case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
2558  case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
2559  case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
2560  }
2561  break;
2562  }
2563 
2564  // Emit the smaller op and the shift.
2565  SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
2566  SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
2567  if (ShlVal == 1)
2568  CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
2569  SDValue(New, 0));
2570  else
2571  CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
2572  getI8Imm(ShlVal, dl));
2573  return;
2574  }
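// Worked example of the transform above (illustrative): in
// (x << 8) ^ 0x1200 the constant needs an imm32, but 0x1200 >> 8 = 0x12
// fits in an imm8 and no set bits fall below the shift (0x1200 & 0xFF == 0),
// so it is re-emitted as ((x ^ 0x12) << 8):
//   xorl $0x12, %edi
//   shll $8, %edi
// When ShlVal is 1, an add of the new result to itself replaces the shl.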
2575  case X86ISD::UMUL8:
2576  case X86ISD::SMUL8: {
2577  SDValue N0 = Node->getOperand(0);
2578  SDValue N1 = Node->getOperand(1);
2579 
2580  Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
2581 
2582  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
2583  N0, SDValue()).getValue(1);
2584 
2585  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
2586  SDValue Ops[] = {N1, InFlag};
2587  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2588 
2589  ReplaceNode(Node, CNode);
2590  return;
2591  }
2592 
2593  case X86ISD::UMUL: {
2594  SDValue N0 = Node->getOperand(0);
2595  SDValue N1 = Node->getOperand(1);
2596 
2597  unsigned LoReg;
2598  switch (NVT.SimpleTy) {
2599  default: llvm_unreachable("Unsupported VT!");
2600  // MVT::i8 is handled by X86ISD::UMUL8.
2601  case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
2602  case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
2603  case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
2604  }
2605 
2606  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
2607  N0, SDValue()).getValue(1);
2608 
2609  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
2610  SDValue Ops[] = {N1, InFlag};
2611  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2612 
2613  ReplaceNode(Node, CNode);
2614  return;
2615  }
2616 
2617  case ISD::SMUL_LOHI:
2618  case ISD::UMUL_LOHI: {
2619  SDValue N0 = Node->getOperand(0);
2620  SDValue N1 = Node->getOperand(1);
2621 
2622  bool isSigned = Opcode == ISD::SMUL_LOHI;
2623  bool hasBMI2 = Subtarget->hasBMI2();
2624  if (!isSigned) {
2625  switch (NVT.SimpleTy) {
2626  default: llvm_unreachable("Unsupported VT!");
2627  case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
2628  case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
2629  case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
2630  MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
2631  case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
2632  MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
2633  }
2634  } else {
2635  switch (NVT.SimpleTy) {
2636  default: llvm_unreachable("Unsupported VT!");
2637  case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
2638  case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
2639  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
2640  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
2641  }
2642  }
2643 
2644  unsigned SrcReg, LoReg, HiReg;
2645  switch (Opc) {
2646  default: llvm_unreachable("Unknown MUL opcode!");
2647  case X86::IMUL8r:
2648  case X86::MUL8r:
2649  SrcReg = LoReg = X86::AL; HiReg = X86::AH;
2650  break;
2651  case X86::IMUL16r:
2652  case X86::MUL16r:
2653  SrcReg = LoReg = X86::AX; HiReg = X86::DX;
2654  break;
2655  case X86::IMUL32r:
2656  case X86::MUL32r:
2657  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
2658  break;
2659  case X86::IMUL64r:
2660  case X86::MUL64r:
2661  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
2662  break;
2663  case X86::MULX32rr:
2664  SrcReg = X86::EDX; LoReg = HiReg = 0;
2665  break;
2666  case X86::MULX64rr:
2667  SrcReg = X86::RDX; LoReg = HiReg = 0;
2668  break;
2669  }
2670 
2671  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2672  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2673  // Multiply is commutative.
2674  if (!foldedLoad) {
2675  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2676  if (foldedLoad)
2677  std::swap(N0, N1);
2678  }
2679 
2680  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
2681  N0, SDValue()).getValue(1);
2682  SDValue ResHi, ResLo;
2683 
2684  if (foldedLoad) {
2685  SDValue Chain;
2686  MachineSDNode *CNode = nullptr;
2687  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2688  InFlag };
2689  if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
2690  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
2691  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2692  ResHi = SDValue(CNode, 0);
2693  ResLo = SDValue(CNode, 1);
2694  Chain = SDValue(CNode, 2);
2695  InFlag = SDValue(CNode, 3);
2696  } else {
2697  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2698  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2699  Chain = SDValue(CNode, 0);
2700  InFlag = SDValue(CNode, 1);
2701  }
2702 
2703  // Update the chain.
2704  ReplaceUses(N1.getValue(1), Chain);
2705  // Record the mem-refs
2706  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2707  MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
2708  CNode->setMemRefs(MemOp, MemOp + 1);
2709  } else {
2710  SDValue Ops[] = { N1, InFlag };
2711  if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
2712  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
2713  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2714  ResHi = SDValue(CNode, 0);
2715  ResLo = SDValue(CNode, 1);
2716  InFlag = SDValue(CNode, 2);
2717  } else {
2718  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
2719  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2720  InFlag = SDValue(CNode, 0);
2721  }
2722  }
2723 
2724  // Prevent use of AH in a REX instruction by referencing AX instead.
2725  if (HiReg == X86::AH && Subtarget->is64Bit() &&
2726  !SDValue(Node, 1).use_empty()) {
2727  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2728  X86::AX, MVT::i16, InFlag);
2729  InFlag = Result.getValue(2);
2730  // Get the low part if needed. Don't use getCopyFromReg for aliasing
2731  // registers.
2732  if (!SDValue(Node, 0).use_empty())
2733  ReplaceUses(SDValue(Node, 0),
2734  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2735 
2736  // Shift AX down 8 bits.
2737  Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
2738  Result,
2739  CurDAG->getTargetConstant(8, dl, MVT::i8)),
2740  0);
2741  // Then truncate it down to i8.
2742  ReplaceUses(SDValue(Node, 1),
2743  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2744  }
2745  // Copy the low half of the result, if it is needed.
2746  if (!SDValue(Node, 0).use_empty()) {
2747  if (!ResLo.getNode()) {
2748  assert(LoReg && "Register for low half is not defined!");
2749  ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
2750  InFlag);
2751  InFlag = ResLo.getValue(2);
2752  }
2753  ReplaceUses(SDValue(Node, 0), ResLo);
2754  DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
2755  }
2756  // Copy the high half of the result, if it is needed.
2757  if (!SDValue(Node, 1).use_empty()) {
2758  if (!ResHi.getNode()) {
2759  assert(HiReg && "Register for high half is not defined!");
2760  ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
2761  InFlag);
2762  InFlag = ResHi.getValue(2);
2763  }
2764  ReplaceUses(SDValue(Node, 1), ResHi);
2765  DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
2766  }
2767 
2768  CurDAG->RemoveDeadNode(Node);
2769  return;
2770  }
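// Worked example (illustrative) of the hi/lo split above: a widening
// 32-bit multiply yields a 64-bit product whose halves land in EDX:EAX
// (or in two explicit registers for MULX), e.g.
//   0xFFFFFFFF * 2 = 0x1FFFFFFFE -> EAX = 0xFFFFFFFE, EDX = 0x00000001.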
2771 
2772  case ISD::SDIVREM:
2773  case ISD::UDIVREM:
2774  case X86ISD::SDIVREM8_SEXT_HREG:
2775  case X86ISD::UDIVREM8_ZEXT_HREG: {
2776  SDValue N0 = Node->getOperand(0);
2777  SDValue N1 = Node->getOperand(1);
2778 
2779  bool isSigned = (Opcode == ISD::SDIVREM ||
2780  Opcode == X86ISD::SDIVREM8_SEXT_HREG);
2781  if (!isSigned) {
2782  switch (NVT.SimpleTy) {
2783  default: llvm_unreachable("Unsupported VT!");
2784  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
2785  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
2786  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
2787  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
2788  }
2789  } else {
2790  switch (NVT.SimpleTy) {
2791  default: llvm_unreachable("Unsupported VT!");
2792  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
2793  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
2794  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
2795  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
2796  }
2797  }
2798 
2799  unsigned LoReg, HiReg, ClrReg;
2800  unsigned SExtOpcode;
2801  switch (NVT.SimpleTy) {
2802  default: llvm_unreachable("Unsupported VT!");
2803  case MVT::i8:
2804  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
2805  SExtOpcode = X86::CBW;
2806  break;
2807  case MVT::i16:
2808  LoReg = X86::AX; HiReg = X86::DX;
2809  ClrReg = X86::DX;
2810  SExtOpcode = X86::CWD;
2811  break;
2812  case MVT::i32:
2813  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
2814  SExtOpcode = X86::CDQ;
2815  break;
2816  case MVT::i64:
2817  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
2818  SExtOpcode = X86::CQO;
2819  break;
2820  }
2821 
2822  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2823  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2824  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
2825 
2826  SDValue InFlag;
2827  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
2828  // Special case for div8, just use a move with zero extension to AX to
2829  // clear the upper 8 bits (AH).
2830  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
2831  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
2832  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
2833  Move =
2834  SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
2835  MVT::Other, Ops), 0);
2836  Chain = Move.getValue(1);
2837  ReplaceUses(N0.getValue(1), Chain);
2838  } else {
2839  Move =
2840  SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
2841  Chain = CurDAG->getEntryNode();
2842  }
2843  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
2844  InFlag = Chain.getValue(1);
2845  } else {
2846  InFlag =
2847  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
2848  LoReg, N0, SDValue()).getValue(1);
2849  if (isSigned && !signBitIsZero) {
2850  // Sign extend the low part into the high part.
2851  InFlag =
2852  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
2853  } else {
2854  // Zero out the high part, effectively zero extending the input.
2855  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
2856  switch (NVT.SimpleTy) {
2857  case MVT::i16:
2858  ClrNode =
2859  SDValue(CurDAG->getMachineNode(
2860  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
2861  CurDAG->getTargetConstant(X86::sub_16bit, dl,
2862  MVT::i32)),
2863  0);
2864  break;
2865  case MVT::i32:
2866  break;
2867  case MVT::i64:
2868  ClrNode =
2869  SDValue(CurDAG->getMachineNode(
2870  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
2871  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
2872  CurDAG->getTargetConstant(X86::sub_32bit, dl,
2873  MVT::i32)),
2874  0);
2875  break;
2876  default:
2877  llvm_unreachable("Unexpected division source");
2878  }
2879 
2880  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
2881  ClrNode, InFlag).getValue(1);
2882  }
2883  }
2884 
2885  if (foldedLoad) {
2886  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2887  InFlag };
2888  MachineSDNode *CNode =
2889  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
2890  InFlag = SDValue(CNode, 1);
2891  // Update the chain.
2892  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
2893  // Record the mem-refs
2894  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2895  MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
2896  CNode->setMemRefs(MemOp, MemOp + 1);
2897  } else {
2898  InFlag =
2899  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
2900  }
2901 
2902  // Prevent use of AH in a REX instruction by explicitly copying it to
2903  // an ABCD_L register.
2904  //
2905  // The current assumption of the register allocator is that isel
2906  // won't generate explicit references to the GR8_ABCD_H registers. If
2907  // the allocator and/or the backend get enhanced to be more robust in
2908  // that regard, this can be, and should be, removed.
2909  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
2910  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
2911  unsigned AHExtOpcode =
2912  isSigned ? X86::MOVSX32_NOREXrr8 : X86::MOVZX32_NOREXrr8;
2913 
2914  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
2915  MVT::Glue, AHCopy, InFlag);
2916  SDValue Result(RNode, 0);
2917  InFlag = SDValue(RNode, 1);
2918 
2919  if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
2920  Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
2921  assert(Node->getValueType(1) == MVT::i32 && "Unexpected result type!");
2922  } else {
2923  Result =
2924  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
2925  }
2926  ReplaceUses(SDValue(Node, 1), Result);
2927  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2928  }
2929  // Copy the division (low) result, if it is needed.
2930  if (!SDValue(Node, 0).use_empty()) {
2931  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2932  LoReg, NVT, InFlag);
2933  InFlag = Result.getValue(2);
2934  ReplaceUses(SDValue(Node, 0), Result);
2935  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2936  }
2937  // Copy the remainder (high) result, if it is needed.
2938  if (!SDValue(Node, 1).use_empty()) {
2939  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2940  HiReg, NVT, InFlag);
2941  InFlag = Result.getValue(2);
2942  ReplaceUses(SDValue(Node, 1), Result);
2943  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2944  }
2945  CurDAG->RemoveDeadNode(Node);
2946  return;
2947  }
2948 
2949  case X86ISD::CMP:
2950  case X86ISD::SUB: {
2951  // Sometimes a SUB is used to perform comparison.
2952  if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
2953  // This node is not a CMP.
2954  break;
2955  SDValue N0 = Node->getOperand(0);
2956  SDValue N1 = Node->getOperand(1);
2957 
2958  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
2959  hasNoSignedComparisonUses(Node))
2960  N0 = N0.getOperand(0);
2961 
2962  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
2963  // use a smaller encoding.
2964  // Look past the truncate if CMP is the only use of it.
2965  if ((N0.getOpcode() == ISD::AND ||
2966  (N0.getResNo() == 0 && N0.getOpcode() == X86ISD::AND)) &&
2967  N0.getNode()->hasOneUse() &&
2968  N0.getValueType() != MVT::i8 &&
2969  X86::isZeroNode(N1)) {
2970  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
2971  if (!C) break;
2972  uint64_t Mask = C->getZExtValue();
2973 
2974  // For example, convert "testl %eax, $8" to "testb %al, $8"
2975  if (isUInt<8>(Mask) &&
2976  (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) {
2977  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i8);
2978  SDValue Reg = N0.getOperand(0);
2979 
2980  // Extract the l-register.
2981  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
2982  MVT::i8, Reg);
2983 
2984  // Emit a testb.
2985  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
2986  Subreg, Imm);
2987  // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2988  // one, do not call ReplaceAllUsesWith.
2989  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2990  SDValue(NewNode, 0));
2991  CurDAG->RemoveDeadNode(Node);
2992  return;
2993  }
2994 
2995  // For example, "testl %eax, $2048" to "testb %ah, $8".
2996  if (isShiftedUInt<8, 8>(Mask) &&
2997  (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
2998  // Shift the immediate right by 8 bits.
2999  SDValue ShiftedImm = CurDAG->getTargetConstant(Mask >> 8, dl, MVT::i8);
3000  SDValue Reg = N0.getOperand(0);
3001 
3002  // Extract the h-register.
3003  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
3004  MVT::i8, Reg);
3005 
3006  // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
3007  // target GR8_NOREX registers, so make sure the register class is
3008  // forced.
3009  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
3010  MVT::i32, Subreg, ShiftedImm);
3011  // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
3012  // one, do not call ReplaceAllUsesWith.
3013  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
3014  SDValue(NewNode, 0));
3015  CurDAG->RemoveDeadNode(Node);
3016  return;
3017  }
3018 
3019  // For example, "testl %eax, $32776" to "testw %ax, $32776".
3020  // NOTE: We only want to form TESTW instructions if optimizing for
3021  // min size. Otherwise we only save one byte and possibly get a
3022  // length-changing prefix penalty in the decoders.
3023  if (OptForMinSize && isUInt<16>(Mask) && N0.getValueType() != MVT::i16 &&
3024  (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
3025  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i16);
3026  SDValue Reg = N0.getOperand(0);
3027 
3028  // Extract the 16-bit subregister.
3029  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
3030  MVT::i16, Reg);
3031 
3032  // Emit a testw.
3033  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
3034  Subreg, Imm);
3035  // Replace SUB|CMP with TEST; since SUB has two outputs while TEST has
3036  // only one, do not call ReplaceAllUsesWith.
3037  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
3038  SDValue(NewNode, 0));
3039  CurDAG->RemoveDeadNode(Node);
3040  return;
3041  }
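  // Explanatory note (added; byte counts assume the generic F7 /0 TEST
  // encoding): testw needs a 0x66 operand-size prefix plus a 2-byte
  // immediate where testl uses a 4-byte immediate, so the w-form is only
  // one byte shorter, and the prefix changing the immediate's length is
  // precisely the length-changing-prefix (LCP) decoder stall the NOTE
  // above warns about.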
3042 
3043  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
3044  if (isUInt<32>(Mask) && N0.getValueType() == MVT::i64 &&
3045  (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
3046  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
3047  SDValue Reg = N0.getOperand(0);
3048 
3049  // Extract the 32-bit subregister.
3050  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
3051  MVT::i32, Reg);
3052 
3053  // Emit a testl.
3054  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
3055  Subreg, Imm);
3056  // Replace SUB|CMP with TEST; since SUB has two outputs while TEST has
3057  // only one, do not call ReplaceAllUsesWith.
3058  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
3059  SDValue(NewNode, 0));
3060  CurDAG->RemoveDeadNode(Node);
3061  return;
3062  }
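  // Explanatory note (added): this i64 -> i32 narrowing is ZF-exact since
  // the mask has no bits above bit 31, saves the REX.W prefix, and
  // sidesteps TEST64ri32, whose immediate is sign-extended from 32 bits and
  // therefore cannot express a mask with bit 31 set. SF moves from bit 63
  // to bit 31, which is why such masks additionally require
  // hasNoSignedComparisonUses(Node).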
3063  }
3064  break;
3065  }
3066  case ISD::STORE:
3067  if (foldLoadStoreIntoMemOperand(Node))
3068  return;
3069  break;
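  // Explanatory note (added): per isFusableLoadOpStorePattern earlier in
  // this file, foldLoadStoreIntoMemOperand looks for a (load m) -> op ->
  // (store m) chain over one address and collapses it into a single
  // read-modify-write instruction, e.g. roughly turning
  //   %v = load i32, i32* %p;  %v2 = add i32 %v, 1;  store i32 %v2, i32* %p
  // into "addl $1, (%rax)" with no intermediate register.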
3070  }
3071 
3072  SelectCode(Node);
3073 }
3074 
3075 bool X86DAGToDAGISel::
3076 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
3077  std::vector<SDValue> &OutOps) {
3078  SDValue Op0, Op1, Op2, Op3, Op4;
3079  switch (ConstraintID) {
3080  default:
3081  llvm_unreachable("Unexpected asm memory constraint");
3082  case InlineAsm::Constraint_i:
3083  // FIXME: It seems strange that 'i' is needed here since it's supposed to
3084  // be an immediate and not a memory constraint.
3085  LLVM_FALLTHROUGH;
3086  case InlineAsm::Constraint_o: // offsetable ??
3087  case InlineAsm::Constraint_v: // not offsetable ??
3088  case InlineAsm::Constraint_m: // memory
3089  case InlineAsm::Constraint_X:
3090  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
3091  return true;
3092  break;
3093  }
3094 
3095  OutOps.push_back(Op0);
3096  OutOps.push_back(Op1);
3097  OutOps.push_back(Op2);
3098  OutOps.push_back(Op3);
3099  OutOps.push_back(Op4);
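  // Explanatory note (added): these five values are the standard X86
  // memory-operand quintet produced by selectAddr -- base register, scale,
  // index register, displacement, and segment register -- the operand
  // shape every x86 memory-referencing MachineInstr expects.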
3100  return false;
3101 }
3102 
3103 /// This pass converts a legalized DAG into an X86-specific DAG,
3104 /// ready for instruction scheduling.
3105 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
3106  CodeGenOpt::Level OptLevel) {
3107  return new X86DAGToDAGISel(TM, OptLevel);
3108 }
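// Usage sketch (added; the call site below is quoted from memory of this
// era of the tree, so treat it as illustrative): the target hooks this
// pass into the codegen pipeline via
//   bool X86PassConfig::addInstSelector() {
//     addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
//     ...
//   }
// in X86TargetMachine.cpp.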