LLVM 6.0.0svn
X86ISelDAGToDAG.cpp
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized DAG to an X86 DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include <stdint.h>
using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
  /// numbers for the leaves of the matched tree.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;
    int Base_FrameIndex;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    MCSymbol *MCSym;
    int JT;
    unsigned Align;             // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
        : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
          Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
          MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}

    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
    }

    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

    /// Return true if this addressing mode is already RIP-relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode())
        Base_Reg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
             << " Scale" << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode())
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " MCSym ";
      if (MCSym)
        dbgs() << MCSym;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Align << '\n';
    }
#endif
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86-specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel final : public SelectionDAGISel {
    /// Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// If true, selector should try to optimize for code size instead of
    /// performance.
    bool OptForSize;

    /// If true, selector should try to optimize for minimum code size.
    bool OptForMinSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), OptForSize(false),
          OptForMinSize(false) {}

    StringRef getPassName() const override {
      return "X86 DAG->DAG Instruction Selection";
    }

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Reset the subtarget each time through.
      Subtarget = &MF.getSubtarget<X86Subtarget>();
      SelectionDAGISel::runOnMachineFunction(MF);
      return true;
    }

    void EmitFunctionEntryCode() override;

    bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;

    void PreprocessISelDAG() override;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    void Select(SDNode *N) override;

    bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
    bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool matchAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
    bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                          SDValue &Scale, SDValue &Index, SDValue &Disp,
                          SDValue &Segment);
    bool selectMOV64Imm32(SDValue N, SDValue &Imm);
    bool selectLEAAddr(SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool selectLEA64_32Addr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
    bool selectTLSADDRAddr(SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool selectScalarSSELoad(SDNode *Root, SDValue N,
                             SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &NodeWithChain);
    bool selectRelocImm(SDValue N, SDValue &Op);

    bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    // Convenience method where P is also root.
    bool tryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment) {
      return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
    }

    /// Implement addressing mode selection for inline asm expressions.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override;

    void emitSpecialCodeForMain();

    inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
                                   SDValue &Base, SDValue &Scale,
                                   SDValue &Index, SDValue &Disp,
                                   SDValue &Segment) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
                 ? CurDAG->getTargetFrameIndex(
                       AM.Base_FrameIndex,
                       TLI->getPointerTy(CurDAG->getDataLayout()))
                 : AM.Base_Reg;
      Scale = getI8Imm(AM.Scale, DL);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP-relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES) {
        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.MCSym) {
        assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
        assert(AM.SymbolFlags == 0 && "Unexpected symbol flags with MCSym.");
        Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    // Utility function to determine whether we should avoid selecting
    // immediate forms of instructions, for better code size. At a high
    // level, we'd like to avoid such instructions when we have similar
    // constants used within the same basic block that can be kept in a
    // register.
    //
    bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
      uint32_t UseCount = 0;

      // Do not want to hoist if we're not optimizing for size.
      // TODO: We'd like to remove this restriction.
      // See the comment in X86InstrInfo.td for more info.
      if (!OptForSize)
        return false;

      // Walk all the users of the immediate.
      for (SDNode::use_iterator UI = N->use_begin(),
           UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {

        SDNode *User = *UI;

        // This user is already selected. Count it as a legitimate use and
        // move on.
        if (User->isMachineOpcode()) {
          UseCount++;
          continue;
        }

        // We want to count stores of immediates as real uses.
        if (User->getOpcode() == ISD::STORE &&
            User->getOperand(1).getNode() == N) {
          UseCount++;
          continue;
        }

        // We don't currently match users that have > 2 operands (except
        // for stores, which are handled above). Those instructions won't
        // match in ISEL, for now, and would be counted incorrectly.
        // This may change in the future as we add additional instruction
        // types.
        if (User->getNumOperands() != 2)
          continue;

        // Immediates that are used for offsets as part of stack
        // manipulation should be left alone. These are typically
        // used to indicate SP offsets for argument passing and
        // will get pulled into stores/pushes (implicitly).
        if (User->getOpcode() == X86ISD::ADD ||
            User->getOpcode() == ISD::ADD    ||
            User->getOpcode() == X86ISD::SUB ||
            User->getOpcode() == ISD::SUB) {

          // Find the other operand of the add/sub.
          SDValue OtherOp = User->getOperand(0);
          if (OtherOp.getNode() == N)
            OtherOp = User->getOperand(1);

          // Don't count if the other operand is SP.
          RegisterSDNode *RegNode;
          if (OtherOp->getOpcode() == ISD::CopyFromReg &&
              (RegNode = dyn_cast_or_null<RegisterSDNode>(
                 OtherOp->getOperand(1).getNode())))
            if ((RegNode->getReg() == X86::ESP) ||
                (RegNode->getReg() == X86::RSP))
              continue;
        }

        // ... otherwise, count this and move on.
        UseCount++;
      }

      // If we have more than 1 use, then recommend for hoisting.
      return (UseCount > 1);
    }
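
    // Editor's note (illustrative, not part of the original source): under
    // -Os, a block such as
    //   a += 1234; b += 1234;
    // has two non-store, two-operand uses of the constant 1234 whose other
    // operand is not SP, so the function above returns true and isel prefers
    // materializing the constant in a register once over encoding the 4-byte
    // immediate in both instructions.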

    /// Return a target constant with the specified value of type i8.
    inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
    }

    /// Return a target constant with the specified value, of type i32.
    inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
    }

    SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
                                        const SDLoc &DL) {
      assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
      uint64_t Index = N->getConstantOperandVal(1);
      MVT VecVT = N->getOperand(0).getSimpleValueType();
      return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
    }

    SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
                                      const SDLoc &DL) {
      assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
      uint64_t Index = N->getConstantOperandVal(2);
      MVT VecVT = N->getSimpleValueType(0);
      return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
    }

    /// Return an SDNode that returns the value of the global base register.
    /// Output instructions required to initialize the global base register,
    /// if necessary.
    SDNode *getGlobalBaseReg();

    /// Return a reference to the TargetMachine, casted to the target-specific
    /// type.
    const X86TargetMachine &getTargetMachine() const {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// Return a reference to the TargetInstrInfo, casted to the target-specific
    /// type.
    const X86InstrInfo *getInstrInfo() const {
      return Subtarget->getInstrInfo();
    }

    /// \brief Address-mode matching performs shift-of-and to and-of-shift
    /// reassociation in order to expose more scaled addressing
    /// opportunities.
    bool ComplexPatternFuncMutatesDAG() const override {
      return true;
    }

    bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;

    /// Returns whether this is a relocatable immediate that fits in Width
    /// signed bits, i.e. lies in the range [-2^(Width-1) .. 2^(Width-1)-1].
    template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
      if (auto *CN = dyn_cast<ConstantSDNode>(N))
        return isInt<Width>(CN->getSExtValue());
      return isSExtAbsoluteSymbolRef(Width, N);
    }

    // Indicates we should prefer to use a non-temporal load for this load.
    bool useNonTemporalLoad(LoadSDNode *N) const {
      if (!N->isNonTemporal())
        return false;

      unsigned StoreSize = N->getMemoryVT().getStoreSize();

      if (N->getAlignment() < StoreSize)
        return false;

      switch (StoreSize) {
      default: llvm_unreachable("Unsupported store size");
      case 16:
        return Subtarget->hasSSE41();
      case 32:
        return Subtarget->hasAVX2();
      case 64:
        return Subtarget->hasAVX512();
      }
    }

    bool foldLoadStoreIntoMemOperand(SDNode *Node);

    bool matchBEXTRFromAnd(SDNode *Node);

    bool isMaskZeroExtended(SDNode *N) const;
  };
}


// Returns true if this masked compare can be implemented legally with this
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
  unsigned Opcode = N->getOpcode();
  if (Opcode == X86ISD::PCMPEQM || Opcode == X86ISD::PCMPGTM ||
      Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM ||
      Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU) {
    // We can get 256-bit 8 element types here without VLX being enabled. When
    // this happens we will use 512-bit operations and the mask will not be
    // zero extended.
    EVT OpVT = N->getOperand(0).getValueType();
    if (OpVT == MVT::v8i32 || OpVT == MVT::v8f32)
      return Subtarget->hasVLX();

    return true;
  }

  return false;
}

// Returns true if we can assume the writer of the mask has zero extended it
// for us.
bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
  // If this is an AND, check if we have a compare on either side. As long as
  // one side guarantees the mask is zero extended, the AND will preserve those
  // zeros.
  if (N->getOpcode() == ISD::AND)
    return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
           isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);

  return isLegalMaskCompare(N, Subtarget);
}

bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (!N.hasOneUse())
    return false;

  if (N.getOpcode() != ISD::LOAD)
    return true;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::SUB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDCARRY:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is an 8-bit immediate we should fold the
      // immediate instead. This reduces code size.
      // e.g.
      //   movl 4(%esp), %eax
      //   addl $4, %eax
      // vs.
      //   movl $4, %eax
      //   addl 4(%esp), %eax
      // The former is 2 bytes shorter. In the case where the increment is 1,
      // the saving can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      //   movl %gs:0, %eax
      //   leal i@NTPOFF(%eax), %eax
      // instead of
      //   movl $i@NTPOFF, %eax
      //   addl %gs:0, %eax
      // If the block also has an access to a second TLS address this will save
      // a load.
      // FIXME: This is probably also true for non-TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }
  }

  return true;
}

/// Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
        CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
    Ops.clear();
    Ops.push_back(NewChain);
  }
  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  Ops.append(Call->op_begin() + 1, Call->op_end());
  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}

/// Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  // The transformation is somewhat dangerous if the call's chain was glued to
  // the call. After MoveBelowOrigChain the load is moved between the call and
  // the chain, this can create a cycle if the load is not folded. So it is
  // *really* important that we are sure the load will be folded.
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (!Chain.getNumOperands())
    return false;
  // Since we are not checking for AA here, conservatively abort if the chain
  // writes to memory. It's not safe to move the callee (a load) across a store.
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
    return false;
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}

void X86DAGToDAGISel::PreprocessISelDAG() {
  // OptFor[Min]Size are used in pattern predicates that isel is matching.
  OptForSize = MF->getFunction()->optForSize();
  OptForMinSize = MF->getFunction()->optForMinSize();
  assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
        // Only do this when the target doesn't favor register indirect
        // calls.
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          // Only do this if the load can be folded into TC_RETURN.
          (Subtarget->is64Bit() ||
           !getTargetMachine().isPositionIndependent())))) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to be a store
    // and a load to the stack. This is a gross hack. We would like to simply
    // mark these as being illegal, but when we do that, legalize produces
    // these when it expands calls, then expands these in the same legalize
    // pass. We would like dag combine to be able to hack on these between the
    // call expansion and the node legalization. As such this pass basically
    // does "really late" legalization of these inline with the X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    MVT SrcVT = N->getOperand(0).getSimpleValueType();
    MVT DstVT = N->getSimpleValueType(0);

    // If any of the sources are vectors, no fp stack involved.
    if (SrcVT.isVector() || DstVT.isVector())
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    const X86TargetLowering *X86Lowering =
        static_cast<const X86TargetLowering *>(TLI);
    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore. SSE can fold direct loads into other
    // operations. Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    SDLoc dl(N);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store =
        CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
                              MemTmp, MachinePointerInfo(), MemVT);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        MachinePointerInfo(), MemVT);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created. This will cause general havoc on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}
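
// Editor's note (illustrative, not part of the original source): as an
// example of the preprocessing above, an (f64 fpextend (f32 x)) whose
// operand lives in an SSE register but whose result is consumed on the x87
// stack is rewritten into a store of the f32 value to a stack temporary
// followed by an f64 extending load of that slot, so no direct (illegal)
// SSE <-> x87 conversion node survives into instruction selection.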


/// Emit any code that needs to be executed only in the main function.
void X86DAGToDAGISel::emitSpecialCodeForMain() {
  if (Subtarget->isTargetCygMing()) {
    TargetLowering::ArgListTy Args;
    auto &DL = CurDAG->getDataLayout();

    TargetLowering::CallLoweringInfo CLI(*CurDAG);
    CLI.setChain(CurDAG->getRoot())
        .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
                   CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
                   std::move(Args));
    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
    CurDAG->setRoot(Result.second);
  }
}

void X86DAGToDAGISel::EmitFunctionEntryCode() {
  // If this is main, emit special code for main.
  if (const Function *Fn = MF->getFunction())
    if (Fn->hasExternalLinkage() && Fn->getName() == "main")
      emitSpecialCodeForMain();
}

static bool isDispSafeForFrameIndex(int64_t Val) {
  // On 64-bit platforms, we can run into an issue where a frame index
  // includes a displacement that, when added to the explicit displacement,
  // will overflow the displacement field. Assuming that the frame index
  // displacement fits into a 31-bit integer (which is only slightly more
  // aggressive than the current fundamental assumption that it fits into
  // a 32-bit integer), a 31-bit disp should always be safe.
  return isInt<31>(Val);
}
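
// Editor's note (illustrative, not part of the original source): the 31-bit
// bound leaves headroom for the later addition; even two worst-case values,
//   (2^30 - 1) + (2^30 - 1) == 2^31 - 2,
// still fit in the signed 32-bit displacement field.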

bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {
  // Cannot combine ExternalSymbol displacements with integer offsets.
  if (Offset != 0 && (AM.ES || AM.MCSym))
    return true;
  int64_t Val = AM.Disp + Offset;
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit()) {
    if (!X86::isOffsetSuitableForCodeModel(Val, M,
                                           AM.hasSymbolicDisplacement()))
      return true;
    // In addition to the checks required for a register base, check that
    // we do not try to use an unsafe Disp with a frame index.
    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
        !isDispSafeForFrameIndex(Val))
      return true;
  }
  AM.Disp = Val;
  return false;
}

bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
  SDValue Address = N->getOperand(1);

  // load gs:0 -> GS segment register.
  // load fs:0 -> FS segment register.
  //
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
        (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
         Subtarget->isTargetFuchsia()))
      switch (N->getPointerInfo().getAddrSpace()) {
      case 256:
        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
        return false;
      case 257:
        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
        return false;
      // Address space 258 is not handled here, because it is not used to
      // address TLS areas.
      }

  return true;
}
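
// Editor's note (illustrative, not part of the original source): assuming
// the glibc-style TLS layout described above, IR along the lines of
//   %self = load i32*, i32* addrspace(256)* null
// matches here and selects to "movl %gs:0, %eax", with GS recorded as the
// segment register of the addressing mode.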

/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
/// mode. These wrap things that will resolve down into a symbol reference.
/// If no match is possible, this returns true, otherwise it returns false.
bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  SDValue N0 = N.getOperand(0);
  CodeModel::Model M = TM.getCodeModel();

  // Handle X86-64 rip-relative addresses. We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      (M == CodeModel::Small || M == CodeModel::Kernel)) {
    // Base and index reg must be 0 in order to use %rip as base.
    if (AM.hasBaseOrIndexReg())
      return true;
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.GV = G->getGlobal();
      AM.SymbolFlags = G->getTargetFlags();
      if (foldOffsetIntoAddress(G->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.SymbolFlags = CP->getTargetFlags();
      if (foldOffsetIntoAddress(CP->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
      AM.MCSym = S->getMCSymbol();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.BlockAddr = BA->getBlockAddress();
      AM.SymbolFlags = BA->getTargetFlags();
      if (foldOffsetIntoAddress(BA->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else
      llvm_unreachable("Unhandled symbol reference node.");

    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit
  // mode, this only applies to a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      M == CodeModel::Small || M == CodeModel::Kernel) {
    assert(N.getOpcode() != X86ISD::WrapperRIP &&
           "RIP-relative addressing already handled");
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
      AM.MCSym = S->getMCSymbol();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      AM.BlockAddr = BA->getBlockAddress();
      AM.Disp += BA->getOffset();
      AM.SymbolFlags = BA->getTargetFlags();
    } else
      llvm_unreachable("Unhandled symbol reference node.");
    return false;
  }

  return true;
}

/// Add the specified node to the specified addressing mode, returning true if
/// it cannot be done. This just pattern matches for the addressing mode.
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (matchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr &&
      AM.IndexReg.getNode() == nullptr &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}
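
// Editor's note (illustrative, not part of the original source): the two
// post-processing rewrites above turn, e.g.,
//   leal (,%eax,2), %ecx   into   leal (%eax,%eax), %ecx
// and, in the small code model on x86-64,
//   movl sym, %eax         into   movl sym(%rip), %eax
// both of which have shorter encodings.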

bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
                               unsigned Depth) {
  // Add an artificial use to this node so that we can keep track of
  // it if it gets CSE'd with a different node.
  HandleSDNode Handle(N);

  X86ISelAddressMode Backup = AM;
  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
    return false;
  AM = Backup;

  // Try again after commuting the operands.
  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
    return false;
  AM = Backup;

  // If we couldn't fold both operands into the address at the same time,
  // see if we can just put each operand into a register and fold at least
  // the add.
  if (AM.BaseType == X86ISelAddressMode::RegBase &&
      !AM.Base_Reg.getNode() &&
      !AM.IndexReg.getNode()) {
    N = Handle.getValue();
    AM.Base_Reg = N.getOperand(0);
    AM.IndexReg = N.getOperand(1);
    AM.Scale = 1;
    return false;
  }
  N = Handle.getValue();
  return true;
}

// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  if (N.getNode()->getNodeId() == -1 ||
      N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
    DAG.RepositionNode(Pos.getNode()->getIterator(), N.getNode());
    N.getNode()->setNodeId(Pos.getNode()->getNodeId());
  }
}

// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
// safe. This allows us to convert the shift and and into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed.
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask,
                                      SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
      !Shift.hasOneUse())
    return true;

  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, Eight);
  insertDAGNode(DAG, N, Srl);
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, And);
  insertDAGNode(DAG, N, ShlCount);
  insertDAGNode(DAG, N, Shl);
  DAG.ReplaceAllUsesWith(N, Shl);
  AM.IndexReg = And;
  AM.Scale = (1 << ScaleLog);
  return false;
}
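
// Editor's sketch (hypothetical helper, not part of the original source): a
// scalar check of the rewrite above for C1 == 2, i.e. Mask == (0xff << 2).
// Both forms isolate bits 8..15 of X and place them at bit 2; the inner AND
// becomes an h-register extract and the trailing shl becomes AM.Scale == 4.
static LLVM_ATTRIBUTE_UNUSED bool checkExtractRewrite(uint32_t X) {
  uint32_t Before = (X >> 6) & (0xffu << 2);  // (X >> (8-C1)) & (0xff << C1)
  uint32_t After  = ((X >> 8) & 0xffu) << 2;  // ((X >> 8) & 0xff) << C1
  return Before == After;                     // holds for every X
}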

// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        uint64_t Mask,
                                        SDValue Shift, SDValue X,
                                        X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SHL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  // Not likely to be profitable if either the AND or SHIFT node has more
  // than one use (unless all uses are for address computation). Besides,
  // isel mechanism requires their node ids to be reused.
  if (!N.hasOneUse() || !Shift.hasOneUse())
    return true;

  // Verify that the shift amount is something we can fold.
  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, NewAnd);
  insertDAGNode(DAG, N, NewShift);
  DAG.ReplaceAllUsesWith(N, NewShift);

  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;
  return false;
}
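
// Editor's sketch (hypothetical helper, not part of the original source): a
// scalar check of the rewrite above for C1 == 3 and C2 == 0xf8. Hoisting the
// shift outward lets it fold into the addressing mode as AM.Scale == 8.
static LLVM_ATTRIBUTE_UNUSED bool checkScaledMaskRewrite(uint32_t X) {
  uint32_t Before = (X << 3) & 0xf8u;         // (X << C1) & C2
  uint32_t After  = (X & (0xf8u >> 3)) << 3;  // (X & (C2 >> C1)) << C1
  return Before == After;                     // holds for every X
}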

// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %ecx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask,
                                    SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  unsigned MaskLZ = countLeadingZeros(Mask);
  unsigned MaskTZ = countTrailingZeros(Mask);

  // The amount of shift we're trying to fit into the addressing mode is taken
  // from the trailing zeros of the mask.
  unsigned AMShiftAmt = MaskTZ;

  // There is nothing we can do here unless the mask is removing some bits.
  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;

  // We also need to ensure that the mask is a contiguous run of bits.
  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;

  // Scale the leading zero count down based on the actual size of the value.
  // Also scale it down based on the size of the shift.
  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
  if (MaskLZ < ScaleDown)
    return true;
  MaskLZ -= ScaleDown;

  // The final check is to ensure that any masked out high bits of X are
  // already known to be zero. Otherwise, the mask has a semantic impact
  // other than masking out a couple of low bits. Unfortunately, because of
  // the mask, zero extensions will be removed from operands in some cases.
  // This code works extra hard to look through extensions because we can
  // replace them with zero extensions cheaply if necessary.
  bool ReplacingAnyExtend = false;
  if (X.getOpcode() == ISD::ANY_EXTEND) {
    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();
    // Assume that we'll replace the any-extend with a zero-extend, and
    // narrow the search to the extended value.
    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;
  }
  APInt MaskedHighBits =
      APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
  KnownBits Known;
  DAG.computeKnownBits(X, Known);
  if (MaskedHighBits != Known.Zero) return true;

  // We've identified a pattern that can be transformed into a single shift
  // and an addressing mode. Make it so.
  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);
    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
    insertDAGNode(DAG, N, NewX);
    X = NewX;
  }
  SDLoc DL(N);
  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewSRLAmt);
  insertDAGNode(DAG, N, NewSRL);
  insertDAGNode(DAG, N, NewSHLAmt);
  insertDAGNode(DAG, N, NewSHL);
  DAG.ReplaceAllUsesWith(N, NewSHL);

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewSRL;
  return false;
}

bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  SDLoc dl(N);
  DEBUG({
    dbgs() << "MatchAddress: ";
    AM.dump();
  });
  // Limit recursion.
  if (Depth > 5)
    return matchAddressBase(N, AM);

  // If this is already a %rip relative address, we can only merge immediates
  // into it. Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements. It isn't very important, but this should be fixed for
    // consistency.
    if (!(AM.ES || AM.MCSym) && AM.JT != -1)
      return true;

    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
      if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
        return false;
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::LOCAL_RECOVER: {
    if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
      if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
        // Use the symbol and don't prefix it.
        AM.MCSym = ESNode->getMCSymbol();
        return false;
      }
    break;
  }
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!foldOffsetIntoAddress(Val, AM))
      return false;
    break;
  }

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    if (!matchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
      return false;
    break;

  case ISD::FrameIndex:
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;

    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getOperand(0);

        // Okay, we know that we have a scale by now. However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
          AM.IndexReg = ShVal.getOperand(0);
          ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
          uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
          if (!foldOffsetIntoAddress(Disp, AM))
            return false;
        }

        AM.IndexReg = ShVal;
        return false;
      }
    }
    break;

  case ISD::SRL: {
    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue And = N.getOperand(0);
    if (And.getOpcode() != ISD::AND) break;
    SDValue X = And.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    // The mask used for the transform is expected to be post-shift, but we
    // found the shift first so just apply the shift to the mask before passing
    // it down.
    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
        !isa<ConstantSDNode>(And.getOperand(1)))
      break;
    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);

    // Try to fold the mask and shift into the scale, and return false if we
    // succeed.
    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
      return false;
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    LLVM_FALLTHROUGH;
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr) {
      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now. However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getOperand(1))) {
            Reg = MulVal.getOperand(0);
            ConstantSDNode *AddVal =
                cast<ConstantSDNode>(MulVal.getOperand(1));
            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
            if (foldOffsetIntoAddress(Disp, AM))
              Reg = N.getOperand(0);
          } else {
            Reg = N.getOperand(0);
          }

          AM.IndexReg = AM.Base_Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address and
    // the index field with the index field unused, use -B as the index.
    // This is a win if A has multiple parts that can be folded into
    // the address. Also, this saves a mov if the base register has
    // other uses, since it avoids a two-address sub instruction; however,
    // it costs an additional mov if the index register has other uses.

    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }

    int Cost = 0;
    SDValue RHS = Handle.getValue().getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    // FIXME: Don't rely on DELETED_NODEs.
    if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
         AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;
    AM.Scale = 1;

    // Insert the new nodes into the topological ordering.
    insertDAGNode(*CurDAG, Handle.getValue(), Zero);
    insertDAGNode(*CurDAG, Handle.getValue(), Neg);
    return false;
  }

  case ISD::ADD:
    if (!matchAdd(N, AM, Depth))
      return false;
    break;

  case ISD::OR:
    // We want to look through a transform in InstCombine and DAGCombiner that
    // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
    // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
    // An 'lea' can then be used to match the shift (multiply) and add:
    //   and $1, %esi
    //   lea (%rsi, %rdi, 8), %rax
    if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
        !matchAdd(N, AM, Depth))
      return false;
    break;

  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue Shift = N.getOperand(0);
    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
    SDValue X = Shift.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    if (!isa<ConstantSDNode>(N.getOperand(1)))
      break;
    uint64_t Mask = N.getConstantOperandVal(1);

    // Try to fold the mask and shift into an extract and scale.
    if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to fold the mask and shift directly into the scale.
    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to swap the mask and shift to place shifts which can be done as
    // a scale on the outside of the mask.
    if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
      return false;
    break;
  }
  }

  return matchAddressBase(N, AM);
}
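
// Editor's note (illustrative, not part of the original source): taken
// together, the cases above let an expression like (add y, (shl x, 1)) plus
// a constant offset match the full [base + index*scale + disp] form, e.g.
//   leaq 8(%rdi,%rsi,2), %rax
// with y in %rdi, x in %rsi, scale 2, and displacement 8.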

/// Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (!AM.IndexReg.getNode()) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base_Reg = N;
  return false;
}

/// Helper for selectVectorAddr. Handles things that can be folded into a
/// gather/scatter address. The index register and scale should have already
/// been handled.
bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
  // TODO: Support other operations.
  switch (N.getOpcode()) {
  case X86ISD::Wrapper:
    if (!matchWrapper(N, AM))
      return false;
    break;
  }

  return matchAddressBase(N, AM);
}
1520 
1521 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
1522  SDValue &Scale, SDValue &Index,
1523  SDValue &Disp, SDValue &Segment) {
1524  X86ISelAddressMode AM;
1525  if (auto Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent)) {
1526  AM.IndexReg = Mgs->getIndex();
1527  AM.Scale = Mgs->getValue().getScalarValueSizeInBits() / 8;
1528  } else {
1529  auto X86Gather = cast<X86MaskedGatherSDNode>(Parent);
1530  AM.IndexReg = X86Gather->getIndex();
1531  AM.Scale = X86Gather->getValue().getScalarValueSizeInBits() / 8;
1532  }
1533 
1534  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1535  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1536  if (AddrSpace == 256)
1537  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1538  if (AddrSpace == 257)
1539  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1540  if (AddrSpace == 258)
1541  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1542 
1543  // If Base is 0, the whole address is in the index register and the scale is 1.
1544  if (isa<ConstantSDNode>(N)) {
1545  assert(cast<ConstantSDNode>(N)->isNullValue() &&
1546  "Unexpected base in gather/scatter");
1547  AM.Scale = 1;
1548  }
1549  // Otherwise, try to match into the base and displacement fields.
1550  else if (matchVectorAddress(N, AM))
1551  return false;
1552 
1553  MVT VT = N.getSimpleValueType();
1554  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1555  if (!AM.Base_Reg.getNode())
1556  AM.Base_Reg = CurDAG->getRegister(0, VT);
1557  }
1558 
1559  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1560  return true;
1561 }
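// Aside: a hypothetical helper (not in this file) condensing the segment
// dispatch used above and repeated in selectAddr below. Address spaces 256,
// 257, and 258 are the LLVM x86 conventions for GS-, FS-, and SS-relative
// memory.
static unsigned segmentRegForAddrSpace(unsigned AddrSpace) {
  switch (AddrSpace) {
  case 256: return X86::GS;
  case 257: return X86::FS;
  case 258: return X86::SS;
  default:  return 0; // no segment override
  }
}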
1562 
1563 /// Returns true if it is able to pattern match an addressing mode.
1564 /// It returns the operands which make up the maximal addressing mode it can
1565 /// match by reference.
1566 ///
1567 /// Parent is the parent node of the addr operand that is being matched. It
1568 /// is always a load, store, atomic node, or null. It is only null when
1569 /// checking memory operands for inline asm nodes.
1570 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
1571  SDValue &Scale, SDValue &Index,
1572  SDValue &Disp, SDValue &Segment) {
1573  X86ISelAddressMode AM;
1574 
1575  if (Parent &&
1576  // This list of opcodes are all the nodes that have an "addr:$ptr" operand
1577  // that are not a MemSDNode, and thus don't have proper addrspace info.
1578  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
1579  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
1580  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
1581  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
1582  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
1583  unsigned AddrSpace =
1584  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1585  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1586  if (AddrSpace == 256)
1587  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1588  if (AddrSpace == 257)
1589  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1590  if (AddrSpace == 258)
1591  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1592  }
1593 
1594  if (matchAddress(N, AM))
1595  return false;
1596 
1597  MVT VT = N.getSimpleValueType();
1598  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1599  if (!AM.Base_Reg.getNode())
1600  AM.Base_Reg = CurDAG->getRegister(0, VT);
1601  }
1602 
1603  if (!AM.IndexReg.getNode())
1604  AM.IndexReg = CurDAG->getRegister(0, VT);
1605 
1606  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1607  return true;
1608 }
1609 
1610 // We can only fold a load if all nodes between it and the root node have a
1611 // single use. If there are additional uses, we could end up duplicating the
1612 // load.
1613 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *N) {
1614  SDNode *User = *N->use_begin();
1615  while (User != Root) {
1616  if (!User->hasOneUse())
1617  return false;
1618  User = *User->use_begin();
1619  }
1620 
1621  return true;
1622 }
1623 
1624 /// Match a scalar SSE load. In particular, we want to match a load whose top
1625 /// elements are either undef or zeros. The load flavor is derived from the
1626 /// type of N, which is either v4f32 or v2f64.
1627 ///
1628 /// We also return:
1629 /// PatternNodeWithChain: this is the matched node that has a chain input
1630 /// and output.
1631 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
1632  SDValue N, SDValue &Base,
1633  SDValue &Scale, SDValue &Index,
1634  SDValue &Disp, SDValue &Segment,
1635  SDValue &PatternNodeWithChain) {
1636  // We can allow a full vector load here since narrowing a load is ok.
1637  if (ISD::isNON_EXTLoad(N.getNode())) {
1638  PatternNodeWithChain = N;
1639  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1640  IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
1641  hasSingleUsesFromRoot(Root, N.getNode())) {
1642  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1643  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1644  Segment);
1645  }
1646  }
1647 
1648  // We can also match the special zero extended load opcode.
1649  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
1650  PatternNodeWithChain = N;
1651  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1652  IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
1653  hasSingleUsesFromRoot(Root, N.getNode())) {
1654  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
1655  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
1656  Segment);
1657  }
1658  }
1659 
1660  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
1661  // once. Otherwise the load might get duplicated and the chain output of the
1662  // duplicate load will not be observed by all dependencies.
1663  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
1664  PatternNodeWithChain = N.getOperand(0);
1665  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1666  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1667  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
1668  hasSingleUsesFromRoot(Root, N.getNode())) {
1669  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1670  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1671  Segment);
1672  }
1673  }
1674 
1675  // Also handle the case where we explicitly require zeros in the top
1676  // elements. This is a vector shuffle from the zero vector.
1677  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1678  // Check to see if the top elements are all zeros (or bitcast of zeros).
1679  N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1680  N.getOperand(0).getNode()->hasOneUse()) {
1681  PatternNodeWithChain = N.getOperand(0).getOperand(0);
1682  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1683  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1684  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
1685  hasSingleUsesFromRoot(Root, N.getNode())) {
1686  // Okay, this is a zero extending load. Fold it.
1687  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1688  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1689  Segment);
1690  }
1691  }
1692 
1693  return false;
1694 }
1695 
1696 
1697 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
1698  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1699  uint64_t ImmVal = CN->getZExtValue();
1700  if (!isUInt<32>(ImmVal))
1701  return false;
1702 
1703  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
1704  return true;
1705  }
1706 
1707  // In static codegen with small code model, we can get the address of a label
1708  // into a register with 'movl'. TableGen has already made sure we're looking
1709  // at a label of some kind.
1710  assert(N->getOpcode() == X86ISD::Wrapper &&
1711  "Unexpected node type for MOV32ri64");
1712  N = N.getOperand(0);
1713 
1714  // At least GNU as does not accept 'movl' for TPOFF relocations.
1715  // FIXME: We could use 'movl' when we know we are targeting MC.
1716  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
1717  return false;
1718 
1719  Imm = N;
1720  if (N->getOpcode() != ISD::TargetGlobalAddress)
1721  return TM.getCodeModel() == CodeModel::Small;
1722 
1723  Optional<ConstantRange> CR =
1724  cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
1725  if (!CR)
1726  return TM.getCodeModel() == CodeModel::Small;
1727 
1728  return CR->getUnsignedMax().ult(1ull << 32);
1729 }
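// Aside: a small sketch (not from this file) of the constant check above.
// 'movl $imm32, %e_x' zero-extends into the full 64-bit register, so a
// 64-bit constant qualifies exactly when it fits in an unsigned 32-bit
// immediate, which is what isUInt<32> tests.
#include <cstdint>
static bool fitsMOV32ri64(uint64_t V) {
  return V <= 0xFFFFFFFFull; // equivalent to isUInt<32>(V)
}
// fitsMOV32ri64(0xDEADBEEF)         -> true  (movl $0xDEADBEEF, %eax)
// fitsMOV32ri64(0xFFFFFFFF00000000) -> false (needs movabsq)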
1730 
1731 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
1732  SDValue &Scale, SDValue &Index,
1733  SDValue &Disp, SDValue &Segment) {
1734  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
1735  SDLoc DL(N);
1736 
1737  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
1738  return false;
1739 
1740  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
1741  if (RN && RN->getReg() == 0)
1742  Base = CurDAG->getRegister(0, MVT::i64);
1743  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
1744  // Base could already be %rip, particularly in the x32 ABI.
1745  Base = SDValue(CurDAG->getMachineNode(
1746  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1747  CurDAG->getTargetConstant(0, DL, MVT::i64),
1748  Base,
1749  CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
1750  0);
1751  }
1752 
1753  RN = dyn_cast<RegisterSDNode>(Index);
1754  if (RN && RN->getReg() == 0)
1755  Index = CurDAG->getRegister(0, MVT::i64);
1756  else {
1757  assert(Index.getValueType() == MVT::i32 &&
1758  "Expect to be extending 32-bit registers for use in LEA");
1759  Index = SDValue(CurDAG->getMachineNode(
1760  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1761  CurDAG->getTargetConstant(0, DL, MVT::i64),
1762  Index,
1763  CurDAG->getTargetConstant(X86::sub_32bit, DL,
1764  MVT::i32)),
1765  0);
1766  }
1767 
1768  return true;
1769 }
1770 
1771 /// Calls selectAddr and determines if the maximal addressing
1772 /// mode it matches can be cost effectively emitted as an LEA instruction.
1773 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
1774  SDValue &Base, SDValue &Scale,
1775  SDValue &Index, SDValue &Disp,
1776  SDValue &Segment) {
1777  X86ISelAddressMode AM;
1778 
1779  // Save the DL and VT before calling matchAddress; it can invalidate N.
1780  SDLoc DL(N);
1781  MVT VT = N.getSimpleValueType();
1782 
1783  // Set AM.Segment to prevent matchAddress from using one. LEA doesn't support
1784  // segments.
1785  SDValue Copy = AM.Segment;
1786  SDValue T = CurDAG->getRegister(0, MVT::i32);
1787  AM.Segment = T;
1788  if (matchAddress(N, AM))
1789  return false;
1790  assert (T == AM.Segment);
1791  AM.Segment = Copy;
1792 
1793  unsigned Complexity = 0;
1794  if (AM.BaseType == X86ISelAddressMode::RegBase)
1795  if (AM.Base_Reg.getNode())
1796  Complexity = 1;
1797  else
1798  AM.Base_Reg = CurDAG->getRegister(0, VT);
1799  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1800  Complexity = 4;
1801 
1802  if (AM.IndexReg.getNode())
1803  Complexity++;
1804  else
1805  AM.IndexReg = CurDAG->getRegister(0, VT);
1806 
1807  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg or
1808  // a simple shift.
1809  if (AM.Scale > 1)
1810  Complexity++;
1811 
1812  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1813  // to a LEA. This is determined with some experimentation but is by no means
1814  // optimal (especially for code size consideration). LEA is nice because of
1815  // its three-address nature. Tweak the cost function again when we can run
1816  // convertToThreeAddress() at register allocation time.
1817  if (AM.hasSymbolicDisplacement()) {
1818  // For X86-64, always use LEA to materialize RIP-relative addresses.
1819  if (Subtarget->is64Bit())
1820  Complexity = 4;
1821  else
1822  Complexity += 2;
1823  }
1824 
1825  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
1826  Complexity++;
1827 
1828  // If it isn't worth using an LEA, reject it.
1829  if (Complexity <= 2)
1830  return false;
1831 
1832  getAddressOperands(AM, DL, Base, Scale, Index, Disp, Segment);
1833  return true;
1834 }
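// Aside: a condensed model (hypothetical, mirroring the scoring above) of
// when an LEA is considered worthwhile. The address form must score more
// than 2, so e.g. a lone scaled index 'leal (,%reg,2)' loses to an add or
// shift, while a frame index or a 64-bit RIP-relative symbol wins outright.
struct LEAShape {
  bool HasBaseReg, IsFrameIndex, HasIndex, ScaleGT1;
  bool HasSymbol, HasDisp, Is64Bit;
};
static bool worthUsingLEA(const LEAShape &S) {
  unsigned Complexity = 0;
  if (S.IsFrameIndex)
    Complexity = 4;
  else if (S.HasBaseReg)
    Complexity = 1;
  if (S.HasIndex)  Complexity++;
  if (S.ScaleGT1)  Complexity++;
  if (S.HasSymbol) Complexity = S.Is64Bit ? 4 : Complexity + 2;
  if (S.HasDisp && (S.HasBaseReg || S.HasIndex))
    Complexity++;
  return Complexity > 2;
}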
1835 
1836 /// This is only run on TargetGlobalTLSAddress nodes.
1837 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
1838  SDValue &Scale, SDValue &Index,
1839  SDValue &Disp, SDValue &Segment) {
1840  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1841  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1842 
1843  X86ISelAddressMode AM;
1844  AM.GV = GA->getGlobal();
1845  AM.Disp += GA->getOffset();
1846  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
1847  AM.SymbolFlags = GA->getTargetFlags();
1848 
1849  if (N.getValueType() == MVT::i32) {
1850  AM.Scale = 1;
1851  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1852  } else {
1853  AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1854  }
1855 
1856  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1857  return true;
1858 }
1859 
1860 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
1861  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
1862  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
1863  N.getValueType());
1864  return true;
1865  }
1866 
1867  // Keep track of the original value type and whether this value was
1868  // truncated. If we see a truncation from pointer type to VT that truncates
1869  // bits that are known to be zero, we can use a narrow reference.
1870  EVT VT = N.getValueType();
1871  bool WasTruncated = false;
1872  if (N.getOpcode() == ISD::TRUNCATE) {
1873  WasTruncated = true;
1874  N = N.getOperand(0);
1875  }
1876 
1877  if (N.getOpcode() != X86ISD::Wrapper)
1878  return false;
1879 
1880  // We can only use non-GlobalValues as immediates if they were not truncated,
1881  // as we do not have any range information. If we have a GlobalValue and the
1882  // address was not truncated, we can select it as an operand directly.
1883  unsigned Opc = N.getOperand(0)->getOpcode();
1884  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
1885  Op = N.getOperand(0);
1886  // We can only select the operand directly if we didn't have to look past a
1887  // truncate.
1888  return !WasTruncated;
1889  }
1890 
1891  // Check that the global's range fits into VT.
1892  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
1893  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
1894  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
1895  return false;
1896 
1897  // Okay, we can use a narrow reference.
1898  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
1899  GA->getOffset(), GA->getTargetFlags());
1900  return true;
1901 }
1902 
1903 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
1904  SDValue &Base, SDValue &Scale,
1905  SDValue &Index, SDValue &Disp,
1906  SDValue &Segment) {
1907  if (!ISD::isNON_EXTLoad(N.getNode()) ||
1908  !IsProfitableToFold(N, P, Root) ||
1909  !IsLegalToFold(N, P, Root, OptLevel))
1910  return false;
1911 
1912  return selectAddr(N.getNode(),
1913  N.getOperand(1), Base, Scale, Index, Disp, Segment);
1914 }
1915 
1916 /// Return an SDNode that returns the value of the global base register.
1917 /// Output instructions required to initialize the global base register,
1918 /// if necessary.
1919 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1920  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1921  auto &DL = MF->getDataLayout();
1922  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
1923 }
1924 
1925 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
1926  if (N->getOpcode() == ISD::TRUNCATE)
1927  N = N->getOperand(0).getNode();
1928  if (N->getOpcode() != X86ISD::Wrapper)
1929  return false;
1930 
1931  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
1932  if (!GA)
1933  return false;
1934 
1935  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
1936  return CR && CR->getSignedMin().sge(-1ull << Width) &&
1937  CR->getSignedMax().slt(1ull << Width);
1938 }
1939 
1940 /// Test whether the given X86ISD::CMP node has any uses which require the SF
1941 /// or OF bits to be accurate.
1942 static bool hasNoSignedComparisonUses(SDNode *N) {
1943  // Examine each user of the node.
1944  for (SDNode::use_iterator UI = N->use_begin(),
1945  UE = N->use_end(); UI != UE; ++UI) {
1946  // Only examine CopyToReg uses.
1947  if (UI->getOpcode() != ISD::CopyToReg)
1948  return false;
1949  // Only examine CopyToReg uses that copy to EFLAGS.
1950  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
1951  X86::EFLAGS)
1952  return false;
1953  // Examine each user of the CopyToReg use.
1954  for (SDNode::use_iterator FlagUI = UI->use_begin(),
1955  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
1956  // Only examine the Flag result.
1957  if (FlagUI.getUse().getResNo() != 1) continue;
1958  // Anything unusual: assume conservatively.
1959  if (!FlagUI->isMachineOpcode()) return false;
1960  // Examine the opcode of the user.
1961  switch (FlagUI->getMachineOpcode()) {
1962  // These comparisons don't treat the most significant bit specially.
1963  case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
1964  case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
1965  case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
1966  case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
1967  case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
1968  case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
1969  case X86::CMOVA16rr: case X86::CMOVA16rm:
1970  case X86::CMOVA32rr: case X86::CMOVA32rm:
1971  case X86::CMOVA64rr: case X86::CMOVA64rm:
1972  case X86::CMOVAE16rr: case X86::CMOVAE16rm:
1973  case X86::CMOVAE32rr: case X86::CMOVAE32rm:
1974  case X86::CMOVAE64rr: case X86::CMOVAE64rm:
1975  case X86::CMOVB16rr: case X86::CMOVB16rm:
1976  case X86::CMOVB32rr: case X86::CMOVB32rm:
1977  case X86::CMOVB64rr: case X86::CMOVB64rm:
1978  case X86::CMOVBE16rr: case X86::CMOVBE16rm:
1979  case X86::CMOVBE32rr: case X86::CMOVBE32rm:
1980  case X86::CMOVBE64rr: case X86::CMOVBE64rm:
1981  case X86::CMOVE16rr: case X86::CMOVE16rm:
1982  case X86::CMOVE32rr: case X86::CMOVE32rm:
1983  case X86::CMOVE64rr: case X86::CMOVE64rm:
1984  case X86::CMOVNE16rr: case X86::CMOVNE16rm:
1985  case X86::CMOVNE32rr: case X86::CMOVNE32rm:
1986  case X86::CMOVNE64rr: case X86::CMOVNE64rm:
1987  case X86::CMOVNP16rr: case X86::CMOVNP16rm:
1988  case X86::CMOVNP32rr: case X86::CMOVNP32rm:
1989  case X86::CMOVNP64rr: case X86::CMOVNP64rm:
1990  case X86::CMOVP16rr: case X86::CMOVP16rm:
1991  case X86::CMOVP32rr: case X86::CMOVP32rm:
1992  case X86::CMOVP64rr: case X86::CMOVP64rm:
1993  continue;
1994  // Anything else: assume conservatively.
1995  default: return false;
1996  }
1997  }
1998  }
1999  return true;
2000 }
2001 
2002 /// Test whether the given node which sets flags has any uses which require the
2003 /// CF flag to be accurate.
2004 static bool hasNoCarryFlagUses(SDNode *N) {
2005  // Examine each user of the node.
2006  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
2007  ++UI) {
2008  // Only check things that use the flags.
2009  if (UI.getUse().getResNo() != 1)
2010  continue;
2011  // Only examine CopyToReg uses.
2012  if (UI->getOpcode() != ISD::CopyToReg)
2013  return false;
2014  // Only examine CopyToReg uses that copy to EFLAGS.
2015  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2016  return false;
2017  // Examine each user of the CopyToReg use.
2018  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2019  FlagUI != FlagUE; ++FlagUI) {
2020  // Only examine the Flag result.
2021  if (FlagUI.getUse().getResNo() != 1)
2022  continue;
2023  // Anything unusual: assume conservatively.
2024  if (!FlagUI->isMachineOpcode())
2025  return false;
2026  // Examine the opcode of the user.
2027  switch (FlagUI->getMachineOpcode()) {
2028  // Comparisons which don't examine the CF flag.
2029  case X86::SETOr: case X86::SETNOr: case X86::SETEr: case X86::SETNEr:
2030  case X86::SETSr: case X86::SETNSr: case X86::SETPr: case X86::SETNPr:
2031  case X86::SETLr: case X86::SETGEr: case X86::SETLEr: case X86::SETGr:
2032  case X86::JO_1: case X86::JNO_1: case X86::JE_1: case X86::JNE_1:
2033  case X86::JS_1: case X86::JNS_1: case X86::JP_1: case X86::JNP_1:
2034  case X86::JL_1: case X86::JGE_1: case X86::JLE_1: case X86::JG_1:
2035  case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
2036  case X86::CMOVO16rm: case X86::CMOVO32rm: case X86::CMOVO64rm:
2037  case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr:
2038  case X86::CMOVNO16rm: case X86::CMOVNO32rm: case X86::CMOVNO64rm:
2039  case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
2040  case X86::CMOVE16rm: case X86::CMOVE32rm: case X86::CMOVE64rm:
2041  case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
2042  case X86::CMOVNE16rm: case X86::CMOVNE32rm: case X86::CMOVNE64rm:
2043  case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
2044  case X86::CMOVS16rm: case X86::CMOVS32rm: case X86::CMOVS64rm:
2045  case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
2046  case X86::CMOVNS16rm: case X86::CMOVNS32rm: case X86::CMOVNS64rm:
2047  case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
2048  case X86::CMOVP16rm: case X86::CMOVP32rm: case X86::CMOVP64rm:
2049  case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
2050  case X86::CMOVNP16rm: case X86::CMOVNP32rm: case X86::CMOVNP64rm:
2051  case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
2052  case X86::CMOVL16rm: case X86::CMOVL32rm: case X86::CMOVL64rm:
2053  case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
2054  case X86::CMOVGE16rm: case X86::CMOVGE32rm: case X86::CMOVGE64rm:
2055  case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
2056  case X86::CMOVLE16rm: case X86::CMOVLE32rm: case X86::CMOVLE64rm:
2057  case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
2058  case X86::CMOVG16rm: case X86::CMOVG32rm: case X86::CMOVG64rm:
2059  continue;
2060  // Anything else: assume conservatively.
2061  default:
2062  return false;
2063  }
2064  }
2065  }
2066  return true;
2067 }
2068 
2069 /// Check whether or not the chain ending in StoreNode is suitable for doing
2070 /// the {load; op; store} to modify transformation.
2071 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2072  SDValue StoredVal, SelectionDAG *CurDAG,
2073  LoadSDNode *&LoadNode,
2074  SDValue &InputChain) {
2075  // Is the stored value result 0 of the load?
2076  if (StoredVal.getResNo() != 0) return false;
2077 
2078  // Is the operation's result used by anything other than the store?
2079  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2080 
2081  // Is the store non-extending and non-indexed?
2082  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2083  return false;
2084 
2085  SDValue Load = StoredVal->getOperand(0);
2086  // Is the stored value a non-extending and non-indexed load?
2087  if (!ISD::isNormalLoad(Load.getNode())) return false;
2088 
2089  // Return LoadNode by reference.
2090  LoadNode = cast<LoadSDNode>(Load);
2091 
2092  // Is the store the only read of the loaded value?
2093  if (!Load.hasOneUse())
2094  return false;
2095 
2096  // Is the address of the store the same as the load?
2097  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2098  LoadNode->getOffset() != StoreNode->getOffset())
2099  return false;
2100 
2101  // Check if the chain is produced by the load or is a TokenFactor with
2102  // the load output chain as an operand. Return InputChain by reference.
2103  SDValue Chain = StoreNode->getChain();
2104 
2105  bool ChainCheck = false;
2106  if (Chain == Load.getValue(1)) {
2107  ChainCheck = true;
2108  InputChain = LoadNode->getChain();
2109  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2110  SmallVector<SDValue, 4> ChainOps;
2111  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2112  SDValue Op = Chain.getOperand(i);
2113  if (Op == Load.getValue(1)) {
2114  ChainCheck = true;
2115  // Drop Load, but keep its chain. No cycle check necessary.
2116  ChainOps.push_back(Load.getOperand(0));
2117  continue;
2118  }
2119 
2120  // Make sure using Op as part of the chain would not cause a cycle here.
2121  // In theory, we could check whether the chain node is a predecessor of
2122  // the load. But that can be very expensive. Instead visit the uses and
2123  // make sure they all have smaller node id than the load.
2124  int LoadId = LoadNode->getNodeId();
2125  for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
2126  UE = UI->use_end(); UI != UE; ++UI) {
2127  if (UI.getUse().getResNo() != 0)
2128  continue;
2129  if (UI->getNodeId() > LoadId)
2130  return false;
2131  }
2132 
2133  ChainOps.push_back(Op);
2134  }
2135 
2136  if (ChainCheck)
2137  // Make a new TokenFactor with all the other input chains except
2138  // for the load.
2139  InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
2140  MVT::Other, ChainOps);
2141  }
2142  if (!ChainCheck)
2143  return false;
2144 
2145  return true;
2146 }
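// Aside: the fusion gated above, seen from C (illustrative, not from this
// file). Once the checks pass, a read-modify-write like this is selected as
// a single memory-form instruction instead of a load/op/store triple:
static void decInMemory(long *P) {
  *P -= 1; // becomes 'decq (%rdi)' rather than movq + decq + movq
}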
2147 
2148 // Change a chain of {load; op; store} of the same value into a simple op
2149 // through memory of that value, if the uses of the modified value and its
2150 // address are suitable.
2151 //
2152 // The tablegen memory-operand pattern is currently not able to match the
2153 // case where the EFLAGS from the original operation are used.
2154 //
2155 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2156 // be transferred from a node in the pattern to the result node, probably with
2157 // a new keyword. For example, we have this
2158 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2159 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2160 // (implicit EFLAGS)]>;
2161 // but maybe need something like this
2162 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2163 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2164 // (transferrable EFLAGS)]>;
2165 //
2166 // Until then, we manually fold these and instruction select the operation
2167 // here.
2168 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2169  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2170  SDValue StoredVal = StoreNode->getOperand(1);
2171  unsigned Opc = StoredVal->getOpcode();
2172 
2173  // Before we try to select anything, make sure this is a memory operand size
2174  // and opcode we can handle. Note that this must match the code below that
2175  // actually lowers the opcodes.
2176  EVT MemVT = StoreNode->getMemoryVT();
2177  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2178  MemVT != MVT::i8)
2179  return false;
2180  switch (Opc) {
2181  default:
2182  return false;
2183  case X86ISD::INC:
2184  case X86ISD::DEC:
2185  case X86ISD::ADD:
2186  case X86ISD::SUB:
2187  case X86ISD::AND:
2188  case X86ISD::OR:
2189  case X86ISD::XOR:
2190  break;
2191  }
2192 
2193  LoadSDNode *LoadNode = nullptr;
2194  SDValue InputChain;
2195  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode,
2196  InputChain))
2197  return false;
2198 
2199  SDValue Base, Scale, Index, Disp, Segment;
2200  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2201  Segment))
2202  return false;
2203 
2204  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2205  unsigned Opc8) {
2206  switch (MemVT.getSimpleVT().SimpleTy) {
2207  case MVT::i64:
2208  return Opc64;
2209  case MVT::i32:
2210  return Opc32;
2211  case MVT::i16:
2212  return Opc16;
2213  case MVT::i8:
2214  return Opc8;
2215  default:
2216  llvm_unreachable("Invalid size!");
2217  }
2218  };
2219 
2220  MachineSDNode *Result;
2221  switch (Opc) {
2222  case X86ISD::INC:
2223  case X86ISD::DEC: {
2224  unsigned NewOpc =
2225  Opc == X86ISD::INC
2226  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2227  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2228  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2229  Result =
2230  CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
2231  break;
2232  }
2233  case X86ISD::ADD:
2234  case X86ISD::SUB:
2235  case X86ISD::AND:
2236  case X86ISD::OR:
2237  case X86ISD::XOR: {
2238  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
2239  switch (Opc) {
2240  case X86ISD::ADD:
2241  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
2242  X86::ADD8mr);
2243  case X86ISD::SUB:
2244  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
2245  X86::SUB8mr);
2246  case X86ISD::AND:
2247  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
2248  X86::AND8mr);
2249  case X86ISD::OR:
2250  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
2251  case X86ISD::XOR:
2252  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
2253  X86::XOR8mr);
2254  default:
2255  llvm_unreachable("Invalid opcode!");
2256  }
2257  };
2258  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
2259  switch (Opc) {
2260  case X86ISD::ADD:
2261  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
2262  case X86ISD::SUB:
2263  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
2264  case X86ISD::AND:
2265  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
2266  case X86ISD::OR:
2267  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
2268  case X86ISD::XOR:
2269  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
2270  default:
2271  llvm_unreachable("Invalid opcode!");
2272  }
2273  };
2274  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
2275  switch (Opc) {
2276  case X86ISD::ADD:
2277  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
2278  X86::ADD8mi);
2279  case X86ISD::SUB:
2280  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
2281  X86::SUB8mi);
2282  case X86ISD::AND:
2283  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
2284  X86::AND8mi);
2285  case X86ISD::OR:
2286  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
2287  X86::OR8mi);
2288  case X86ISD::XOR:
2289  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
2290  X86::XOR8mi);
2291  default:
2292  llvm_unreachable("Invalid opcode!");
2293  }
2294  };
2295 
2296  unsigned NewOpc = SelectRegOpcode(Opc);
2297  SDValue Operand = StoredVal->getOperand(1);
2298 
2299  // See if the operand is a constant that we can fold into an immediate
2300  // operand.
2301  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
2302  auto OperandV = OperandC->getAPIntValue();
2303 
2304  // Check if we can shrink the operand enough to fit in an immediate (or
2305  // fit into a smaller immediate) by negating it and switching the
2306  // operation.
2307  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
2308  ((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 &&
2309  (-OperandV).getMinSignedBits() <= 8) ||
2310  (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
2311  (-OperandV).getMinSignedBits() <= 32)) &&
2312  hasNoCarryFlagUses(StoredVal.getNode())) {
2313  OperandV = -OperandV;
2314  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
2315  }
2316 
2317  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
2318  // the larger immediate operand.
2319  if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) {
2320  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2321  NewOpc = SelectImm8Opcode(Opc);
2322  } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() &&
2323  (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) {
2324  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2325  NewOpc = SelectImmOpcode(Opc);
2326  }
2327  }
2328 
2329  const SDValue Ops[] = {Base, Scale, Index, Disp,
2330  Segment, Operand, InputChain};
2331  Result =
2332  CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
2333  break;
2334  }
2335  default:
2336  llvm_unreachable("Invalid opcode!");
2337  }
2338 
2339  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
2340  MemOp[0] = StoreNode->getMemOperand();
2341  MemOp[1] = LoadNode->getMemOperand();
2342  Result->setMemRefs(MemOp, MemOp + 2);
2343 
2344  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2345  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2346  CurDAG->RemoveDeadNode(Node);
2347  return true;
2348 }
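// Aside: the negate-and-swap immediate shrink above in isolation (a sketch,
// not from this file). 'addq $128, (%rdi)' needs a 4-byte immediate because
// imm8 tops out at +127, but the equivalent 'subq $-128, (%rdi)' fits imm8,
// provided no user of the flags reads CF (ADD and SUB produce opposite
// carries).
#include <cstdint>
static bool shrinksToImm8ByNegation(int64_t V) {
  // Assumes V != INT64_MIN, where -V would overflow.
  bool FitsImm8    = V >= -128 && V <= 127;
  bool NegFitsImm8 = -V >= -128 && -V <= 127;
  return !FitsImm8 && NegFitsImm8; // true only for V == 128
}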
2349 
2350 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
2351 bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
2352  MVT NVT = Node->getSimpleValueType(0);
2353  SDLoc dl(Node);
2354 
2355  SDValue N0 = Node->getOperand(0);
2356  SDValue N1 = Node->getOperand(1);
2357 
2358  if (!Subtarget->hasBMI() && !Subtarget->hasTBM())
2359  return false;
2360 
2361  // Must have a shift right.
2362  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
2363  return false;
2364 
2365  // Shift can't have additional users.
2366  if (!N0->hasOneUse())
2367  return false;
2368 
2369  // Only supported for 32 and 64 bits.
2370  if (NVT != MVT::i32 && NVT != MVT::i64)
2371  return false;
2372 
2373  // Shift amount and RHS of and must be constant.
2374  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
2375  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2376  if (!MaskCst || !ShiftCst)
2377  return false;
2378 
2379  // And RHS must be a mask.
2380  uint64_t Mask = MaskCst->getZExtValue();
2381  if (!isMask_64(Mask))
2382  return false;
2383 
2384  uint64_t Shift = ShiftCst->getZExtValue();
2385  uint64_t MaskSize = countPopulation(Mask);
2386 
2387  // Don't interfere with something that can be handled by extracting AH.
2388  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
2389  if (Shift == 8 && MaskSize == 8)
2390  return false;
2391 
2392  // Make sure we are only using bits that were in the original value, not
2393  // shifted in.
2394  if (Shift + MaskSize > NVT.getSizeInBits())
2395  return false;
2396 
2397  SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
2398  unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
2399  unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
2400 
2401  // BMI requires the immediate to be placed in a register.
2402  if (!Subtarget->hasTBM()) {
2403  ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
2404  MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
2405  New = SDValue(CurDAG->getMachineNode(X86::MOV32ri, dl, NVT, New), 0);
2406  if (NVT == MVT::i64) {
2407  New =
2408  SDValue(CurDAG->getMachineNode(
2409  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
2410  CurDAG->getTargetConstant(0, dl, MVT::i64), New,
2411  CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
2412  0);
2413  }
2414  }
2415 
2416  MachineSDNode *NewNode;
2417  SDValue Input = N0->getOperand(0);
2418  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2419  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
2420  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
2421  SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
2422  NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2423  // Update the chain.
2424  ReplaceUses(Input.getValue(1), SDValue(NewNode, 1));
2425  // Record the mem-refs
2426  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2427  MemOp[0] = cast<LoadSDNode>(Input)->getMemOperand();
2428  NewNode->setMemRefs(MemOp, MemOp + 1);
2429  } else {
2430  NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New);
2431  }
2432 
2433  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
2434  CurDAG->RemoveDeadNode(Node);
2435  return true;
2436 }
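// Aside: the control-operand encoding used above, as a standalone sketch
// (not from this file). BEXTR takes a control word with the starting bit
// index in bits 7:0 and the field length in bits 15:8, which is exactly
// 'Shift | (MaskSize << 8)'.
#include <cstdint>
static uint32_t bextrControl(uint32_t StartBit, uint32_t Length) {
  return (StartBit & 0xff) | ((Length & 0xff) << 8);
}
// (x >> 4) & 0xfff is selected as BEXTR with control
// bextrControl(4, 12) == 0xc04.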
2437 
2438 void X86DAGToDAGISel::Select(SDNode *Node) {
2439  MVT NVT = Node->getSimpleValueType(0);
2440  unsigned Opc, MOpc;
2441  unsigned Opcode = Node->getOpcode();
2442  SDLoc dl(Node);
2443 
2444  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
2445 
2446  if (Node->isMachineOpcode()) {
2447  DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
2448  Node->setNodeId(-1);
2449  return; // Already selected.
2450  }
2451 
2452  switch (Opcode) {
2453  default: break;
2454  case ISD::BRIND: {
2455  if (Subtarget->isTargetNaCl())
2456  // NaCl has its own pass where jmp %r32 instructions are converted to
2457  // jmp %r64, so we leave the instruction alone here.
2458  break;
2459  if (Subtarget->isTarget64BitILP32()) {
2460  // Converts a 32-bit register to a 64-bit, zero-extended version of
2461  // it. This is needed because x86-64 can do many things, but jmp %r32
2462  // ain't one of them.
2463  const SDValue &Target = Node->getOperand(1);
2464  assert(Target.getSimpleValueType() == llvm::MVT::i32);
2465  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
2466  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
2467  Node->getOperand(0), ZextTarget);
2468  ReplaceNode(Node, Brind.getNode());
2469  SelectCode(ZextTarget.getNode());
2470  SelectCode(Brind.getNode());
2471  return;
2472  }
2473  break;
2474  }
2475  case X86ISD::GlobalBaseReg:
2476  ReplaceNode(Node, getGlobalBaseReg());
2477  return;
2478 
2479  case X86ISD::SELECT:
2480  case X86ISD::SHRUNKBLEND: {
2481  // SHRUNKBLEND selects like a regular VSELECT. Same with X86ISD::SELECT.
2482  SDValue VSelect = CurDAG->getNode(
2483  ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
2484  Node->getOperand(1), Node->getOperand(2));
2485  ReplaceNode(Node, VSelect.getNode());
2486  SelectCode(VSelect.getNode());
2487  // We already called ReplaceUses.
2488  return;
2489  }
2490 
2491  case ISD::AND:
2492  // Try to match BEXTR/BEXTRI instruction.
2493  if (matchBEXTRFromAnd(Node))
2494  return;
2495 
2496  LLVM_FALLTHROUGH;
2497  case ISD::OR:
2498  case ISD::XOR: {
2499 
2500  // For operations of the form (x << C1) op C2, check if we can use a smaller
2501  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
2502  SDValue N0 = Node->getOperand(0);
2503  SDValue N1 = Node->getOperand(1);
2504 
2505  if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
2506  break;
2507 
2508  // i8 is unshrinkable; i16 should be promoted to i32.
2509  if (NVT != MVT::i32 && NVT != MVT::i64)
2510  break;
2511 
2512  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
2513  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2514  if (!Cst || !ShlCst)
2515  break;
2516 
2517  int64_t Val = Cst->getSExtValue();
2518  uint64_t ShlVal = ShlCst->getZExtValue();
2519 
2520  // Make sure that we don't change the operation by removing bits.
2521  // This only matters for OR and XOR; AND is unaffected.
2522  uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
2523  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
2524  break;
2525 
2526  unsigned ShlOp, AddOp, Op;
2527  MVT CstVT = NVT;
2528 
2529  // Check the minimum bitwidth for the new constant.
2530  // TODO: AND32ri is the same as AND64ri32 with zext imm.
2531  // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
2532  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
2533  if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
2534  CstVT = MVT::i8;
2535  else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
2536  CstVT = MVT::i32;
2537 
2538  // Bail if there is no smaller encoding.
2539  if (NVT == CstVT)
2540  break;
2541 
2542  switch (NVT.SimpleTy) {
2543  default: llvm_unreachable("Unsupported VT!");
2544  case MVT::i32:
2545  assert(CstVT == MVT::i8);
2546  ShlOp = X86::SHL32ri;
2547  AddOp = X86::ADD32rr;
2548 
2549  switch (Opcode) {
2550  default: llvm_unreachable("Impossible opcode");
2551  case ISD::AND: Op = X86::AND32ri8; break;
2552  case ISD::OR: Op = X86::OR32ri8; break;
2553  case ISD::XOR: Op = X86::XOR32ri8; break;
2554  }
2555  break;
2556  case MVT::i64:
2557  assert(CstVT == MVT::i8 || CstVT == MVT::i32);
2558  ShlOp = X86::SHL64ri;
2559  AddOp = X86::ADD64rr;
2560 
2561  switch (Opcode) {
2562  default: llvm_unreachable("Impossible opcode");
2563  case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
2564  case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
2565  case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
2566  }
2567  break;
2568  }
2569 
2570  // Emit the smaller op and the shift.
2571  SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
2572  SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
2573  if (ShlVal == 1)
2574  CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
2575  SDValue(New, 0));
2576  else
2577  CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
2578  getI8Imm(ShlVal, dl));
2579  return;
2580  }
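// Aside: a sketch (not from this file) of the legality guard for the rewrite
// above. Hoisting C2 across the shift divides it by 2^C1, so for OR and XOR
// the low C1 bits of C2 must already be zero or the result would change; AND
// is immune because the shift's zero fill clears those bits anyway.
#include <cstdint>
static bool canHoistConstOverShl(uint64_t C2, unsigned C1, bool IsAnd) {
  uint64_t RemovedBitsMask = (1ull << C1) - 1; // assumes C1 < 64
  return IsAnd || (C2 & RemovedBitsMask) == 0;
}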
2581  case X86ISD::UMUL8:
2582  case X86ISD::SMUL8: {
2583  SDValue N0 = Node->getOperand(0);
2584  SDValue N1 = Node->getOperand(1);
2585 
2586  Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
2587 
2588  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
2589  N0, SDValue()).getValue(1);
2590 
2591  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
2592  SDValue Ops[] = {N1, InFlag};
2593  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2594 
2595  ReplaceNode(Node, CNode);
2596  return;
2597  }
2598 
2599  case X86ISD::UMUL: {
2600  SDValue N0 = Node->getOperand(0);
2601  SDValue N1 = Node->getOperand(1);
2602 
2603  unsigned LoReg;
2604  switch (NVT.SimpleTy) {
2605  default: llvm_unreachable("Unsupported VT!");
2606  // MVT::i8 is handled by X86ISD::UMUL8.
2607  case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
2608  case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
2609  case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
2610  }
2611 
2612  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
2613  N0, SDValue()).getValue(1);
2614 
2615  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
2616  SDValue Ops[] = {N1, InFlag};
2617  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2618 
2619  ReplaceNode(Node, CNode);
2620  return;
2621  }
2622 
2623  case ISD::SMUL_LOHI:
2624  case ISD::UMUL_LOHI: {
2625  SDValue N0 = Node->getOperand(0);
2626  SDValue N1 = Node->getOperand(1);
2627 
2628  bool isSigned = Opcode == ISD::SMUL_LOHI;
2629  bool hasBMI2 = Subtarget->hasBMI2();
2630  if (!isSigned) {
2631  switch (NVT.SimpleTy) {
2632  default: llvm_unreachable("Unsupported VT!");
2633  case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
2634  case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
2635  case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
2636  MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
2637  case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
2638  MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
2639  }
2640  } else {
2641  switch (NVT.SimpleTy) {
2642  default: llvm_unreachable("Unsupported VT!");
2643  case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
2644  case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
2645  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
2646  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
2647  }
2648  }
2649 
2650  unsigned SrcReg, LoReg, HiReg;
2651  switch (Opc) {
2652  default: llvm_unreachable("Unknown MUL opcode!");
2653  case X86::IMUL8r:
2654  case X86::MUL8r:
2655  SrcReg = LoReg = X86::AL; HiReg = X86::AH;
2656  break;
2657  case X86::IMUL16r:
2658  case X86::MUL16r:
2659  SrcReg = LoReg = X86::AX; HiReg = X86::DX;
2660  break;
2661  case X86::IMUL32r:
2662  case X86::MUL32r:
2663  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
2664  break;
2665  case X86::IMUL64r:
2666  case X86::MUL64r:
2667  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
2668  break;
2669  case X86::MULX32rr:
2670  SrcReg = X86::EDX; LoReg = HiReg = 0;
2671  break;
2672  case X86::MULX64rr:
2673  SrcReg = X86::RDX; LoReg = HiReg = 0;
2674  break;
2675  }
2676 
2677  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2678  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2679  // Multiply is commutative.
2680  if (!foldedLoad) {
2681  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2682  if (foldedLoad)
2683  std::swap(N0, N1);
2684  }
2685 
2686  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
2687  N0, SDValue()).getValue(1);
2688  SDValue ResHi, ResLo;
2689 
2690  if (foldedLoad) {
2691  SDValue Chain;
2692  MachineSDNode *CNode = nullptr;
2693  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2694  InFlag };
2695  if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
2696  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
2697  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2698  ResHi = SDValue(CNode, 0);
2699  ResLo = SDValue(CNode, 1);
2700  Chain = SDValue(CNode, 2);
2701  InFlag = SDValue(CNode, 3);
2702  } else {
2703  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2704  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2705  Chain = SDValue(CNode, 0);
2706  InFlag = SDValue(CNode, 1);
2707  }
2708 
2709  // Update the chain.
2710  ReplaceUses(N1.getValue(1), Chain);
2711  // Record the mem-refs
2712  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2713  MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
2714  CNode->setMemRefs(MemOp, MemOp + 1);
2715  } else {
2716  SDValue Ops[] = { N1, InFlag };
2717  if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
2718  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
2719  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2720  ResHi = SDValue(CNode, 0);
2721  ResLo = SDValue(CNode, 1);
2722  InFlag = SDValue(CNode, 2);
2723  } else {
2724  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
2725  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2726  InFlag = SDValue(CNode, 0);
2727  }
2728  }
2729 
2730  // Prevent use of AH in a REX instruction by referencing AX instead.
2731  if (HiReg == X86::AH && Subtarget->is64Bit() &&
2732  !SDValue(Node, 1).use_empty()) {
2733  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2734  X86::AX, MVT::i16, InFlag);
2735  InFlag = Result.getValue(2);
2736  // Get the low part if needed. Don't use getCopyFromReg for aliasing
2737  // registers.
2738  if (!SDValue(Node, 0).use_empty())
2739  ReplaceUses(SDValue(Node, 0),
2740  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2741 
2742  // Shift AX down 8 bits.
2743  Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
2744  Result,
2745  CurDAG->getTargetConstant(8, dl, MVT::i8)),
2746  0);
2747  // Then truncate it down to i8.
2748  ReplaceUses(SDValue(Node, 1),
2749  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2750  }
2751  // Copy the low half of the result, if it is needed.
2752  if (!SDValue(Node, 0).use_empty()) {
2753  if (!ResLo.getNode()) {
2754  assert(LoReg && "Register for low half is not defined!");
2755  ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
2756  InFlag);
2757  InFlag = ResLo.getValue(2);
2758  }
2759  ReplaceUses(SDValue(Node, 0), ResLo);
2760  DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
2761  }
2762  // Copy the high half of the result, if it is needed.
2763  if (!SDValue(Node, 1).use_empty()) {
2764  if (!ResHi.getNode()) {
2765  assert(HiReg && "Register for high half is not defined!");
2766  ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
2767  InFlag);
2768  InFlag = ResHi.getValue(2);
2769  }
2770  ReplaceUses(SDValue(Node, 1), ResHi);
2771  DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
2772  }
2773 
2774  CurDAG->RemoveDeadNode(Node);
2775  return;
2776  }
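// Aside: a C-level model (illustrative, not from this file) of why the MULX
// path above seeds EDX/RDX first. MULX multiplies its explicit source by the
// value implicitly held in EDX/RDX and writes the high and low halves to two
// explicit destinations without touching EFLAGS. Assumes a compiler with the
// __uint128_t extension.
#include <cstdint>
static void mulx64(uint64_t RDX, uint64_t Src, uint64_t &Hi, uint64_t &Lo) {
  __uint128_t Product = (__uint128_t)RDX * (__uint128_t)Src;
  Hi = (uint64_t)(Product >> 64);
  Lo = (uint64_t)Product;
}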
2777 
2778  case ISD::SDIVREM:
2779  case ISD::UDIVREM:
2780  case X86ISD::SDIVREM8_SEXT_HREG:
2781  case X86ISD::UDIVREM8_ZEXT_HREG: {
2782  SDValue N0 = Node->getOperand(0);
2783  SDValue N1 = Node->getOperand(1);
2784 
2785  bool isSigned = (Opcode == ISD::SDIVREM ||
2786  Opcode == X86ISD::SDIVREM8_SEXT_HREG);
2787  if (!isSigned) {
2788  switch (NVT.SimpleTy) {
2789  default: llvm_unreachable("Unsupported VT!");
2790  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
2791  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
2792  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
2793  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
2794  }
2795  } else {
2796  switch (NVT.SimpleTy) {
2797  default: llvm_unreachable("Unsupported VT!");
2798  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
2799  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
2800  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
2801  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
2802  }
2803  }
2804 
2805  unsigned LoReg, HiReg, ClrReg;
2806  unsigned SExtOpcode;
2807  switch (NVT.SimpleTy) {
2808  default: llvm_unreachable("Unsupported VT!");
2809  case MVT::i8:
2810  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
2811  SExtOpcode = X86::CBW;
2812  break;
2813  case MVT::i16:
2814  LoReg = X86::AX; HiReg = X86::DX;
2815  ClrReg = X86::DX;
2816  SExtOpcode = X86::CWD;
2817  break;
2818  case MVT::i32:
2819  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
2820  SExtOpcode = X86::CDQ;
2821  break;
2822  case MVT::i64:
2823  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
2824  SExtOpcode = X86::CQO;
2825  break;
2826  }
2827 
2828  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2829  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2830  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
2831 
2832  SDValue InFlag;
2833  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
2834  // Special case for div8: just use a move with zero extension to AX to
2835  // clear the upper 8 bits (AH).
2836  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
2837  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
2838  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
2839  Move =
2840  SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
2841  MVT::Other, Ops), 0);
2842  Chain = Move.getValue(1);
2843  ReplaceUses(N0.getValue(1), Chain);
2844  } else {
2845  Move =
2846  SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
2847  Chain = CurDAG->getEntryNode();
2848  }
2849  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
2850  InFlag = Chain.getValue(1);
2851  } else {
2852  InFlag =
2853  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
2854  LoReg, N0, SDValue()).getValue(1);
2855  if (isSigned && !signBitIsZero) {
2856  // Sign extend the low part into the high part.
2857  InFlag =
2858  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
2859  } else {
2860  // Zero out the high part, effectively zero extending the input.
2861  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
2862  switch (NVT.SimpleTy) {
2863  case MVT::i16:
2864  ClrNode =
2865  SDValue(CurDAG->getMachineNode(
2866  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
2867  CurDAG->getTargetConstant(X86::sub_16bit, dl,
2868  MVT::i32)),
2869  0);
2870  break;
2871  case MVT::i32:
2872  break;
2873  case MVT::i64:
2874  ClrNode =
2875  SDValue(CurDAG->getMachineNode(
2876  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
2877  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
2878  CurDAG->getTargetConstant(X86::sub_32bit, dl,
2879  MVT::i32)),
2880  0);
2881  break;
2882  default:
2883  llvm_unreachable("Unexpected division source");
2884  }
2885 
2886  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
2887  ClrNode, InFlag).getValue(1);
2888  }
2889  }
2890 
2891  if (foldedLoad) {
2892  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2893  InFlag };
2894  MachineSDNode *CNode =
2895  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
2896  InFlag = SDValue(CNode, 1);
2897  // Update the chain.
2898  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
2899  // Record the mem-refs
2900  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2901  MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
2902  CNode->setMemRefs(MemOp, MemOp + 1);
2903  } else {
2904  InFlag =
2905  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
2906  }
2907 
2908  // Prevent use of AH in a REX instruction by explicitly copying it to
2909  // an ABCD_L register.
2910  //
2911  // The current assumption of the register allocator is that isel
2912  // won't generate explicit references to the GR8_ABCD_H registers. If
2913  // the allocator and/or the backend get enhanced to be more robust in
2914  // that regard, this can be, and should be, removed.
2915  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
2916  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
2917  unsigned AHExtOpcode =
2918  isSigned ? X86::MOVSX32_NOREXrr8 : X86::MOVZX32_NOREXrr8;
2919 
2920  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
2921  MVT::Glue, AHCopy, InFlag);
2922  SDValue Result(RNode, 0);
2923  InFlag = SDValue(RNode, 1);
2924 
2925  if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
2926  Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
2927  assert(Node->getValueType(1) == MVT::i32 && "Unexpected result type!");
2928  } else {
2929  Result =
2930  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
2931  }
2932  ReplaceUses(SDValue(Node, 1), Result);
2933  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2934  }
2935  // Copy the division (low) result, if it is needed.
2936  if (!SDValue(Node, 0).use_empty()) {
2937  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2938  LoReg, NVT, InFlag);
2939  InFlag = Result.getValue(2);
2940  ReplaceUses(SDValue(Node, 0), Result);
2941  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2942  }
2943  // Copy the remainder (high) result, if it is needed.
2944  if (!SDValue(Node, 1).use_empty()) {
2945  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2946  HiReg, NVT, InFlag);
2947  InFlag = Result.getValue(2);
2948  ReplaceUses(SDValue(Node, 1), Result);
2949  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2950  }
2951  CurDAG->RemoveDeadNode(Node);
2952  return;
2953  }
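// Aside: a C-level model (not from this file) of the sign-extension step
// above. CBW/CWD/CDQ/CQO widen the dividend by filling AH/DX/EDX/RDX with
// copies of the sign bit, which IDIV requires before dividing.
#include <cstdint>
static int32_t cdq(int32_t EAX) {
  // The value CDQ leaves in EDX: all ones if EAX is negative, else zero.
  return EAX >> 31;
}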
2954 
2955  case X86ISD::CMP:
2956  case X86ISD::SUB: {
2957  // Sometimes a SUB is used to perform a comparison.
2958  if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
2959  // This node is not a CMP.
2960  break;
2961  SDValue N0 = Node->getOperand(0);
2962  SDValue N1 = Node->getOperand(1);
2963 
2964  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
2965  hasNoSignedComparisonUses(Node))
2966  N0 = N0.getOperand(0);
2967 
2968  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
2969  // use a smaller encoding. (Any truncate feeding the CMP was already
2970  // looked through above when the CMP was its only use.)
2971  if ((N0.getOpcode() == ISD::AND ||
2972  (N0.getResNo() == 0 && N0.getOpcode() == X86ISD::AND)) &&
2973  N0.getNode()->hasOneUse() &&
2974  N0.getValueType() != MVT::i8 &&
2975  X86::isZeroNode(N1)) {
2976  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
2977  if (!C) break;
2978  uint64_t Mask = C->getZExtValue();
2979 
2980  // For example, convert "testl %eax, $8" to "testb %al, $8"
2981  if (isUInt<8>(Mask) &&
2982  (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) {
2983  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i8);
2984  SDValue Reg = N0.getOperand(0);
2985 
2986  // Extract the l-register.
2987  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
2988  MVT::i8, Reg);
2989 
2990  // Emit a testb.
2991  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
2992  Subreg, Imm);
2993  // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2994  // one, do not call ReplaceAllUsesWith.
2995  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2996  SDValue(NewNode, 0));
2997  CurDAG->RemoveDeadNode(Node);
2998  return;
2999  }
3000 
3001  // For example, "testl %eax, $2048" to "testb %ah, $8".
3002  if (isShiftedUInt<8, 8>(Mask) &&
3003  (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
3004  // Shift the immediate right by 8 bits.
3005  SDValue ShiftedImm = CurDAG->getTargetConstant(Mask >> 8, dl, MVT::i8);
3006  SDValue Reg = N0.getOperand(0);
3007 
3008  // Extract the h-register.
3009  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
3010  MVT::i8, Reg);
3011 
3012  // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
3013  // target GR8_NOREX registers, so make sure the register class is
3014  // forced.
3015  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
3016  MVT::i32, Subreg, ShiftedImm);
3017  // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
3018  // one, do not call ReplaceAllUsesWith.
3019  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
3020  SDValue(NewNode, 0));
3021  CurDAG->RemoveDeadNode(Node);
3022  return;
3023  }
3024 
3025  // For example, "testl %eax, $32776" to "testw %ax, $32776".
3026  // NOTE: We only want to form TESTW instructions if optimizing for
3027  // min size. Otherwise we only save one byte and possibly get a length
3028  // changing prefix penalty in the decoders.
3029  if (OptForMinSize && isUInt<16>(Mask) && N0.getValueType() != MVT::i16 &&
3030  (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
3031  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i16);
3032  SDValue Reg = N0.getOperand(0);
3033 
3034  // Extract the 16-bit subregister.
3035  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
3036  MVT::i16, Reg);
3037 
3038  // Emit a testw.
3039  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
3040  Subreg, Imm);
3041  // Replace SUB|CMP with TEST. Since SUB has two outputs while TEST has
3042  // only one, do not call ReplaceAllUsesWith.
3043  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
3044  SDValue(NewNode, 0));
3045  CurDAG->RemoveDeadNode(Node);
3046  return;
3047  }
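  // Background on the penalty mentioned above: the 16-bit immediate form
  // carries the 0x66 operand-size prefix, which changes the instruction
  // length implied by the opcode (a "length-changing prefix"). Several x86
  // decoders stall on LCP instructions, so forming TESTW only pays off
  // when minimizing size.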
3048 
3049  // For example, convert "testq %rax, $268468232" to "testl %eax, $268468232".
3050  if (isUInt<32>(Mask) && N0.getValueType() == MVT::i64 &&
3051  (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
3052  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
3053  SDValue Reg = N0.getOperand(0);
3054 
3055  // Extract the 32-bit subregister.
3056  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
3057  MVT::i32, Reg);
3058 
3059  // Emit a testl.
3060  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
3061  Subreg, Imm);
3062  // Replace SUB|CMP with TEST. Since SUB has two outputs while TEST has
3063  // only one, do not call ReplaceAllUsesWith.
3064  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
3065  SDValue(NewNode, 0));
3066  CurDAG->RemoveDeadNode(Node);
3067  return;
3068  }
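  // Correctness sketch for this 64-to-32 narrowing: when Mask fits in 32
  // bits, (X & Mask) has all-zero upper bits, so ZF from the 32-bit TEST
  // matches the 64-bit one; only SF can differ (bit 31 vs. bit 63), hence
  // the hasNoSignedComparisonUses guard. Dropping REX.W also saves a byte.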
3069  }
3070  break;
3071  }
3072  case ISD::STORE:
3073  if (foldLoadStoreIntoMemOperand(Node))
3074  return;
3075  break;
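  // foldLoadStoreIntoMemOperand looks for the {load; op; store} shape on
  // the same address (see isFusableLoadOpStorePattern), i.e. schematically
  //   (store (op (load p), v), p)
  // and, when the chains allow, folds it into a single read-modify-write
  // memory instruction.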
3076  }
3077 
3078  SelectCode(Node);
3079 }
3080 
3081 bool X86DAGToDAGISel::
3082 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
3083  std::vector<SDValue> &OutOps) {
3084  SDValue Op0, Op1, Op2, Op3, Op4;
3085  switch (ConstraintID) {
3086  default:
3087  llvm_unreachable("Unexpected asm memory constraint");
3088  case InlineAsm::Constraint_i:
3089  // FIXME: It seems strange that 'i' is needed here since it's supposed to
3090  // be an immediate and not a memory constraint.
3091  LLVM_FALLTHROUGH;
3092  case InlineAsm::Constraint_o: // offsetable ??
3093  case InlineAsm::Constraint_v: // not offsetable ??
3094  case InlineAsm::Constraint_m: // memory
3095  case InlineAsm::Constraint_X:
3096  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
3097  return true;
3098  break;
3099  }
3100 
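  // The five values produced by selectAddr form the canonical x86
  // memory-reference tuple (base, scale, index, displacement, segment)
  // and are pushed below in that fixed order.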
3101  OutOps.push_back(Op0);
3102  OutOps.push_back(Op1);
3103  OutOps.push_back(Op2);
3104  OutOps.push_back(Op3);
3105  OutOps.push_back(Op4);
3106  return false;
3107 }
3108 
3109 /// This pass converts a legalized DAG into a X86-specific DAG,
3110 /// ready for instruction scheduling.
3111 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
3112  CodeGenOpt::Level OptLevel) {
3113  return new X86DAGToDAGISel(TM, OptLevel);
3114 }
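// Hedged usage sketch: this factory is wired into the target's codegen
// pipeline from X86TargetMachine (not shown in this file); schematically:
//   bool X86PassConfig::addInstSelector() {
//     addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
//     ...
//   }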