LLVM  7.0.0svn
X86ISelDAGToDAG.cpp
1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a DAG pattern matching instruction selector for X86,
11 // converting from a legalized dag to a X86 dag.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86MachineFunctionInfo.h"
17 #include "X86RegisterInfo.h"
18 #include "X86Subtarget.h"
19 #include "X86TargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
24 #include "llvm/IR/ConstantRange.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/Type.h"
29 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/KnownBits.h"
36 #include <stdint.h>
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "x86-isel"
40 
41 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
42 
43 //===----------------------------------------------------------------------===//
44 // Pattern Matcher Implementation
45 //===----------------------------------------------------------------------===//
46 
47 namespace {
48  /// This corresponds to X86AddressMode, but uses SDValues instead of register
49  /// numbers for the leaves of the matched tree.
50  struct X86ISelAddressMode {
51  enum {
52  RegBase,
53  FrameIndexBase
54  } BaseType;
55 
56  // This is really a union, discriminated by BaseType!
57  SDValue Base_Reg;
58  int Base_FrameIndex;
59 
60  unsigned Scale;
61  SDValue IndexReg;
62  int32_t Disp;
63  SDValue Segment;
64  const GlobalValue *GV;
65  const Constant *CP;
66  const BlockAddress *BlockAddr;
67  const char *ES;
68  MCSymbol *MCSym;
69  int JT;
70  unsigned Align; // CP alignment.
71  unsigned char SymbolFlags; // X86II::MO_*
72 
73  X86ISelAddressMode()
74  : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
75  Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
76  MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
77 
78  bool hasSymbolicDisplacement() const {
79  return GV != nullptr || CP != nullptr || ES != nullptr ||
80  MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
81  }
82 
83  bool hasBaseOrIndexReg() const {
84  return BaseType == FrameIndexBase ||
85  IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
86  }
87 
88  /// Return true if this addressing mode is already RIP-relative.
89  bool isRIPRelative() const {
90  if (BaseType != RegBase) return false;
91  if (RegisterSDNode *RegNode =
92  dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
93  return RegNode->getReg() == X86::RIP;
94  return false;
95  }
96 
97  void setBaseReg(SDValue Reg) {
98  BaseType = RegBase;
99  Base_Reg = Reg;
100  }
101 
102 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
103  void dump(SelectionDAG *DAG = nullptr) {
104  dbgs() << "X86ISelAddressMode " << this << '\n';
105  dbgs() << "Base_Reg ";
106  if (Base_Reg.getNode())
107  Base_Reg.getNode()->dump(DAG);
108  else
109  dbgs() << "nul\n";
110  if (BaseType == FrameIndexBase)
111  dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
112  dbgs() << " Scale " << Scale << '\n'
113  << "IndexReg ";
114  if (IndexReg.getNode())
115  IndexReg.getNode()->dump(DAG);
116  else
117  dbgs() << "nul\n";
118  dbgs() << " Disp " << Disp << '\n'
119  << "GV ";
120  if (GV)
121  GV->dump();
122  else
123  dbgs() << "nul";
124  dbgs() << " CP ";
125  if (CP)
126  CP->dump();
127  else
128  dbgs() << "nul";
129  dbgs() << '\n'
130  << "ES ";
131  if (ES)
132  dbgs() << ES;
133  else
134  dbgs() << "nul";
135  dbgs() << " MCSym ";
136  if (MCSym)
137  dbgs() << MCSym;
138  else
139  dbgs() << "nul";
140  dbgs() << " JT" << JT << " Align" << Align << '\n';
141  }
142 #endif
143  };
144 }
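// Editorial note (illustration, not part of the original source): as a rough
// sketch of how a full x86 memory operand decomposes into the fields above,
// consider an access such as
//   movl 16(%rbx,%rcx,4), %eax    ; load from [rbx + rcx*4 + 16]
// It would be described by an X86ISelAddressMode with Base_Reg = %rbx,
// IndexReg = %rcx, Scale = 4, Disp = 16, and the symbolic fields (GV, CP,
// ES, MCSym, JT, BlockAddr) left empty.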
145 
146 namespace {
147  //===--------------------------------------------------------------------===//
148  /// ISel - X86-specific code to select X86 machine instructions for
149  /// SelectionDAG operations.
150  ///
151  class X86DAGToDAGISel final : public SelectionDAGISel {
152  /// Keep a pointer to the X86Subtarget around so that we can
153  /// make the right decision when generating code for different targets.
154  const X86Subtarget *Subtarget;
155 
156  /// If true, selector should try to optimize for code size instead of
157  /// performance.
158  bool OptForSize;
159 
160  /// If true, selector should try to optimize for minimum code size.
161  bool OptForMinSize;
162 
163  public:
164  explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
165  : SelectionDAGISel(tm, OptLevel), OptForSize(false),
166  OptForMinSize(false) {}
167 
168  StringRef getPassName() const override {
169  return "X86 DAG->DAG Instruction Selection";
170  }
171 
172  bool runOnMachineFunction(MachineFunction &MF) override {
173  // Reset the subtarget each time through.
174  Subtarget = &MF.getSubtarget<X86Subtarget>();
175  SelectionDAGISel::runOnMachineFunction(MF);
176  return true;
177  }
178 
179  void EmitFunctionEntryCode() override;
180 
181  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
182 
183  void PreprocessISelDAG() override;
184  void PostprocessISelDAG() override;
185 
186 // Include the pieces autogenerated from the target description.
187 #include "X86GenDAGISel.inc"
188 
189  private:
190  void Select(SDNode *N) override;
191 
192  bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
193  bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
194  bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
195  bool matchAddress(SDValue N, X86ISelAddressMode &AM);
196  bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
197  bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
198  bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
199  unsigned Depth);
200  bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
201  bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
202  SDValue &Scale, SDValue &Index, SDValue &Disp,
203  SDValue &Segment);
204  bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
205  SDValue &Scale, SDValue &Index, SDValue &Disp,
206  SDValue &Segment);
207  bool selectMOV64Imm32(SDValue N, SDValue &Imm);
208  bool selectLEAAddr(SDValue N, SDValue &Base,
209  SDValue &Scale, SDValue &Index, SDValue &Disp,
210  SDValue &Segment);
211  bool selectLEA64_32Addr(SDValue N, SDValue &Base,
212  SDValue &Scale, SDValue &Index, SDValue &Disp,
213  SDValue &Segment);
214  bool selectTLSADDRAddr(SDValue N, SDValue &Base,
215  SDValue &Scale, SDValue &Index, SDValue &Disp,
216  SDValue &Segment);
217  bool selectScalarSSELoad(SDNode *Root, SDValue N,
218  SDValue &Base, SDValue &Scale,
219  SDValue &Index, SDValue &Disp,
220  SDValue &Segment,
221  SDValue &NodeWithChain);
222  bool selectRelocImm(SDValue N, SDValue &Op);
223 
224  bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
225  SDValue &Base, SDValue &Scale,
226  SDValue &Index, SDValue &Disp,
227  SDValue &Segment);
228 
229  // Convenience method where P is also the root.
230  bool tryFoldLoad(SDNode *P, SDValue N,
231  SDValue &Base, SDValue &Scale,
232  SDValue &Index, SDValue &Disp,
233  SDValue &Segment) {
234  return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
235  }
236 
237  /// Implement addressing mode selection for inline asm expressions.
238  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
239  unsigned ConstraintID,
240  std::vector<SDValue> &OutOps) override;
241 
242  void emitSpecialCodeForMain();
243 
244  inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
245  SDValue &Base, SDValue &Scale,
246  SDValue &Index, SDValue &Disp,
247  SDValue &Segment) {
248  Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
249  ? CurDAG->getTargetFrameIndex(
250  AM.Base_FrameIndex,
251  TLI->getPointerTy(CurDAG->getDataLayout()))
252  : AM.Base_Reg;
253  Scale = getI8Imm(AM.Scale, DL);
254  Index = AM.IndexReg;
255  // These are 32-bit even in 64-bit mode since RIP-relative offset
256  // is 32-bit.
257  if (AM.GV)
258  Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
259  MVT::i32, AM.Disp,
260  AM.SymbolFlags);
261  else if (AM.CP)
262  Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
263  AM.Align, AM.Disp, AM.SymbolFlags);
264  else if (AM.ES) {
265  assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
266  Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
267  } else if (AM.MCSym) {
268  assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
269  assert(AM.SymbolFlags == 0 && "oo");
270  Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
271  } else if (AM.JT != -1) {
272  assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
273  Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
274  } else if (AM.BlockAddr)
275  Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
276  AM.SymbolFlags);
277  else
278  Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
279 
280  if (AM.Segment.getNode())
281  Segment = AM.Segment;
282  else
283  Segment = CurDAG->getRegister(0, MVT::i32);
284  }
285 
286  // Utility function to determine whether we should avoid selecting
287  // immediate forms of instructions for better code size or not.
288  // At a high level, we'd like to avoid such instructions when
289  // we have similar constants used within the same basic block
290  // that can be kept in a register.
291  //
292  bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
293  uint32_t UseCount = 0;
294 
295  // Do not want to hoist if we're not optimizing for size.
296  // TODO: We'd like to remove this restriction.
297  // See the comment in X86InstrInfo.td for more info.
298  if (!OptForSize)
299  return false;
300 
301  // Walk all the users of the immediate.
302  for (SDNode::use_iterator UI = N->use_begin(),
303  UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
304 
305  SDNode *User = *UI;
306 
307  // This user is already selected. Count it as a legitimate use and
308  // move on.
309  if (User->isMachineOpcode()) {
310  UseCount++;
311  continue;
312  }
313 
314  // We want to count stores of immediates as real uses.
315  if (User->getOpcode() == ISD::STORE &&
316  User->getOperand(1).getNode() == N) {
317  UseCount++;
318  continue;
319  }
320 
321  // We don't currently match users that have > 2 operands (except
322  // for stores, which are handled above).
323  // Those instructions won't match in ISel, for now, and would
324  // be counted incorrectly.
325  // This may change in the future as we add additional instruction
326  // types.
327  if (User->getNumOperands() != 2)
328  continue;
329 
330  // Immediates that are used for offsets as part of stack
331  // manipulation should be left alone. These are typically
332  // used to indicate SP offsets for argument passing and
333  // will get pulled into stores/pushes (implicitly).
334  if (User->getOpcode() == X86ISD::ADD ||
335  User->getOpcode() == ISD::ADD ||
336  User->getOpcode() == X86ISD::SUB ||
337  User->getOpcode() == ISD::SUB) {
338 
339  // Find the other operand of the add/sub.
340  SDValue OtherOp = User->getOperand(0);
341  if (OtherOp.getNode() == N)
342  OtherOp = User->getOperand(1);
343 
344  // Don't count if the other operand is SP.
345  RegisterSDNode *RegNode;
346  if (OtherOp->getOpcode() == ISD::CopyFromReg &&
347  (RegNode = dyn_cast_or_null<RegisterSDNode>(
348  OtherOp->getOperand(1).getNode())))
349  if ((RegNode->getReg() == X86::ESP) ||
350  (RegNode->getReg() == X86::RSP))
351  continue;
352  }
353 
354  // ... otherwise, count this and move on.
355  UseCount++;
356  }
357 
358  // If we have more than 1 use, then recommend for hoisting.
359  return (UseCount > 1);
360  }
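// Editorial note (hedged sketch, not from the original source): this
// heuristic matters when one wide constant shows up several times in a
// block. Under -Os, for example,
//   movl $0x12345678, (%rdi)
//   addl $0x12345678, %esi
// encodes the 4-byte immediate twice, whereas materializing it once
//   movl $0x12345678, %eax
// and reusing %eax can be smaller overall; returning true here steers isel
// away from the immediate instruction forms.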
361 
362  /// Return a target constant with the specified value of type i8.
363  inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
364  return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
365  }
366 
367  /// Return a target constant with the specified value, of type i32.
368  inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
369  return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
370  }
371 
372  /// Return a target constant with the specified value, of type i64.
373  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
374  return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
375  }
376 
377  SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
378  const SDLoc &DL) {
379  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
380  uint64_t Index = N->getConstantOperandVal(1);
381  MVT VecVT = N->getOperand(0).getSimpleValueType();
382  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
383  }
384 
385  SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
386  const SDLoc &DL) {
387  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
388  uint64_t Index = N->getConstantOperandVal(2);
389  MVT VecVT = N->getSimpleValueType(0);
390  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
391  }
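// Editorial note (worked example, not in the original source): for a 256-bit
// source vector of type v8f32 and VecWidth = 128, an extract whose element
// index is 4 yields (4 * 32) / 128 = 1, i.e. the immediate that selects the
// upper 128-bit lane for VEXTRACTF128/VINSERTF128-style instructions.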
392 
393  /// Return an SDNode that returns the value of the global base register.
394  /// Output instructions required to initialize the global base register,
395  /// if necessary.
396  SDNode *getGlobalBaseReg();
397 
398  /// Return a reference to the TargetMachine, casted to the target-specific
399  /// type.
400  const X86TargetMachine &getTargetMachine() const {
401  return static_cast<const X86TargetMachine &>(TM);
402  }
403 
404  /// Return a reference to the TargetInstrInfo, casted to the target-specific
405  /// type.
406  const X86InstrInfo *getInstrInfo() const {
407  return Subtarget->getInstrInfo();
408  }
409 
410  /// \brief Address-mode matching performs shift-of-and to and-of-shift
411  /// reassociation in order to expose more scaled addressing
412  /// opportunities.
413  bool ComplexPatternFuncMutatesDAG() const override {
414  return true;
415  }
416 
417  bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
418 
419  /// Returns whether this is a relocatable immediate in the range
420  /// [-2^Width .. 2^Width-1].
421  template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
422  if (auto *CN = dyn_cast<ConstantSDNode>(N))
423  return isInt<Width>(CN->getSExtValue());
424  return isSExtAbsoluteSymbolRef(Width, N);
425  }
426 
427  // Indicates we should prefer to use a non-temporal load for this load.
428  bool useNonTemporalLoad(LoadSDNode *N) const {
429  if (!N->isNonTemporal())
430  return false;
431 
432  unsigned StoreSize = N->getMemoryVT().getStoreSize();
433 
434  if (N->getAlignment() < StoreSize)
435  return false;
436 
437  switch (StoreSize) {
438  default: llvm_unreachable("Unsupported store size");
439  case 16:
440  return Subtarget->hasSSE41();
441  case 32:
442  return Subtarget->hasAVX2();
443  case 64:
444  return Subtarget->hasAVX512();
445  }
446  }
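// Editorial note (summary, not in the original source): the sizes above are
// in bytes, so the checks correspond to the aligned non-temporal load
// instructions: 16 bytes -> MOVNTDQA (SSE4.1), 32 bytes -> VMOVNTDQA ymm
// (AVX2), 64 bytes -> VMOVNTDQA zmm (AVX-512). If the load is underaligned
// or the feature is missing, selection falls back to a regular load.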
447 
448  bool foldLoadStoreIntoMemOperand(SDNode *Node);
449  bool matchBEXTRFromAnd(SDNode *Node);
450  bool shrinkAndImmediate(SDNode *N);
451  bool isMaskZeroExtended(SDNode *N) const;
452  };
453 }
454 
455 
456 // Returns true if this masked compare can be implemented legally with this
457 // type.
458 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
459  unsigned Opcode = N->getOpcode();
460  if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMU ||
461  Opcode == X86ISD::CMPM_RND || Opcode == X86ISD::VFPCLASS) {
462  // We can get 256-bit 8 element types here without VLX being enabled. When
463  // this happens we will use 512-bit operations and the mask will not be
464  // zero extended.
465  EVT OpVT = N->getOperand(0).getValueType();
466  if (OpVT.is256BitVector() || OpVT.is128BitVector())
467  return Subtarget->hasVLX();
468 
469  return true;
470  }
471  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
472  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
473  Opcode == X86ISD::FSETCCM_RND)
474  return true;
475 
476  return false;
477 }
478 
479 // Returns true if we can assume the writer of the mask has zero extended it
480 // for us.
481 bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
482  // If this is an AND, check if we have a compare on either side. As long as
483  // one side guarantees the mask is zero extended, the AND will preserve those
484  // zeros.
485  if (N->getOpcode() == ISD::AND)
486  return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
487  isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
488 
489  return isLegalMaskCompare(N, Subtarget);
490 }
491 
492 bool
493 X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
494  if (OptLevel == CodeGenOpt::None) return false;
495 
496  if (!N.hasOneUse())
497  return false;
498 
499  if (N.getOpcode() != ISD::LOAD)
500  return true;
501 
502  // If N is a load, do additional profitability checks.
503  if (U == Root) {
504  switch (U->getOpcode()) {
505  default: break;
506  case X86ISD::ADD:
507  case X86ISD::SUB:
508  case X86ISD::AND:
509  case X86ISD::XOR:
510  case X86ISD::OR:
511  case ISD::ADD:
512  case ISD::ADDCARRY:
513  case ISD::AND:
514  case ISD::OR:
515  case ISD::XOR: {
516  SDValue Op1 = U->getOperand(1);
517 
518  // If the other operand is an 8-bit immediate we should fold the immediate
519  // instead. This reduces code size.
520  // e.g.
521  // movl 4(%esp), %eax
522  // addl $4, %eax
523  // vs.
524  // movl $4, %eax
525  // addl 4(%esp), %eax
526  // The former is 2 bytes shorter. In the case where the increment is 1,
527  // the saving can be 4 bytes (by using incl %eax).
528  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
529  if (Imm->getAPIntValue().isSignedIntN(8))
530  return false;
531 
532  // If this is a 64-bit AND with an immediate that fits in 32-bits,
533  // prefer using the smaller and over folding the load. This is needed to
534  // make sure immediates created by shrinkAndImmediate are always folded.
535  // Ideally we would narrow the load during DAG combine and get the
536  // best of both worlds.
537  if (U->getOpcode() == ISD::AND &&
538  Imm->getAPIntValue().getBitWidth() == 64 &&
539  Imm->getAPIntValue().isIntN(32))
540  return false;
541  }
542 
543  // If the other operand is a TLS address, we should fold it instead.
544  // This produces
545  // movl %gs:0, %eax
546  // leal i@NTPOFF(%eax), %eax
547  // instead of
548  // movl $i@NTPOFF, %eax
549  // addl %gs:0, %eax
550  // if the block also has an access to a second TLS address this will save
551  // a load.
552  // FIXME: This is probably also true for non-TLS addresses.
553  if (Op1.getOpcode() == X86ISD::Wrapper) {
554  SDValue Val = Op1.getOperand(0);
555  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
556  return false;
557  }
558  }
559  }
560  }
561 
562  return true;
563 }
564 
565 /// Replace the original chain operand of the call with
566 /// load's chain operand and move load below the call's chain operand.
567 static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
568  SDValue Call, SDValue OrigChain) {
569  SmallVector<SDValue, 8> Ops;
570  SDValue Chain = OrigChain.getOperand(0);
571  if (Chain.getNode() == Load.getNode())
572  Ops.push_back(Load.getOperand(0));
573  else {
574  assert(Chain.getOpcode() == ISD::TokenFactor &&
575  "Unexpected chain operand");
576  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
577  if (Chain.getOperand(i).getNode() == Load.getNode())
578  Ops.push_back(Load.getOperand(0));
579  else
580  Ops.push_back(Chain.getOperand(i));
581  SDValue NewChain =
582  CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
583  Ops.clear();
584  Ops.push_back(NewChain);
585  }
586  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
587  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
588  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
589  Load.getOperand(1), Load.getOperand(2));
590 
591  Ops.clear();
592  Ops.push_back(SDValue(Load.getNode(), 1));
593  Ops.append(Call->op_begin() + 1, Call->op_end());
594  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
595 }
596 
597 /// Return true if call address is a load and it can be
598 /// moved below CALLSEQ_START and the chains leading up to the call.
599 /// Return the CALLSEQ_START by reference as a second output.
600 /// In the case of a tail call, there isn't a callseq node between the call
601 /// chain and the load.
602 static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
603  // The transformation is somewhat dangerous if the call's chain was glued to
604  // the call. After MoveBelowOrigChain the load is moved between the call and
605  // the chain; this can create a cycle if the load is not folded. So it is
606  // *really* important that we are sure the load will be folded.
607  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
608  return false;
609  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
610  if (!LD ||
611  LD->isVolatile() ||
612  LD->getAddressingMode() != ISD::UNINDEXED ||
613  LD->getExtensionType() != ISD::NON_EXTLOAD)
614  return false;
615 
616  // Now let's find the callseq_start.
617  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
618  if (!Chain.hasOneUse())
619  return false;
620  Chain = Chain.getOperand(0);
621  }
622 
623  if (!Chain.getNumOperands())
624  return false;
625  // Since we are not checking for AA here, conservatively abort if the chain
626  // writes to memory. It's not safe to move the callee (a load) across a store.
627  if (isa<MemSDNode>(Chain.getNode()) &&
628  cast<MemSDNode>(Chain.getNode())->writeMem())
629  return false;
630  if (Chain.getOperand(0).getNode() == Callee.getNode())
631  return true;
632  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
633  Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
634  Callee.getValue(1).hasOneUse())
635  return true;
636  return false;
637 }
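// Editorial note (hedged illustration, not part of the original source): when
// isCalleeLoad succeeds and the load is later folded, an indirect call such as
//   movq  fptr(%rip), %rax
//   callq *%rax
// can instead be emitted as
//   callq *fptr(%rip)
// which is why moveBelowOrigChain re-threads the chain so the load sits
// immediately above the call.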
638 
639 void X86DAGToDAGISel::PreprocessISelDAG() {
640  // OptFor[Min]Size are used in pattern predicates that isel is matching.
641  OptForSize = MF->getFunction().optForSize();
642  OptForMinSize = MF->getFunction().optForMinSize();
643  assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
644 
645  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
646  E = CurDAG->allnodes_end(); I != E; ) {
647  SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
648 
649  // If this is a target-specific AND node with no flag uses, turn it back
650  // into ISD::AND to enable test instruction matching.
651  if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
652  SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
653  N->getOperand(0), N->getOperand(1));
654  --I;
655  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
656  ++I;
657  CurDAG->DeleteNode(N);
658  }
659 
660  if (OptLevel != CodeGenOpt::None &&
661  // Only do this when the target can fold the load into the call or
662  // jmp.
663  !Subtarget->useRetpoline() &&
664  ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
665  (N->getOpcode() == X86ISD::TC_RETURN &&
666  (Subtarget->is64Bit() ||
667  !getTargetMachine().isPositionIndependent())))) {
668  /// Also try moving call address load from outside callseq_start to just
669  /// before the call to allow it to be folded.
670  ///
671  /// [Load chain]
672  /// ^
673  /// |
674  /// [Load]
675  /// ^ ^
676  /// | |
677  /// / \--
678  /// / |
679  ///[CALLSEQ_START] |
680  /// ^ |
681  /// | |
682  /// [LOAD/C2Reg] |
683  /// | |
684  /// \ /
685  /// \ /
686  /// [CALL]
687  bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
688  SDValue Chain = N->getOperand(0);
689  SDValue Load = N->getOperand(1);
690  if (!isCalleeLoad(Load, Chain, HasCallSeq))
691  continue;
692  moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
693  ++NumLoadMoved;
694  continue;
695  }
696 
697  // Lower fpround and fpextend nodes that target the FP stack to be a store and
698  // a load to the stack. This is a gross hack. We would like to simply mark
699  // these as being illegal, but when we do that, legalize produces these when
700  // it expands calls, then expands these in the same legalize pass. We would
701  // like dag combine to be able to hack on these between the call expansion
702  // and the node legalization. As such this pass basically does "really
703  // late" legalization of these inline with the X86 isel pass.
704  // FIXME: This should only happen when not compiled with -O0.
705  if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
706  continue;
707 
708  MVT SrcVT = N->getOperand(0).getSimpleValueType();
709  MVT DstVT = N->getSimpleValueType(0);
710 
711  // If any of the sources are vectors, no fp stack involved.
712  if (SrcVT.isVector() || DstVT.isVector())
713  continue;
714 
715  // If the source and destination are SSE registers, then this is a legal
716  // conversion that should not be lowered.
717  const X86TargetLowering *X86Lowering =
718  static_cast<const X86TargetLowering *>(TLI);
719  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
720  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
721  if (SrcIsSSE && DstIsSSE)
722  continue;
723 
724  if (!SrcIsSSE && !DstIsSSE) {
725  // If this is an FPStack extension, it is a noop.
726  if (N->getOpcode() == ISD::FP_EXTEND)
727  continue;
728  // If this is a value-preserving FPStack truncation, it is a noop.
729  if (N->getConstantOperandVal(1))
730  continue;
731  }
732 
733  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
734  // FPStack has extload and truncstore. SSE can fold direct loads into other
735  // operations. Based on this, decide what we want to do.
736  MVT MemVT;
737  if (N->getOpcode() == ISD::FP_ROUND)
738  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
739  else
740  MemVT = SrcIsSSE ? SrcVT : DstVT;
741 
742  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
743  SDLoc dl(N);
744 
745  // FIXME: optimize the case where the src/dest is a load or store?
746  SDValue Store =
747  CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
748  MemTmp, MachinePointerInfo(), MemVT);
749  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
750  MachinePointerInfo(), MemVT);
751 
752  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
753  // extload we created. This will cause general havoc on the DAG because
754  // anything below the conversion could be folded into other existing nodes.
755  // To avoid invalidating 'I', back it up to the convert node.
756  --I;
757  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
758 
759  // Now that we did that, the node is dead. Increment the iterator to the
760  // next node to process, then delete N.
761  ++I;
762  CurDAG->DeleteNode(N);
763  }
764 }
765 
766 
767 void X86DAGToDAGISel::PostprocessISelDAG() {
768  // Skip peepholes at -O0.
769  if (TM.getOptLevel() == CodeGenOpt::None)
770  return;
771 
772  // Attempt to remove vector moves that were inserted to zero upper bits.
773 
774  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
775  ++Position;
776 
777  while (Position != CurDAG->allnodes_begin()) {
778  SDNode *N = &*--Position;
779  // Skip dead nodes and any non-machine opcodes.
780  if (N->use_empty() || !N->isMachineOpcode())
781  continue;
782 
783  if (N->getMachineOpcode() != TargetOpcode::SUBREG_TO_REG)
784  continue;
785 
786  unsigned SubRegIdx = N->getConstantOperandVal(2);
787  if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
788  continue;
789 
790  SDValue Move = N->getOperand(1);
791  if (!Move.isMachineOpcode())
792  continue;
793 
794  // Make sure it's one of the move opcodes we recognize.
795  switch (Move.getMachineOpcode()) {
796  default:
797  continue;
798  case X86::VMOVAPDrr: case X86::VMOVUPDrr:
799  case X86::VMOVAPSrr: case X86::VMOVUPSrr:
800  case X86::VMOVDQArr: case X86::VMOVDQUrr:
801  case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
802  case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
803  case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
804  case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
805  case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
806  case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
807  case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
808  case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
809  case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
810  case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
811  case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
812  break;
813  }
814 
815  SDValue In = Move.getOperand(0);
816  if (!In.isMachineOpcode() ||
817  In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
818  continue;
819 
820  // The producing instruction is another vector instruction, so we can drop
821  // the move.
822  CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
823 
824  // If the move is now dead, delete it.
825  if (Move.getNode()->use_empty())
826  CurDAG->RemoveDeadNode(Move.getNode());
827  }
828 }
829 
830 
831 /// Emit any code that needs to be executed only in the main function.
832 void X86DAGToDAGISel::emitSpecialCodeForMain() {
833  if (Subtarget->isTargetCygMing()) {
834  TargetLowering::ArgListTy Args;
835  auto &DL = CurDAG->getDataLayout();
836 
837  TargetLowering::CallLoweringInfo CLI(*CurDAG);
838  CLI.setChain(CurDAG->getRoot())
839  .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
840  CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
841  std::move(Args));
842  const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
843  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
844  CurDAG->setRoot(Result.second);
845  }
846 }
847 
848 void X86DAGToDAGISel::EmitFunctionEntryCode() {
849  // If this is main, emit special code for main.
850  const Function &F = MF->getFunction();
851  if (F.hasExternalLinkage() && F.getName() == "main")
852  emitSpecialCodeForMain();
853 }
854 
855 static bool isDispSafeForFrameIndex(int64_t Val) {
856  // On 64-bit platforms, we can run into an issue where a frame index
857  // includes a displacement that, when added to the explicit displacement,
858  // will overflow the displacement field. Assuming that the frame index
859  // displacement fits into a 31-bit integer (which is only slightly more
860  // aggressive than the current fundamental assumption that it fits into
861  // a 32-bit integer), a 31-bit disp should always be safe.
862  return isInt<31>(Val);
863 }
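// Editorial note (hedged illustration, not part of the original source): the
// 31-bit cap leaves headroom for the frame-object offset that is only known
// after frame lowering. For example, an explicit displacement of 0x3ffffff0
// passes isInt<31> and can still absorb a later frame offset of, say, 0x100
// without overflowing the signed 32-bit displacement field, whereas allowing
// the full 32-bit range up front could not.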
864 
865 bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
866  X86ISelAddressMode &AM) {
867  // Cannot combine ExternalSymbol displacements with integer offsets.
868  if (Offset != 0 && (AM.ES || AM.MCSym))
869  return true;
870  int64_t Val = AM.Disp + Offset;
871  CodeModel::Model M = TM.getCodeModel();
872  if (Subtarget->is64Bit()) {
873  if (!X86::isOffsetSuitableForCodeModel(Val, M,
874  AM.hasSymbolicDisplacement()))
875  return true;
876  // In addition to the checks required for a register base, check that
877  // we do not try to use an unsafe Disp with a frame index.
878  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
879  !isDispSafeForFrameIndex(Val))
880  return true;
881  }
882  AM.Disp = Val;
883  return false;
884 
885 }
886 
887 bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
888  SDValue Address = N->getOperand(1);
889 
890  // load gs:0 -> GS segment register.
891  // load fs:0 -> FS segment register.
892  //
893  // This optimization is valid because the GNU TLS model defines that
894  // gs:0 (or fs:0 on X86-64) contains its own address.
895  // For more information see http://people.redhat.com/drepper/tls.pdf
896  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
897  if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
898  (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
899  Subtarget->isTargetFuchsia()))
900  switch (N->getPointerInfo().getAddrSpace()) {
901  case 256:
902  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
903  return false;
904  case 257:
905  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
906  return false;
907  // Address space 258 is not handled here, because it is not used to
908  // address TLS areas.
909  }
910 
911  return true;
912 }
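// Editorial note (hedged illustration, not from the original source): in IR
// this pattern typically appears as a load from a null pointer in address
// space 256 or 257, e.g.
//   %self = load i64, i64 addrspace(257)* null
// which, with the segment register matched above, selects to something like
//   movq %fs:0, %rax
// i.e. the TLS block reading its own address, as the comment describes.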
913 
914 /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
915 /// mode. These wrap things that will resolve down into a symbol reference.
916 /// If no match is possible, this returns true, otherwise it returns false.
917 bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
918  // If the addressing mode already has a symbol as the displacement, we can
919  // never match another symbol.
920  if (AM.hasSymbolicDisplacement())
921  return true;
922 
923  SDValue N0 = N.getOperand(0);
924  CodeModel::Model M = TM.getCodeModel();
925 
926  // Handle X86-64 rip-relative addresses. We check this before checking direct
927  // folding because RIP is preferable to non-RIP accesses.
928  if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
929  // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
930  // they cannot be folded into immediate fields.
931  // FIXME: This can be improved for kernel and other models?
932  (M == CodeModel::Small || M == CodeModel::Kernel)) {
933  // Base and index reg must be 0 in order to use %rip as base.
934  if (AM.hasBaseOrIndexReg())
935  return true;
936  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
937  X86ISelAddressMode Backup = AM;
938  AM.GV = G->getGlobal();
939  AM.SymbolFlags = G->getTargetFlags();
940  if (foldOffsetIntoAddress(G->getOffset(), AM)) {
941  AM = Backup;
942  return true;
943  }
944  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
945  X86ISelAddressMode Backup = AM;
946  AM.CP = CP->getConstVal();
947  AM.Align = CP->getAlignment();
948  AM.SymbolFlags = CP->getTargetFlags();
949  if (foldOffsetIntoAddress(CP->getOffset(), AM)) {
950  AM = Backup;
951  return true;
952  }
953  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
954  AM.ES = S->getSymbol();
955  AM.SymbolFlags = S->getTargetFlags();
956  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
957  AM.MCSym = S->getMCSymbol();
958  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
959  AM.JT = J->getIndex();
960  AM.SymbolFlags = J->getTargetFlags();
961  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
962  X86ISelAddressMode Backup = AM;
963  AM.BlockAddr = BA->getBlockAddress();
964  AM.SymbolFlags = BA->getTargetFlags();
965  if (foldOffsetIntoAddress(BA->getOffset(), AM)) {
966  AM = Backup;
967  return true;
968  }
969  } else
970  llvm_unreachable("Unhandled symbol reference node.");
971 
972  if (N.getOpcode() == X86ISD::WrapperRIP)
973  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
974  return false;
975  }
976 
977  // Handle the case when globals fit in our immediate field: This is true for
978  // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit
979  // mode, this only applies to a non-RIP-relative computation.
980  if (!Subtarget->is64Bit() ||
981  M == CodeModel::Small || M == CodeModel::Kernel) {
982  assert(N.getOpcode() != X86ISD::WrapperRIP &&
983  "RIP-relative addressing already handled");
984  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
985  AM.GV = G->getGlobal();
986  AM.Disp += G->getOffset();
987  AM.SymbolFlags = G->getTargetFlags();
988  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
989  AM.CP = CP->getConstVal();
990  AM.Align = CP->getAlignment();
991  AM.Disp += CP->getOffset();
992  AM.SymbolFlags = CP->getTargetFlags();
993  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
994  AM.ES = S->getSymbol();
995  AM.SymbolFlags = S->getTargetFlags();
996  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
997  AM.MCSym = S->getMCSymbol();
998  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
999  AM.JT = J->getIndex();
1000  AM.SymbolFlags = J->getTargetFlags();
1001  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1002  AM.BlockAddr = BA->getBlockAddress();
1003  AM.Disp += BA->getOffset();
1004  AM.SymbolFlags = BA->getTargetFlags();
1005  } else
1006  llvm_unreachable("Unhandled symbol reference node.");
1007  return false;
1008  }
1009 
1010  return true;
1011 }
1012 
1013 /// Add the specified node to the specified addressing mode, returning true if
1014 /// it cannot be done. This just pattern matches for the addressing mode.
1015 bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1016  if (matchAddressRecursively(N, AM, 0))
1017  return true;
1018 
1019  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1020  // a smaller encoding and avoids a scaled-index.
1021  if (AM.Scale == 2 &&
1022  AM.BaseType == X86ISelAddressMode::RegBase &&
1023  AM.Base_Reg.getNode() == nullptr) {
1024  AM.Base_Reg = AM.IndexReg;
1025  AM.Scale = 1;
1026  }
1027 
1028  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1029  // because it has a smaller encoding.
1030  // TODO: Which other code models can use this?
1031  if (TM.getCodeModel() == CodeModel::Small &&
1032  Subtarget->is64Bit() &&
1033  AM.Scale == 1 &&
1034  AM.BaseType == X86ISelAddressMode::RegBase &&
1035  AM.Base_Reg.getNode() == nullptr &&
1036  AM.IndexReg.getNode() == nullptr &&
1037  AM.SymbolFlags == X86II::MO_NO_FLAG &&
1038  AM.hasSymbolicDisplacement())
1039  AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1040 
1041  return false;
1042 }
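// Editorial note (hedged example, not in the original source): the first
// post-processing step above is purely an encoding trick. With no base
// register, a scaled index forces the SIB-with-disp32 form:
//   lea (,%rax,2), %rcx     ; requires a 32-bit displacement even for 0
//   lea (%rax,%rax), %rcx   ; same value, shorter encoding
// so index*2 with a free base slot is rewritten as base+index.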
1043 
1044 bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
1045  unsigned Depth) {
1046  // Add an artificial use to this node so that we can keep track of
1047  // it if it gets CSE'd with a different node.
1048  HandleSDNode Handle(N);
1049 
1050  X86ISelAddressMode Backup = AM;
1051  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1052  !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1053  return false;
1054  AM = Backup;
1055 
1056  // Try again after commuting the operands.
1057  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
1058  !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1059  return false;
1060  AM = Backup;
1061 
1062  // If we couldn't fold both operands into the address at the same time,
1063  // see if we can just put each operand into a register and fold at least
1064  // the add.
1065  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1066  !AM.Base_Reg.getNode() &&
1067  !AM.IndexReg.getNode()) {
1068  N = Handle.getValue();
1069  AM.Base_Reg = N.getOperand(0);
1070  AM.IndexReg = N.getOperand(1);
1071  AM.Scale = 1;
1072  return false;
1073  }
1074  N = Handle.getValue();
1075  return true;
1076 }
1077 
1078 // Insert a node into the DAG at least before the Pos node's position. This
1079 // will reposition the node as needed, and will assign it a node ID that is <=
1080 // the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1081 // IDs! The selection DAG must no longer depend on their uniqueness when this
1082 // is used.
1083 static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
1084  if (N->getNodeId() == -1 ||
1085  (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
1086  SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
1087  DAG.RepositionNode(Pos->getIterator(), N.getNode());
1088  // Mark Node as invalid for pruning as after this it may be a successor to a
1089  // selected node but otherwise be in the same position of Pos.
1090  // Conservatively mark it with the same -abs(Id) to assure node id
1091  // invariant is preserved.
1092  N->setNodeId(Pos->getNodeId());
1093  SelectionDAGISel::InvalidateNodeId(N.getNode());
1094  }
1095 }
1096 
1097 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1098 // safe. This allows us to convert the shift and and into an h-register
1099 // extract and a scaled index. Returns false if the simplification is
1100 // performed.
1101 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
1102  uint64_t Mask,
1103  SDValue Shift, SDValue X,
1104  X86ISelAddressMode &AM) {
1105  if (Shift.getOpcode() != ISD::SRL ||
1106  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1107  !Shift.hasOneUse())
1108  return true;
1109 
1110  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1111  if (ScaleLog <= 0 || ScaleLog >= 4 ||
1112  Mask != (0xffu << ScaleLog))
1113  return true;
1114 
1115  MVT VT = N.getSimpleValueType();
1116  SDLoc DL(N);
1117  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1118  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1119  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1120  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1121  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1122  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1123 
1124  // Insert the new nodes into the topological ordering. We must do this in
1125  // a valid topological ordering as nothing is going to go back and re-sort
1126  // these nodes. We continually insert before 'N' in sequence as this is
1127  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1128  // hierarchy left to express.
1129  insertDAGNode(DAG, N, Eight);
1130  insertDAGNode(DAG, N, Srl);
1131  insertDAGNode(DAG, N, NewMask);
1132  insertDAGNode(DAG, N, And);
1133  insertDAGNode(DAG, N, ShlCount);
1134  insertDAGNode(DAG, N, Shl);
1135  DAG.ReplaceAllUsesWith(N, Shl);
1136  AM.IndexReg = And;
1137  AM.Scale = (1 << ScaleLog);
1138  return false;
1139 }
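// Editorial note (worked example, not in the original source): with C1 = 2
// the pattern is (X >> 6) & 0x3fc. ScaleLog becomes 8 - 6 = 2 and the mask
// check 0xff << 2 == 0x3fc passes, so the node is rewritten as
//   ((X >> 8) & 0xff) << 2
// The ((X >> 8) & 0xff) part can be selected as an h-register extract
// (e.g. movzbl %ah, %ecx when X lives in %eax), and the << 2 becomes a
// scale of 4 in the addressing mode (AM.Scale = 1 << ScaleLog).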
1140 
1141 // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1142 // allows us to fold the shift into this addressing mode. Returns false if the
1143 // transform succeeded.
1144 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
1145  uint64_t Mask,
1146  SDValue Shift, SDValue X,
1147  X86ISelAddressMode &AM) {
1148  if (Shift.getOpcode() != ISD::SHL ||
1149  !isa<ConstantSDNode>(Shift.getOperand(1)))
1150  return true;
1151 
1152  // Not likely to be profitable if either the AND or SHIFT node has more
1153  // than one use (unless all uses are for address computation). Besides, the
1154  // isel mechanism requires their node ids to be reused.
1155  if (!N.hasOneUse() || !Shift.hasOneUse())
1156  return true;
1157 
1158  // Verify that the shift amount is something we can fold.
1159  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1160  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1161  return true;
1162 
1163  MVT VT = N.getSimpleValueType();
1164  SDLoc DL(N);
1165  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1166  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1167  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1168 
1169  // Insert the new nodes into the topological ordering. We must do this in
1170  // a valid topological ordering as nothing is going to go back and re-sort
1171  // these nodes. We continually insert before 'N' in sequence as this is
1172  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1173  // hierarchy left to express.
1174  insertDAGNode(DAG, N, NewMask);
1175  insertDAGNode(DAG, N, NewAnd);
1176  insertDAGNode(DAG, N, NewShift);
1177  DAG.ReplaceAllUsesWith(N, NewShift);
1178 
1179  AM.Scale = 1 << ShiftAmt;
1180  AM.IndexReg = NewAnd;
1181  return false;
1182 }
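// Editorial note (worked example, not in the original source): for
// (X << 2) & 0x3fc the transform produces (X & 0xff) << 2, so the and can be
// selected as a zero-extending move while the shift-by-2 is absorbed as
// Scale = 4 in the addressing mode, e.g. (assuming X arrives in %edi and the
// table base in %rsi)
//   movzbl %dil, %eax
//   movl   (%rsi,%rax,4), %eax
// instead of shifting and masking in separate instructions.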
1183 
1184 // Implement some heroics to detect shifts of masked values where the mask can
1185 // be replaced by extending the shift and undoing that in the addressing mode
1186 // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1187 // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1188 // the addressing mode. This results in code such as:
1189 //
1190 // int f(short *y, int *lookup_table) {
1191 // ...
1192 // return *y + lookup_table[*y >> 11];
1193 // }
1194 //
1195 // Turning into:
1196 // movzwl (%rdi), %eax
1197 // movl %eax, %ecx
1198 // shrl $11, %ecx
1199 // addl (%rsi,%rcx,4), %eax
1200 //
1201 // Instead of:
1202 // movzwl (%rdi), %eax
1203 // movl %eax, %ecx
1204 // shrl $9, %ecx
1205 // andl $124, %rcx
1206 // addl (%rsi,%rcx), %eax
1207 //
1208 // Note that this function assumes the mask is provided as a mask *after* the
1209 // value is shifted. The input chain may or may not match that, but computing
1210 // such a mask is trivial.
1211 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
1212  uint64_t Mask,
1213  SDValue Shift, SDValue X,
1214  X86ISelAddressMode &AM) {
1215  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1216  !isa<ConstantSDNode>(Shift.getOperand(1)))
1217  return true;
1218 
1219  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1220  unsigned MaskLZ = countLeadingZeros(Mask);
1221  unsigned MaskTZ = countTrailingZeros(Mask);
1222 
1223  // The amount of shift we're trying to fit into the addressing mode is taken
1224  // from the trailing zeros of the mask.
1225  unsigned AMShiftAmt = MaskTZ;
1226 
1227  // There is nothing we can do here unless the mask is removing some bits.
1228  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1229  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1230 
1231  // We also need to ensure that mask is a continuous run of bits.
1232  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1233 
1234  // Scale the leading zero count down based on the actual size of the value.
1235  // Also scale it down based on the size of the shift.
1236  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1237  if (MaskLZ < ScaleDown)
1238  return true;
1239  MaskLZ -= ScaleDown;
1240 
1241  // The final check is to ensure that any masked out high bits of X are
1242  // already known to be zero. Otherwise, the mask has a semantic impact
1243  // other than masking out a couple of low bits. Unfortunately, because of
1244  // the mask, zero extensions will be removed from operands in some cases.
1245  // This code works extra hard to look through extensions because we can
1246  // replace them with zero extensions cheaply if necessary.
1247  bool ReplacingAnyExtend = false;
1248  if (X.getOpcode() == ISD::ANY_EXTEND) {
1249  unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
1250  X.getOperand(0).getSimpleValueType().getSizeInBits();
1251  // Assume that we'll replace the any-extend with a zero-extend, and
1252  // narrow the search to the extended value.
1253  X = X.getOperand(0);
1254  MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1255  ReplacingAnyExtend = true;
1256  }
1257  APInt MaskedHighBits =
1258  APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
1259  KnownBits Known;
1260  DAG.computeKnownBits(X, Known);
1261  if (MaskedHighBits != Known.Zero) return true;
1262 
1263  // We've identified a pattern that can be transformed into a single shift
1264  // and an addressing mode. Make it so.
1265  MVT VT = N.getSimpleValueType();
1266  if (ReplacingAnyExtend) {
1267  assert(X.getValueType() != VT);
1268  // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1269  SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1270  insertDAGNode(DAG, N, NewX);
1271  X = NewX;
1272  }
1273  SDLoc DL(N);
1274  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1275  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1276  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1277  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1278 
1279  // Insert the new nodes into the topological ordering. We must do this in
1280  // a valid topological ordering as nothing is going to go back and re-sort
1281  // these nodes. We continually insert before 'N' in sequence as this is
1282  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1283  // hierarchy left to express.
1284  insertDAGNode(DAG, N, NewSRLAmt);
1285  insertDAGNode(DAG, N, NewSRL);
1286  insertDAGNode(DAG, N, NewSHLAmt);
1287  insertDAGNode(DAG, N, NewSHL);
1288  DAG.ReplaceAllUsesWith(N, NewSHL);
1289 
1290  AM.Scale = 1 << AMShiftAmt;
1291  AM.IndexReg = NewSRL;
1292  return false;
1293 }
1294 
1295 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1296  unsigned Depth) {
1297  SDLoc dl(N);
1298  DEBUG({
1299  dbgs() << "MatchAddress: ";
1300  AM.dump(CurDAG);
1301  });
1302  // Limit recursion.
1303  if (Depth > 5)
1304  return matchAddressBase(N, AM);
1305 
1306  // If this is already a %rip relative address, we can only merge immediates
1307  // into it. Instead of handling this in every case, we handle it here.
1308  // RIP relative addressing: %rip + 32-bit displacement!
1309  if (AM.isRIPRelative()) {
1310  // FIXME: JumpTable and ExternalSymbol address currently don't like
1311  // displacements. It isn't very important, but this should be fixed for
1312  // consistency.
1313  if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1314  return true;
1315 
1316  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1317  if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1318  return false;
1319  return true;
1320  }
1321 
1322  switch (N.getOpcode()) {
1323  default: break;
1324  case ISD::LOCAL_RECOVER: {
1325  if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1326  if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1327  // Use the symbol and don't prefix it.
1328  AM.MCSym = ESNode->getMCSymbol();
1329  return false;
1330  }
1331  break;
1332  }
1333  case ISD::Constant: {
1334  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1335  if (!foldOffsetIntoAddress(Val, AM))
1336  return false;
1337  break;
1338  }
1339 
1340  case X86ISD::Wrapper:
1341  case X86ISD::WrapperRIP:
1342  if (!matchWrapper(N, AM))
1343  return false;
1344  break;
1345 
1346  case ISD::LOAD:
1347  if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1348  return false;
1349  break;
1350 
1351  case ISD::FrameIndex:
1352  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1353  AM.Base_Reg.getNode() == nullptr &&
1354  (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1355  AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1356  AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1357  return false;
1358  }
1359  break;
1360 
1361  case ISD::SHL:
1362  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1363  break;
1364 
1365  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1366  unsigned Val = CN->getZExtValue();
1367  // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1368  // that the base operand remains free for further matching. If
1369  // the base doesn't end up getting used, a post-processing step
1370  // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1371  if (Val == 1 || Val == 2 || Val == 3) {
1372  AM.Scale = 1 << Val;
1373  SDValue ShVal = N.getOperand(0);
1374 
1375  // Okay, we know that we have a scale by now. However, if the scaled
1376  // value is an add of something and a constant, we can fold the
1377  // constant into the disp field here.
1378  if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1379  AM.IndexReg = ShVal.getOperand(0);
1380  ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1381  uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1382  if (!foldOffsetIntoAddress(Disp, AM))
1383  return false;
1384  }
1385 
1386  AM.IndexReg = ShVal;
1387  return false;
1388  }
1389  }
1390  break;
1391 
1392  case ISD::SRL: {
1393  // Scale must not be used already.
1394  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1395 
1396  SDValue And = N.getOperand(0);
1397  if (And.getOpcode() != ISD::AND) break;
1398  SDValue X = And.getOperand(0);
1399 
1400  // We only handle up to 64-bit values here as those are what matter for
1401  // addressing mode optimizations.
1402  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1403 
1404  // The mask used for the transform is expected to be post-shift, but we
1405  // found the shift first so just apply the shift to the mask before passing
1406  // it down.
1407  if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1408  !isa<ConstantSDNode>(And.getOperand(1)))
1409  break;
1410  uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1411 
1412  // Try to fold the mask and shift into the scale, and return false if we
1413  // succeed.
1414  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1415  return false;
1416  break;
1417  }
1418 
1419  case ISD::SMUL_LOHI:
1420  case ISD::UMUL_LOHI:
1421  // A mul_lohi where we need the low part can be folded as a plain multiply.
1422  if (N.getResNo() != 0) break;
1423  LLVM_FALLTHROUGH;
1424  case ISD::MUL:
1425  case X86ISD::MUL_IMM:
1426  // X*[3,5,9] -> X+X*[2,4,8]
1427  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1428  AM.Base_Reg.getNode() == nullptr &&
1429  AM.IndexReg.getNode() == nullptr) {
1430  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1431  if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1432  CN->getZExtValue() == 9) {
1433  AM.Scale = unsigned(CN->getZExtValue())-1;
1434 
1435  SDValue MulVal = N.getOperand(0);
1436  SDValue Reg;
1437 
1438  // Okay, we know that we have a scale by now. However, if the scaled
1439  // value is an add of something and a constant, we can fold the
1440  // constant into the disp field here.
1441  if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1442  isa<ConstantSDNode>(MulVal.getOperand(1))) {
1443  Reg = MulVal.getOperand(0);
1444  ConstantSDNode *AddVal =
1445  cast<ConstantSDNode>(MulVal.getOperand(1));
1446  uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1447  if (foldOffsetIntoAddress(Disp, AM))
1448  Reg = N.getOperand(0);
1449  } else {
1450  Reg = N.getOperand(0);
1451  }
1452 
1453  AM.IndexReg = AM.Base_Reg = Reg;
1454  return false;
1455  }
1456  }
1457  break;
1458 
1459  case ISD::SUB: {
1460  // Given A-B, if A can be completely folded into the address, with the
1461  // index field unused, use -B as the index.
1462  // This is a win if A has multiple parts that can be folded into
1463  // the address. Also, this saves a mov if the base register has
1464  // other uses, since it avoids a two-address sub instruction; however,
1465  // it costs an additional mov if the index register has other uses.
1466 
1467  // Add an artificial use to this node so that we can keep track of
1468  // it if it gets CSE'd with a different node.
1469  HandleSDNode Handle(N);
1470 
1471  // Test if the LHS of the sub can be folded.
1472  X86ISelAddressMode Backup = AM;
1473  if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
1474  AM = Backup;
1475  break;
1476  }
1477  // Test if the index field is free for use.
1478  if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
1479  AM = Backup;
1480  break;
1481  }
1482 
1483  int Cost = 0;
1484  SDValue RHS = Handle.getValue().getOperand(1);
1485  // If the RHS involves a register with multiple uses, this
1486  // transformation incurs an extra mov, due to the neg instruction
1487  // clobbering its operand.
1488  if (!RHS.getNode()->hasOneUse() ||
1489  RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
1490  RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
1491  RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
1492  (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
1493  RHS.getOperand(0).getValueType() == MVT::i32))
1494  ++Cost;
1495  // If the base is a register with multiple uses, this
1496  // transformation may save a mov.
1497  // FIXME: Don't rely on DELETED_NODEs.
1498  if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
1499  AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
1500  !AM.Base_Reg.getNode()->hasOneUse()) ||
1501  AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1502  --Cost;
1503  // If the folded LHS was interesting, this transformation saves
1504  // address arithmetic.
1505  if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
1506  ((AM.Disp != 0) && (Backup.Disp == 0)) +
1507  (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
1508  --Cost;
1509  // If it doesn't look like it may be an overall win, don't do it.
1510  if (Cost >= 0) {
1511  AM = Backup;
1512  break;
1513  }
1514 
1515  // Ok, the transformation is legal and appears profitable. Go for it.
1516  SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
1517  SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
1518  AM.IndexReg = Neg;
1519  AM.Scale = 1;
1520 
1521  // Insert the new nodes into the topological ordering.
1522  insertDAGNode(*CurDAG, Handle.getValue(), Zero);
1523  insertDAGNode(*CurDAG, Handle.getValue(), Neg);
1524  return false;
1525  }
1526 
1527  case ISD::ADD:
1528  if (!matchAdd(N, AM, Depth))
1529  return false;
1530  break;
1531 
1532  case ISD::OR:
1533  // We want to look through a transform in InstCombine and DAGCombiner that
1534  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
1535  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
1536  // An 'lea' can then be used to match the shift (multiply) and add:
1537  // and $1, %esi
1538  // lea (%rsi, %rdi, 8), %rax
1539  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
1540  !matchAdd(N, AM, Depth))
1541  return false;
1542  break;
1543 
1544  case ISD::AND: {
1545  // Perform some heroic transforms on an and of a constant-count shift
1546  // with a constant to enable use of the scaled offset field.
1547 
1548  // Scale must not be used already.
1549  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1550 
1551  SDValue Shift = N.getOperand(0);
1552  if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
1553  SDValue X = Shift.getOperand(0);
1554 
1555  // We only handle up to 64-bit values here as those are what matter for
1556  // addressing mode optimizations.
1557  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1558 
1559  if (!isa<ConstantSDNode>(N.getOperand(1)))
1560  break;
1561  uint64_t Mask = N.getConstantOperandVal(1);
1562 
1563  // Try to fold the mask and shift into an extract and scale.
1564  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
1565  return false;
1566 
1567  // Try to fold the mask and shift directly into the scale.
1568  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
1569  return false;
1570 
1571  // Try to swap the mask and shift to place shifts which can be done as
1572  // a scale on the outside of the mask.
1573  if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
1574  return false;
1575  break;
1576  }
1577  }
1578 
1579  return matchAddressBase(N, AM);
1580 }
1581 
1582 /// Helper for MatchAddress. Add the specified node to the
1583 /// specified addressing mode without any further recursion.
1584 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1585  // Is the base register already occupied?
1586  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
1587  // If so, check to see if the scale index register is set.
1588  if (!AM.IndexReg.getNode()) {
1589  AM.IndexReg = N;
1590  AM.Scale = 1;
1591  return false;
1592  }
1593 
1594  // Otherwise, we cannot select it.
1595  return true;
1596  }
1597 
1598  // Default, generate it as a register.
1599  AM.BaseType = X86ISelAddressMode::RegBase;
1600  AM.Base_Reg = N;
1601  return false;
1602 }
1603 
1604 /// Helper for selectVectorAddr. Handles things that can be folded into a
1605 /// gather/scatter address. The index register and scale should have already
1606 /// been handled.
1607 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
1608  // TODO: Support other operations.
1609  switch (N.getOpcode()) {
1610  case ISD::Constant: {
1611  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1612  if (!foldOffsetIntoAddress(Val, AM))
1613  return false;
1614  break;
1615  }
1616  case X86ISD::Wrapper:
1617  if (!matchWrapper(N, AM))
1618  return false;
1619  break;
1620  }
1621 
1622  return matchAddressBase(N, AM);
1623 }
1624 
1625 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
1626  SDValue &Scale, SDValue &Index,
1627  SDValue &Disp, SDValue &Segment) {
1628  X86ISelAddressMode AM;
1629  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
1630  AM.IndexReg = Mgs->getIndex();
1631  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
1632 
1633  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1634  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1635  if (AddrSpace == 256)
1636  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1637  if (AddrSpace == 257)
1638  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1639  if (AddrSpace == 258)
1640  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1641 
1642  // Try to match into the base and displacement fields.
1643  if (matchVectorAddress(N, AM))
1644  return false;
1645 
1646  MVT VT = N.getSimpleValueType();
1647  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1648  if (!AM.Base_Reg.getNode())
1649  AM.Base_Reg = CurDAG->getRegister(0, VT);
1650  }
1651 
1652  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1653  return true;
1654 }
1655 
1656 /// Returns true if it is able to pattern match an addressing mode.
1657 /// It returns, by reference, the operands which make up the maximal
1658 /// addressing mode it can match.
1659 ///
1660 /// Parent is the parent node of the addr operand that is being matched. It
1661 /// is always a load, store, atomic node, or null. It is only null when
1662 /// checking memory operands for inline asm nodes.
1663 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
1664  SDValue &Scale, SDValue &Index,
1665  SDValue &Disp, SDValue &Segment) {
1666  X86ISelAddressMode AM;
1667 
1668  if (Parent &&
1669  // These opcodes all have an "addr:$ptr" operand but are not MemSDNodes,
1670  // and thus don't have proper addrspace info.
1671  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
1672  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
1673  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
1674  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
1675  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
1676  unsigned AddrSpace =
1677  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1678  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1679  if (AddrSpace == 256)
1680  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1681  if (AddrSpace == 257)
1682  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1683  if (AddrSpace == 258)
1684  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1685  }
1686 
1687  if (matchAddress(N, AM))
1688  return false;
1689 
1690  MVT VT = N.getSimpleValueType();
1691  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1692  if (!AM.Base_Reg.getNode())
1693  AM.Base_Reg = CurDAG->getRegister(0, VT);
1694  }
1695 
1696  if (!AM.IndexReg.getNode())
1697  AM.IndexReg = CurDAG->getRegister(0, VT);
1698 
1699  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1700  return true;
1701 }
1702 
1703 // We can only fold a load if all nodes between it and the root node have a
1704 // single use. If there are additional uses, we could end up duplicating the
1705 // load.
1706 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *N) {
1707  SDNode *User = *N->use_begin();
1708  while (User != Root) {
1709  if (!User->hasOneUse())
1710  return false;
1711  User = *User->use_begin();
1712  }
1713 
1714  return true;
1715 }
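// For example, if the loaded value reaches Root only through an intermediate
// node that has a second user outside this pattern, folding the load into
// Root would duplicate the load (and its chain result), so we refuse to fold.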
1716 
1717 /// Match a scalar SSE load. In particular, we want to match a load whose top
1718 /// elements are either undef or zeros. The load flavor is derived from the
1719 /// type of N, which is either v4f32 or v2f64.
1720 ///
1721 /// We also return:
1722 /// PatternNodeWithChain: the matched node that has a chain input and
1723 /// output.
1724 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
1725  SDValue N, SDValue &Base,
1726  SDValue &Scale, SDValue &Index,
1727  SDValue &Disp, SDValue &Segment,
1728  SDValue &PatternNodeWithChain) {
1729  // We can allow a full vector load here since narrowing a load is ok.
1730  if (ISD::isNON_EXTLoad(N.getNode())) {
1731  PatternNodeWithChain = N;
1732  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1733  IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
1734  hasSingleUsesFromRoot(Root, N.getNode())) {
1735  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1736  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1737  Segment);
1738  }
1739  }
1740 
1741  // We can also match the special zero extended load opcode.
1742  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
1743  PatternNodeWithChain = N;
1744  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1745  IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
1746  hasSingleUsesFromRoot(Root, N.getNode())) {
1747  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
1748  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
1749  Segment);
1750  }
1751  }
1752 
1753  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
1754  // once. Otherwise the load might get duplicated and the chain output of the
1755  // duplicate load will not be observed by all dependencies.
1756  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
1757  PatternNodeWithChain = N.getOperand(0);
1758  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1759  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1760  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
1761  hasSingleUsesFromRoot(Root, N.getNode())) {
1762  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1763  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1764  Segment);
1765  }
1766  }
1767 
1768  // Also handle the case where we explicitly require zeros in the top
1769  // elements. This is a vector shuffle from the zero vector.
1770  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1771  // Check to see if the top elements are all zeros (or bitcast of zeros).
1772  N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1773  N.getOperand(0).getNode()->hasOneUse()) {
1774  PatternNodeWithChain = N.getOperand(0).getOperand(0);
1775  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1776  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1777  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
1778  hasSingleUsesFromRoot(Root, N.getNode())) {
1779  // Okay, this is a zero extending load. Fold it.
1780  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1781  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1782  Segment);
1783  }
1784  }
1785 
1786  return false;
1787 }
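// For example, when a scalar SSE operation reads only the low element of a
// vector operand built as (scalar_to_vector (load addr)), the patterns using
// this selector can fold the load into the operation's memory form instead of
// emitting a separate vector load.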
1788 
1789 
1790 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
1791  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1792  uint64_t ImmVal = CN->getZExtValue();
1793  if (!isUInt<32>(ImmVal))
1794  return false;
1795 
1796  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
1797  return true;
1798  }
1799 
1800  // In static codegen with small code model, we can get the address of a label
1801  // into a register with 'movl'. TableGen has already made sure we're looking
1802  // at a label of some kind.
1803  assert(N->getOpcode() == X86ISD::Wrapper &&
1804  "Unexpected node type for MOV32ri64");
1805  N = N.getOperand(0);
1806 
1807  // At least GNU as does not accept 'movl' for TPOFF relocations.
1808  // FIXME: We could use 'movl' when we know we are targeting MC.
1809  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
1810  return false;
1811 
1812  Imm = N;
1813  if (N->getOpcode() != ISD::TargetGlobalAddress)
1814  return TM.getCodeModel() == CodeModel::Small;
1815 
1816  Optional<ConstantRange> CR =
1817  cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
1818  if (!CR)
1819  return TM.getCodeModel() == CodeModel::Small;
1820 
1821  return CR->getUnsignedMax().ult(1ull << 32);
1822 }
1823 
1824 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
1825  SDValue &Scale, SDValue &Index,
1826  SDValue &Disp, SDValue &Segment) {
1827  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
1828  SDLoc DL(N);
1829 
1830  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
1831  return false;
1832 
1833  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
1834  if (RN && RN->getReg() == 0)
1835  Base = CurDAG->getRegister(0, MVT::i64);
1836  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
1837  // Base could already be %rip, particularly in the x32 ABI.
1838  Base = SDValue(CurDAG->getMachineNode(
1839  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1840  CurDAG->getTargetConstant(0, DL, MVT::i64),
1841  Base,
1842  CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
1843  0);
1844  }
1845 
1846  RN = dyn_cast<RegisterSDNode>(Index);
1847  if (RN && RN->getReg() == 0)
1848  Index = CurDAG->getRegister(0, MVT::i64);
1849  else {
1850  assert(Index.getValueType() == MVT::i32 &&
1851  "Expect to be extending 32-bit registers for use in LEA");
1852  Index = SDValue(CurDAG->getMachineNode(
1853  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1854  CurDAG->getTargetConstant(0, DL, MVT::i64),
1855  Index,
1856  CurDAG->getTargetConstant(X86::sub_32bit, DL,
1857  MVT::i32)),
1858  0);
1859  }
1860 
1861  return true;
1862 }
1863 
1864 /// Calls SelectAddr and determines if the maximal addressing
1865 /// mode it matches can be cost-effectively emitted as an LEA instruction.
1866 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
1867  SDValue &Base, SDValue &Scale,
1868  SDValue &Index, SDValue &Disp,
1869  SDValue &Segment) {
1870  X86ISelAddressMode AM;
1871 
1872  // Save the DL and VT before calling matchAddress; it can invalidate N.
1873  SDLoc DL(N);
1874  MVT VT = N.getSimpleValueType();
1875 
1876  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1877  // segments.
1878  SDValue Copy = AM.Segment;
1879  SDValue T = CurDAG->getRegister(0, MVT::i32);
1880  AM.Segment = T;
1881  if (matchAddress(N, AM))
1882  return false;
1883  assert (T == AM.Segment);
1884  AM.Segment = Copy;
1885 
1886  unsigned Complexity = 0;
1887  if (AM.BaseType == X86ISelAddressMode::RegBase)
1888  if (AM.Base_Reg.getNode())
1889  Complexity = 1;
1890  else
1891  AM.Base_Reg = CurDAG->getRegister(0, VT);
1892  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1893  Complexity = 4;
1894 
1895  if (AM.IndexReg.getNode())
1896  Complexity++;
1897  else
1898  AM.IndexReg = CurDAG->getRegister(0, VT);
1899 
1900  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg or
1901  // a simple shift.
1902  if (AM.Scale > 1)
1903  Complexity++;
1904 
1905  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1906  // to a LEA. This is determined with some experimentation but is by no means
1907  // optimal (especially for code size consideration). LEA is nice because of
1908  // its three-address nature. Tweak the cost function again when we can run
1909  // convertToThreeAddress() at register allocation time.
1910  if (AM.hasSymbolicDisplacement()) {
1911  // For X86-64, always use LEA to materialize RIP-relative addresses.
1912  if (Subtarget->is64Bit())
1913  Complexity = 4;
1914  else
1915  Complexity += 2;
1916  }
1917 
1918  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
1919  Complexity++;
1920 
1921  // If it isn't worth using an LEA, reject it.
1922  if (Complexity <= 2)
1923  return false;
1924 
1925  getAddressOperands(AM, DL, Base, Scale, Index, Disp, Segment);
1926  return true;
1927 }
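// To illustrate the scoring above: matching "base + 4*index + 8" gives
// Complexity = 1 (base) + 1 (index) + 1 (scale > 1) + 1 (disp with base/index)
// = 4 > 2, so the LEA form is used; a plain "base + 8" only reaches
// Complexity = 2 and is rejected in favor of simpler arithmetic.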
1928 
1929 /// This is only run on TargetGlobalTLSAddress nodes.
1930 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
1931  SDValue &Scale, SDValue &Index,
1932  SDValue &Disp, SDValue &Segment) {
1934  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1935 
1936  X86ISelAddressMode AM;
1937  AM.GV = GA->getGlobal();
1938  AM.Disp += GA->getOffset();
1939  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
1940  AM.SymbolFlags = GA->getTargetFlags();
1941 
1942  if (N.getValueType() == MVT::i32) {
1943  AM.Scale = 1;
1944  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1945  } else {
1946  AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1947  }
1948 
1949  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1950  return true;
1951 }
1952 
1953 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
1954  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
1955  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
1956  N.getValueType());
1957  return true;
1958  }
1959 
1960  // Keep track of the original value type and whether this value was
1961  // truncated. If we see a truncation from pointer type to VT that truncates
1962  // bits that are known to be zero, we can use a narrow reference.
1963  EVT VT = N.getValueType();
1964  bool WasTruncated = false;
1965  if (N.getOpcode() == ISD::TRUNCATE) {
1966  WasTruncated = true;
1967  N = N.getOperand(0);
1968  }
1969 
1970  if (N.getOpcode() != X86ISD::Wrapper)
1971  return false;
1972 
1973  // We can only use non-GlobalValues as immediates if they were not truncated,
1974  // as we do not have any range information. If we have a GlobalValue and the
1975  // address was not truncated, we can select it as an operand directly.
1976  unsigned Opc = N.getOperand(0)->getOpcode();
1977  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
1978  Op = N.getOperand(0);
1979  // We can only select the operand directly if we didn't have to look past a
1980  // truncate.
1981  return !WasTruncated;
1982  }
1983 
1984  // Check that the global's range fits into VT.
1985  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
1986  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
1987  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
1988  return false;
1989 
1990  // Okay, we can use a narrow reference.
1991  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
1992  GA->getOffset(), GA->getTargetFlags());
1993  return true;
1994 }
1995 
1996 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
1997  SDValue &Base, SDValue &Scale,
1998  SDValue &Index, SDValue &Disp,
1999  SDValue &Segment) {
2000  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2001  !IsProfitableToFold(N, P, Root) ||
2002  !IsLegalToFold(N, P, Root, OptLevel))
2003  return false;
2004 
2005  return selectAddr(N.getNode(),
2006  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2007 }
2008 
2009 /// Return an SDNode that returns the value of the global base register.
2010 /// Output instructions required to initialize the global base register,
2011 /// if necessary.
2012 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2013  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2014  auto &DL = MF->getDataLayout();
2015  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2016 }
2017 
2018 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2019  if (N->getOpcode() == ISD::TRUNCATE)
2020  N = N->getOperand(0).getNode();
2021  if (N->getOpcode() != X86ISD::Wrapper)
2022  return false;
2023 
2024  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2025  if (!GA)
2026  return false;
2027 
2028  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2029  return CR && CR->getSignedMin().sge(-1ull << Width) &&
2030  CR->getSignedMax().slt(1ull << Width);
2031 }
2032 
2033 /// Test whether the given X86ISD::CMP node has any uses which require the SF
2034 /// or OF bits to be accurate.
2035 static bool hasNoSignedComparisonUses(SDNode *N) {
2036  // Examine each user of the node.
2037  for (SDNode::use_iterator UI = N->use_begin(),
2038  UE = N->use_end(); UI != UE; ++UI) {
2039  // Only examine CopyToReg uses.
2040  if (UI->getOpcode() != ISD::CopyToReg)
2041  return false;
2042  // Only examine CopyToReg uses that copy to EFLAGS.
2043  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
2044  X86::EFLAGS)
2045  return false;
2046  // Examine each user of the CopyToReg use.
2047  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2048  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2049  // Only examine the Flag result.
2050  if (FlagUI.getUse().getResNo() != 1) continue;
2051  // Anything unusual: assume conservatively.
2052  if (!FlagUI->isMachineOpcode()) return false;
2053  // Examine the opcode of the user.
2054  switch (FlagUI->getMachineOpcode()) {
2055  // These comparisons don't treat the most significant bit specially.
2056  case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
2057  case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
2058  case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
2059  case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
2060  case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
2061  case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
2062  case X86::CMOVA16rr: case X86::CMOVA16rm:
2063  case X86::CMOVA32rr: case X86::CMOVA32rm:
2064  case X86::CMOVA64rr: case X86::CMOVA64rm:
2065  case X86::CMOVAE16rr: case X86::CMOVAE16rm:
2066  case X86::CMOVAE32rr: case X86::CMOVAE32rm:
2067  case X86::CMOVAE64rr: case X86::CMOVAE64rm:
2068  case X86::CMOVB16rr: case X86::CMOVB16rm:
2069  case X86::CMOVB32rr: case X86::CMOVB32rm:
2070  case X86::CMOVB64rr: case X86::CMOVB64rm:
2071  case X86::CMOVBE16rr: case X86::CMOVBE16rm:
2072  case X86::CMOVBE32rr: case X86::CMOVBE32rm:
2073  case X86::CMOVBE64rr: case X86::CMOVBE64rm:
2074  case X86::CMOVE16rr: case X86::CMOVE16rm:
2075  case X86::CMOVE32rr: case X86::CMOVE32rm:
2076  case X86::CMOVE64rr: case X86::CMOVE64rm:
2077  case X86::CMOVNE16rr: case X86::CMOVNE16rm:
2078  case X86::CMOVNE32rr: case X86::CMOVNE32rm:
2079  case X86::CMOVNE64rr: case X86::CMOVNE64rm:
2080  case X86::CMOVNP16rr: case X86::CMOVNP16rm:
2081  case X86::CMOVNP32rr: case X86::CMOVNP32rm:
2082  case X86::CMOVNP64rr: case X86::CMOVNP64rm:
2083  case X86::CMOVP16rr: case X86::CMOVP16rm:
2084  case X86::CMOVP32rr: case X86::CMOVP32rm:
2085  case X86::CMOVP64rr: case X86::CMOVP64rm:
2086  continue;
2087  // Anything else: assume conservatively.
2088  default: return false;
2089  }
2090  }
2091  }
2092  return true;
2093 }
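// For example, a CMP whose flags feed only unsigned branches such as JA_1/JB_1
// or unsigned SETcc/CMOVcc users returns true here, while one feeding a signed
// condition such as SETLr (which reads SF and OF) hits the conservative
// default case and returns false.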
2094 
2095 /// Test whether the given node which sets flags has any uses which require the
2096 /// CF flag to be accurate.
2097 static bool hasNoCarryFlagUses(SDNode *N) {
2098  // Examine each user of the node.
2099  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
2100  ++UI) {
2101  // Only check things that use the flags.
2102  if (UI.getUse().getResNo() != 1)
2103  continue;
2104  // Only examine CopyToReg uses.
2105  if (UI->getOpcode() != ISD::CopyToReg)
2106  return false;
2107  // Only examine CopyToReg uses that copy to EFLAGS.
2108  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2109  return false;
2110  // Examine each user of the CopyToReg use.
2111  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2112  FlagUI != FlagUE; ++FlagUI) {
2113  // Only examine the Flag result.
2114  if (FlagUI.getUse().getResNo() != 1)
2115  continue;
2116  // Anything unusual: assume conservatively.
2117  if (!FlagUI->isMachineOpcode())
2118  return false;
2119  // Examine the opcode of the user.
2120  switch (FlagUI->getMachineOpcode()) {
2121  // Comparisons which don't examine the CF flag.
2122  case X86::SETOr: case X86::SETNOr: case X86::SETEr: case X86::SETNEr:
2123  case X86::SETSr: case X86::SETNSr: case X86::SETPr: case X86::SETNPr:
2124  case X86::SETLr: case X86::SETGEr: case X86::SETLEr: case X86::SETGr:
2125  case X86::JO_1: case X86::JNO_1: case X86::JE_1: case X86::JNE_1:
2126  case X86::JS_1: case X86::JNS_1: case X86::JP_1: case X86::JNP_1:
2127  case X86::JL_1: case X86::JGE_1: case X86::JLE_1: case X86::JG_1:
2128  case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
2129  case X86::CMOVO16rm: case X86::CMOVO32rm: case X86::CMOVO64rm:
2130  case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr:
2131  case X86::CMOVNO16rm: case X86::CMOVNO32rm: case X86::CMOVNO64rm:
2132  case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
2133  case X86::CMOVE16rm: case X86::CMOVE32rm: case X86::CMOVE64rm:
2134  case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
2135  case X86::CMOVNE16rm: case X86::CMOVNE32rm: case X86::CMOVNE64rm:
2136  case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
2137  case X86::CMOVS16rm: case X86::CMOVS32rm: case X86::CMOVS64rm:
2138  case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
2139  case X86::CMOVNS16rm: case X86::CMOVNS32rm: case X86::CMOVNS64rm:
2140  case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
2141  case X86::CMOVP16rm: case X86::CMOVP32rm: case X86::CMOVP64rm:
2142  case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
2143  case X86::CMOVNP16rm: case X86::CMOVNP32rm: case X86::CMOVNP64rm:
2144  case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
2145  case X86::CMOVL16rm: case X86::CMOVL32rm: case X86::CMOVL64rm:
2146  case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
2147  case X86::CMOVGE16rm: case X86::CMOVGE32rm: case X86::CMOVGE64rm:
2148  case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
2149  case X86::CMOVLE16rm: case X86::CMOVLE32rm: case X86::CMOVLE64rm:
2150  case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
2151  case X86::CMOVG16rm: case X86::CMOVG32rm: case X86::CMOVG64rm:
2152  continue;
2153  // Anything else: assume conservatively.
2154  default:
2155  return false;
2156  }
2157  }
2158  }
2159  return true;
2160 }
2161 
2162 /// Check whether or not the chain ending in StoreNode is suitable for
2163 /// transforming a {load; op; store} sequence into a single read-modify-write op.
2164 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2165  SDValue StoredVal, SelectionDAG *CurDAG,
2166  LoadSDNode *&LoadNode,
2167  SDValue &InputChain) {
2168  // is the stored value result 0 of the load?
2169  if (StoredVal.getResNo() != 0) return false;
2170 
2171  // are there other uses of the loaded value than the inc or dec?
2172  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2173 
2174  // is the store non-extending and non-indexed?
2175  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2176  return false;
2177 
2178  SDValue Load = StoredVal->getOperand(0);
2179  // Is the stored value a non-extending and non-indexed load?
2180  if (!ISD::isNormalLoad(Load.getNode())) return false;
2181 
2182  // Return LoadNode by reference.
2183  LoadNode = cast<LoadSDNode>(Load);
2184 
2185  // Is the store the only read of the loaded value?
2186  if (!Load.hasOneUse())
2187  return false;
2188 
2189  // Is the address of the store the same as the load?
2190  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2191  LoadNode->getOffset() != StoreNode->getOffset())
2192  return false;
2193 
2194  bool FoundLoad = false;
2195  SmallVector<SDValue, 4> ChainOps;
2196  SmallVector<const SDNode *, 4> LoopWorklist;
2197  SmallPtrSet<const SDNode *, 16> Visited;
2198  const unsigned int Max = 1024;
2199 
2200  // Visualization of Load-Op-Store fusion:
2201  // -------------------------
2202  // Legend:
2203  // *-lines = Chain operand dependencies.
2204  // |-lines = Normal operand dependencies.
2205  // Dependencies flow down and right. n-suffix references multiple nodes.
2206  //
2207  // C Xn C
2208  // * * *
2209  // * * *
2210  // Xn A-LD Yn TF Yn
2211  // * * \ | * |
2212  // * * \ | * |
2213  // * * \ | => A--LD_OP_ST
2214  // * * \| \
2215  // TF OP \
2216  // * | \ Zn
2217  // * | \
2218  // A-ST Zn
2219  //
2220 
2221  // This merge induces dependences from: #1: Xn -> LD, OP, Zn
2222  // #2: Yn -> LD
2223  // #3: ST -> Zn
2224 
2225  // Ensure the transform is safe by checking for the dual
2226  // dependencies to make sure we do not induce a loop.
2227 
2228  // As LD is a predecessor to both OP and ST we can do this by checking:
2229  // a). if LD is a predecessor to a member of Xn or Yn.
2230  // b). if a Zn is a predecessor to ST.
2231 
2232  // However, (b) can only occur through being a chain predecessor to
2233  // ST, which is the same as Zn being a member or predecessor of Xn,
2234  // which is a subset of LD being a predecessor of Xn. So it's
2235  // subsumed by check (a).
2236 
2237  SDValue Chain = StoreNode->getChain();
2238 
2239  // Gather X elements in ChainOps.
2240  if (Chain == Load.getValue(1)) {
2241  FoundLoad = true;
2242  ChainOps.push_back(Load.getOperand(0));
2243  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2244  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2245  SDValue Op = Chain.getOperand(i);
2246  if (Op == Load.getValue(1)) {
2247  FoundLoad = true;
2248  // Drop Load, but keep its chain. No cycle check necessary.
2249  ChainOps.push_back(Load.getOperand(0));
2250  continue;
2251  }
2252  LoopWorklist.push_back(Op.getNode());
2253  ChainOps.push_back(Op);
2254  }
2255  }
2256 
2257  if (!FoundLoad)
2258  return false;
2259 
2260  // Worklist is currently Xn. Add Yn to worklist.
2261  for (SDValue Op : StoredVal->ops())
2262  if (Op.getNode() != LoadNode)
2263  LoopWorklist.push_back(Op.getNode());
2264 
2265  // Check (a) if Load is a predecessor to Xn + Yn
2266  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2267  true))
2268  return false;
2269 
2270  InputChain =
2271  CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2272  return true;
2273 }
2274 
2275 // Change a chain of {load; op; store} of the same value into a simple op
2276 // through memory of that value, if the uses of the modified value and its
2277 // address are suitable.
2278 //
2279 // The tablegen memory-operand pattern is currently not able to match the
2280 // case where the EFLAGS produced by the original operation are used.
2281 //
2282 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2283 // be transferred from a node in the pattern to the result node, probably with
2284 // a new keyword. For example, we have this
2285 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2286 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2287 // (implicit EFLAGS)]>;
2288 // but maybe need something like this
2289 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2290 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2291 // (transferrable EFLAGS)]>;
2292 //
2293 // Until then, we manually fold these and instruction select the operation
2294 // here.
2295 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2296  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2297  SDValue StoredVal = StoreNode->getOperand(1);
2298  unsigned Opc = StoredVal->getOpcode();
2299 
2300  // Before we try to select anything, make sure this is a memory operand size
2301  // and opcode we can handle. Note that this must match the code below that
2302  // actually lowers the opcodes.
2303  EVT MemVT = StoreNode->getMemoryVT();
2304  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2305  MemVT != MVT::i8)
2306  return false;
2307  switch (Opc) {
2308  default:
2309  return false;
2310  case X86ISD::INC:
2311  case X86ISD::DEC:
2312  case X86ISD::ADD:
2313  case X86ISD::ADC:
2314  case X86ISD::SUB:
2315  case X86ISD::SBB:
2316  case X86ISD::AND:
2317  case X86ISD::OR:
2318  case X86ISD::XOR:
2319  break;
2320  }
2321 
2322  LoadSDNode *LoadNode = nullptr;
2323  SDValue InputChain;
2324  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode,
2325  InputChain))
2326  return false;
2327 
2328  SDValue Base, Scale, Index, Disp, Segment;
2329  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2330  Segment))
2331  return false;
2332 
2333  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2334  unsigned Opc8) {
2335  switch (MemVT.getSimpleVT().SimpleTy) {
2336  case MVT::i64:
2337  return Opc64;
2338  case MVT::i32:
2339  return Opc32;
2340  case MVT::i16:
2341  return Opc16;
2342  case MVT::i8:
2343  return Opc8;
2344  default:
2345  llvm_unreachable("Invalid size!");
2346  }
2347  };
2348 
2349  MachineSDNode *Result;
2350  switch (Opc) {
2351  case X86ISD::INC:
2352  case X86ISD::DEC: {
2353  unsigned NewOpc =
2354  Opc == X86ISD::INC
2355  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2356  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2357  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2358  Result =
2359  CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
2360  break;
2361  }
2362  case X86ISD::ADD:
2363  case X86ISD::ADC:
2364  case X86ISD::SUB:
2365  case X86ISD::SBB:
2366  case X86ISD::AND:
2367  case X86ISD::OR:
2368  case X86ISD::XOR: {
2369  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
2370  switch (Opc) {
2371  case X86ISD::ADD:
2372  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
2373  X86::ADD8mr);
2374  case X86ISD::ADC:
2375  return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
2376  X86::ADC8mr);
2377  case X86ISD::SUB:
2378  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
2379  X86::SUB8mr);
2380  case X86ISD::SBB:
2381  return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
2382  X86::SBB8mr);
2383  case X86ISD::AND:
2384  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
2385  X86::AND8mr);
2386  case X86ISD::OR:
2387  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
2388  case X86ISD::XOR:
2389  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
2390  X86::XOR8mr);
2391  default:
2392  llvm_unreachable("Invalid opcode!");
2393  }
2394  };
2395  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
2396  switch (Opc) {
2397  case X86ISD::ADD:
2398  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
2399  case X86ISD::ADC:
2400  return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
2401  case X86ISD::SUB:
2402  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
2403  case X86ISD::SBB:
2404  return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
2405  case X86ISD::AND:
2406  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
2407  case X86ISD::OR:
2408  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
2409  case X86ISD::XOR:
2410  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
2411  default:
2412  llvm_unreachable("Invalid opcode!");
2413  }
2414  };
2415  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
2416  switch (Opc) {
2417  case X86ISD::ADD:
2418  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
2419  X86::ADD8mi);
2420  case X86ISD::ADC:
2421  return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
2422  X86::ADC8mi);
2423  case X86ISD::SUB:
2424  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
2425  X86::SUB8mi);
2426  case X86ISD::SBB:
2427  return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
2428  X86::SBB8mi);
2429  case X86ISD::AND:
2430  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
2431  X86::AND8mi);
2432  case X86ISD::OR:
2433  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
2434  X86::OR8mi);
2435  case X86ISD::XOR:
2436  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
2437  X86::XOR8mi);
2438  default:
2439  llvm_unreachable("Invalid opcode!");
2440  }
2441  };
2442 
2443  unsigned NewOpc = SelectRegOpcode(Opc);
2444  SDValue Operand = StoredVal->getOperand(1);
2445 
2446  // See if the operand is a constant that we can fold into an immediate
2447  // operand.
2448  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
2449  auto OperandV = OperandC->getAPIntValue();
2450 
2451  // Check if we can shrink the operand enough to fit in an immediate (or
2452  // fit into a smaller immediate) by negating it and switching the
2453  // operation.
2454  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
2455  ((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 &&
2456  (-OperandV).getMinSignedBits() <= 8) ||
2457  (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
2458  (-OperandV).getMinSignedBits() <= 32)) &&
2459  hasNoCarryFlagUses(StoredVal.getNode())) {
2460  OperandV = -OperandV;
2461  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
2462  }
2463 
2464  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
2465  // the larger immediate operand.
2466  if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) {
2467  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2468  NewOpc = SelectImm8Opcode(Opc);
2469  } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() &&
2470  (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) {
2471  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2472  NewOpc = SelectImmOpcode(Opc);
2473  }
2474  }
2475 
2476  if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
2477  SDValue CopyTo =
2478  CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
2479  StoredVal.getOperand(2), SDValue());
2480 
2481  const SDValue Ops[] = {Base, Scale, Index, Disp,
2482  Segment, Operand, CopyTo, CopyTo.getValue(1)};
2483  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2484  Ops);
2485  } else {
2486  const SDValue Ops[] = {Base, Scale, Index, Disp,
2487  Segment, Operand, InputChain};
2488  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2489  Ops);
2490  }
2491  break;
2492  }
2493  default:
2494  llvm_unreachable("Invalid opcode!");
2495  }
2496 
2497  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
2498  MemOp[0] = StoreNode->getMemOperand();
2499  MemOp[1] = LoadNode->getMemOperand();
2500  Result->setMemRefs(MemOp, MemOp + 2);
2501 
2502  // Update Load Chain uses as well.
2503  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
2504  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2505  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2506  CurDAG->RemoveDeadNode(Node);
2507  return true;
2508 }
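// For example, a dag of the form
//   t1: i32 = load [addr]
//   t2: i32 = X86ISD::ADD t1, Constant<5>
//   store t2, [addr]
// is selected here as a single ADD32mi8 ("addl $5, (addr)"), and the chain
// uses of the original load and store are rewired to the new instruction.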
2509 
2510 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
2511 bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
2512  MVT NVT = Node->getSimpleValueType(0);
2513  SDLoc dl(Node);
2514 
2515  SDValue N0 = Node->getOperand(0);
2516  SDValue N1 = Node->getOperand(1);
2517 
2518  if (!Subtarget->hasBMI() && !Subtarget->hasTBM())
2519  return false;
2520 
2521  // Must have a shift right.
2522  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
2523  return false;
2524 
2525  // Shift can't have additional users.
2526  if (!N0->hasOneUse())
2527  return false;
2528 
2529  // Only supported for 32 and 64 bits.
2530  if (NVT != MVT::i32 && NVT != MVT::i64)
2531  return false;
2532 
2533  // Shift amount and RHS of and must be constant.
2534  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
2535  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2536  if (!MaskCst || !ShiftCst)
2537  return false;
2538 
2539  // And RHS must be a mask.
2540  uint64_t Mask = MaskCst->getZExtValue();
2541  if (!isMask_64(Mask))
2542  return false;
2543 
2544  uint64_t Shift = ShiftCst->getZExtValue();
2545  uint64_t MaskSize = countPopulation(Mask);
2546 
2547  // Don't interfere with something that can be handled by extracting AH.
2548  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
2549  if (Shift == 8 && MaskSize == 8)
2550  return false;
2551 
2552  // Make sure we are only using bits that were in the original value, not
2553  // shifted in.
2554  if (Shift + MaskSize > NVT.getSizeInBits())
2555  return false;
2556 
2557  // Create a BEXTR node and run it through selection.
2558  SDValue C = CurDAG->getConstant(Shift | (MaskSize << 8), dl, NVT);
2559  SDValue New = CurDAG->getNode(X86ISD::BEXTR, dl, NVT,
2560  N0->getOperand(0), C);
2561  ReplaceNode(Node, New.getNode());
2562  SelectCode(New.getNode());
2563  return true;
2564 }
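// For example, (and (srl x, 4), 0xFFF) becomes a BEXTR node with control value
// 0x0C04: the shift amount 4 in bits 7:0 and the mask size 12 in bits 15:8,
// exactly the "Shift | (MaskSize << 8)" constant built above.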
2565 
2566 /// If the high bits of an 'and' operand are known zero, try setting the
2567 /// high bits of an 'and' constant operand to produce a smaller encoding by
2568 /// creating a small, sign-extended negative immediate rather than a large
2569 /// positive one. This reverses a transform in SimplifyDemandedBits that
2570 /// shrinks mask constants by clearing bits. There is also a possibility that
2571 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
2572 /// case, just replace the 'and'. Return 'true' if the node is replaced.
2573 bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
2574  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
2575  // have immediate operands.
2576  MVT VT = And->getSimpleValueType(0);
2577  if (VT != MVT::i32 && VT != MVT::i64)
2578  return false;
2579 
2580  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
2581  if (!And1C)
2582  return false;
2583 
2584  // Bail out if the mask constant is already negative. It can't shrink any more.
2585  // If the upper 32 bits of a 64-bit mask are all zeros, we have special isel
2586  // patterns to use a 32-bit 'and' instead of a 64-bit 'and' by relying on the
2587  // implicit zeroing of 32-bit ops, so we should check whether the lower 32 bits
2588  // are negative too.
2589  APInt MaskVal = And1C->getAPIntValue();
2590  unsigned MaskLZ = MaskVal.countLeadingZeros();
2591  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
2592  return false;
2593 
2594  // Don't extend into the upper 32 bits of a 64-bit mask.
2595  if (VT == MVT::i64 && MaskLZ >= 32) {
2596  MaskLZ -= 32;
2597  MaskVal = MaskVal.trunc(32);
2598  }
2599 
2600  SDValue And0 = And->getOperand(0);
2601  APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
2602  APInt NegMaskVal = MaskVal | HighZeros;
2603 
2604  // If a negative constant would not allow a smaller encoding, there's no need
2605  // to continue. Only change the constant when we know it's a win.
2606  unsigned MinWidth = NegMaskVal.getMinSignedBits();
2607  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
2608  return false;
2609 
2610  // Extend masks if we truncated above.
2611  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
2612  NegMaskVal = NegMaskVal.zext(64);
2613  HighZeros = HighZeros.zext(64);
2614  }
2615 
2616  // The variable operand must be all zeros in the top bits to allow using the
2617  // new, negative constant as the mask.
2618  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
2619  return false;
2620 
2621  // Check if the mask is -1. In that case, this is an unnecessary instruction
2622  // that escaped earlier analysis.
2623  if (NegMaskVal.isAllOnesValue()) {
2624  ReplaceNode(And, And0.getNode());
2625  return true;
2626  }
2627 
2628  // A negative mask allows a smaller encoding. Create a new 'and' node.
2629  SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
2630  SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
2631  ReplaceNode(And, NewAnd.getNode());
2632  SelectCode(NewAnd.getNode());
2633  return true;
2634 }
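// For example, if the upper 8 bits of a 32-bit operand are known to be zero,
// an 'and' with 0x00fffff0 can instead use the mask 0xfffffff0 (-16), which
// sign-extends from an 8-bit immediate and so gets a smaller encoding.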
2635 
2636 void X86DAGToDAGISel::Select(SDNode *Node) {
2637  MVT NVT = Node->getSimpleValueType(0);
2638  unsigned Opc, MOpc;
2639  unsigned Opcode = Node->getOpcode();
2640  SDLoc dl(Node);
2641 
2642  if (Node->isMachineOpcode()) {
2643  DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
2644  Node->setNodeId(-1);
2645  return; // Already selected.
2646  }
2647 
2648  switch (Opcode) {
2649  default: break;
2650  case ISD::BRIND: {
2651  if (Subtarget->isTargetNaCl())
2652  // NaCl has its own pass where jmp %r32 instructions are converted to
2653  // jmp %r64, so we leave the instruction alone here.
2654  break;
2655  if (Subtarget->isTarget64BitILP32()) {
2656  // Converts a 32-bit register to a 64-bit, zero-extended version of
2657  // it. This is needed because x86-64 can do many things, but jmp %r32
2658  // ain't one of them.
2659  const SDValue &Target = Node->getOperand(1);
2660  assert(Target.getSimpleValueType() == llvm::MVT::i32);
2661  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
2662  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
2663  Node->getOperand(0), ZextTarget);
2664  ReplaceNode(Node, Brind.getNode());
2665  SelectCode(ZextTarget.getNode());
2666  SelectCode(Brind.getNode());
2667  return;
2668  }
2669  break;
2670  }
2671  case X86ISD::GlobalBaseReg:
2672  ReplaceNode(Node, getGlobalBaseReg());
2673  return;
2674 
2675  case X86ISD::SELECT:
2676  case X86ISD::SHRUNKBLEND: {
2677  // SHRUNKBLEND selects like a regular VSELECT. Same with X86ISD::SELECT.
2678  SDValue VSelect = CurDAG->getNode(
2679  ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
2680  Node->getOperand(1), Node->getOperand(2));
2681  ReplaceNode(Node, VSelect.getNode());
2682  SelectCode(VSelect.getNode());
2683  // We already called ReplaceUses.
2684  return;
2685  }
2686 
2687  case ISD::AND:
2688  if (matchBEXTRFromAnd(Node))
2689  return;
2690  if (shrinkAndImmediate(Node))
2691  return;
2692 
2693  LLVM_FALLTHROUGH;
2694  case ISD::OR:
2695  case ISD::XOR: {
2696 
2697  // For operations of the form (x << C1) op C2, check if we can use a smaller
2698  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
2699  SDValue N0 = Node->getOperand(0);
2700  SDValue N1 = Node->getOperand(1);
2701 
2702  if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
2703  break;
2704 
2705  // i8 is unshrinkable, i16 should be promoted to i32.
2706  if (NVT != MVT::i32 && NVT != MVT::i64)
2707  break;
2708 
2709  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
2710  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2711  if (!Cst || !ShlCst)
2712  break;
2713 
2714  int64_t Val = Cst->getSExtValue();
2715  uint64_t ShlVal = ShlCst->getZExtValue();
2716 
2717  // Make sure that we don't change the operation by removing bits.
2718  // This only matters for OR and XOR; AND is unaffected.
2719  uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
2720  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
2721  break;
2722 
2723  unsigned ShlOp, AddOp, Op;
2724  MVT CstVT = NVT;
2725 
2726  // Check the minimum bitwidth for the new constant.
2727  // TODO: AND32ri is the same as AND64ri32 with zext imm.
2728  // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
2729  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
2730  if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
2731  CstVT = MVT::i8;
2732  else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
2733  CstVT = MVT::i32;
2734 
2735  // Bail if there is no smaller encoding.
2736  if (NVT == CstVT)
2737  break;
2738 
2739  switch (NVT.SimpleTy) {
2740  default: llvm_unreachable("Unsupported VT!");
2741  case MVT::i32:
2742  assert(CstVT == MVT::i8);
2743  ShlOp = X86::SHL32ri;
2744  AddOp = X86::ADD32rr;
2745 
2746  switch (Opcode) {
2747  default: llvm_unreachable("Impossible opcode");
2748  case ISD::AND: Op = X86::AND32ri8; break;
2749  case ISD::OR: Op = X86::OR32ri8; break;
2750  case ISD::XOR: Op = X86::XOR32ri8; break;
2751  }
2752  break;
2753  case MVT::i64:
2754  assert(CstVT == MVT::i8 || CstVT == MVT::i32);
2755  ShlOp = X86::SHL64ri;
2756  AddOp = X86::ADD64rr;
2757 
2758  switch (Opcode) {
2759  default: llvm_unreachable("Impossible opcode");
2760  case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
2761  case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
2762  case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
2763  }
2764  break;
2765  }
2766 
2767  // Emit the smaller op and the shift.
2768  SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
2769  SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
2770  if (ShlVal == 1)
2771  CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
2772  SDValue(New, 0));
2773  else
2774  CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
2775  getI8Imm(ShlVal, dl));
2776  return;
2777  }
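  // To illustrate the (x << C1) op C2 transform above: on i64, the constant in
  // "(x << 16) | 0xABCD0000" cannot be encoded as a sign-extended 32-bit
  // immediate, but "(x | 0xABCD) << 16" computes the same value, so it is
  // emitted here as an OR64ri32 followed by SHL64ri with the smaller constant.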
2778  case X86ISD::UMUL8:
2779  case X86ISD::SMUL8: {
2780  SDValue N0 = Node->getOperand(0);
2781  SDValue N1 = Node->getOperand(1);
2782 
2783  Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
2784 
2785  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
2786  N0, SDValue()).getValue(1);
2787 
2788  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
2789  SDValue Ops[] = {N1, InFlag};
2790  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2791 
2792  ReplaceNode(Node, CNode);
2793  return;
2794  }
2795 
2796  case X86ISD::UMUL: {
2797  SDValue N0 = Node->getOperand(0);
2798  SDValue N1 = Node->getOperand(1);
2799 
2800  unsigned LoReg;
2801  switch (NVT.SimpleTy) {
2802  default: llvm_unreachable("Unsupported VT!");
2803  // MVT::i8 is handled by X86ISD::UMUL8.
2804  case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
2805  case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
2806  case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
2807  }
2808 
2809  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
2810  N0, SDValue()).getValue(1);
2811 
2812  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
2813  SDValue Ops[] = {N1, InFlag};
2814  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2815 
2816  ReplaceNode(Node, CNode);
2817  return;
2818  }
2819 
2820  case ISD::SMUL_LOHI:
2821  case ISD::UMUL_LOHI: {
2822  SDValue N0 = Node->getOperand(0);
2823  SDValue N1 = Node->getOperand(1);
2824 
2825  bool isSigned = Opcode == ISD::SMUL_LOHI;
2826  bool hasBMI2 = Subtarget->hasBMI2();
2827  if (!isSigned) {
2828  switch (NVT.SimpleTy) {
2829  default: llvm_unreachable("Unsupported VT!");
2830  case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
2831  MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
2832  case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
2833  MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
2834  }
2835  } else {
2836  switch (NVT.SimpleTy) {
2837  default: llvm_unreachable("Unsupported VT!");
2838  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
2839  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
2840  }
2841  }
2842 
2843  unsigned SrcReg, LoReg, HiReg;
2844  switch (Opc) {
2845  default: llvm_unreachable("Unknown MUL opcode!");
2846  case X86::IMUL32r:
2847  case X86::MUL32r:
2848  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
2849  break;
2850  case X86::IMUL64r:
2851  case X86::MUL64r:
2852  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
2853  break;
2854  case X86::MULX32rr:
2855  SrcReg = X86::EDX; LoReg = HiReg = 0;
2856  break;
2857  case X86::MULX64rr:
2858  SrcReg = X86::RDX; LoReg = HiReg = 0;
2859  break;
2860  }
2861 
2862  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2863  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2864  // Multiply is commutative.
2865  if (!foldedLoad) {
2866  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2867  if (foldedLoad)
2868  std::swap(N0, N1);
2869  }
2870 
2871  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
2872  N0, SDValue()).getValue(1);
2873  SDValue ResHi, ResLo;
2874 
2875  if (foldedLoad) {
2876  SDValue Chain;
2877  MachineSDNode *CNode = nullptr;
2878  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2879  InFlag };
2880  if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
2881  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
2882  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2883  ResHi = SDValue(CNode, 0);
2884  ResLo = SDValue(CNode, 1);
2885  Chain = SDValue(CNode, 2);
2886  InFlag = SDValue(CNode, 3);
2887  } else {
2888  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2889  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2890  Chain = SDValue(CNode, 0);
2891  InFlag = SDValue(CNode, 1);
2892  }
2893 
2894  // Update the chain.
2895  ReplaceUses(N1.getValue(1), Chain);
2896  // Record the mem-refs
2897  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2898  MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
2899  CNode->setMemRefs(MemOp, MemOp + 1);
2900  } else {
2901  SDValue Ops[] = { N1, InFlag };
2902  if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
2903  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
2904  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2905  ResHi = SDValue(CNode, 0);
2906  ResLo = SDValue(CNode, 1);
2907  InFlag = SDValue(CNode, 2);
2908  } else {
2909  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
2910  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2911  InFlag = SDValue(CNode, 0);
2912  }
2913  }
2914 
2915  // Copy the low half of the result, if it is needed.
2916  if (!SDValue(Node, 0).use_empty()) {
2917  if (!ResLo.getNode()) {
2918  assert(LoReg && "Register for low half is not defined!");
2919  ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
2920  InFlag);
2921  InFlag = ResLo.getValue(2);
2922  }
2923  ReplaceUses(SDValue(Node, 0), ResLo);
2924  DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
2925  }
2926  // Copy the high half of the result, if it is needed.
2927  if (!SDValue(Node, 1).use_empty()) {
2928  if (!ResHi.getNode()) {
2929  assert(HiReg && "Register for high half is not defined!");
2930  ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
2931  InFlag);
2932  InFlag = ResHi.getValue(2);
2933  }
2934  ReplaceUses(SDValue(Node, 1), ResHi);
2935  DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
2936  }
2937 
2938  CurDAG->RemoveDeadNode(Node);
2939  return;
2940  }
2941 
2942  case ISD::SDIVREM:
2943  case ISD::UDIVREM:
2944  case X86ISD::SDIVREM8_SEXT_HREG:
2945  case X86ISD::UDIVREM8_ZEXT_HREG: {
2946  SDValue N0 = Node->getOperand(0);
2947  SDValue N1 = Node->getOperand(1);
2948 
2949  bool isSigned = (Opcode == ISD::SDIVREM ||
2950  Opcode == X86ISD::SDIVREM8_SEXT_HREG);
2951  if (!isSigned) {
2952  switch (NVT.SimpleTy) {
2953  default: llvm_unreachable("Unsupported VT!");
2954  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
2955  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
2956  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
2957  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
2958  }
2959  } else {
2960  switch (NVT.SimpleTy) {
2961  default: llvm_unreachable("Unsupported VT!");
2962  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
2963  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
2964  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
2965  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
2966  }
2967  }
2968 
2969  unsigned LoReg, HiReg, ClrReg;
2970  unsigned SExtOpcode;
2971  switch (NVT.SimpleTy) {
2972  default: llvm_unreachable("Unsupported VT!");
2973  case MVT::i8:
2974  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
2975  SExtOpcode = X86::CBW;
2976  break;
2977  case MVT::i16:
2978  LoReg = X86::AX; HiReg = X86::DX;
2979  ClrReg = X86::DX;
2980  SExtOpcode = X86::CWD;
2981  break;
2982  case MVT::i32:
2983  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
2984  SExtOpcode = X86::CDQ;
2985  break;
2986  case MVT::i64:
2987  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
2988  SExtOpcode = X86::CQO;
2989  break;
2990  }
2991 
2992  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2993  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2994  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
2995 
2996  SDValue InFlag;
2997  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
2998  // Special case for div8, just use a move with zero extension to AX to
2999  // clear the upper 8 bits (AH).
3000  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
3001  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3002  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
3003  Move =
3004  SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
3005  MVT::Other, Ops), 0);
3006  Chain = Move.getValue(1);
3007  ReplaceUses(N0.getValue(1), Chain);
3008  } else {
3009  Move =
3010  SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
3011  Chain = CurDAG->getEntryNode();
3012  }
3013  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
3014  InFlag = Chain.getValue(1);
3015  } else {
3016  InFlag =
3017  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
3018  LoReg, N0, SDValue()).getValue(1);
3019  if (isSigned && !signBitIsZero) {
3020  // Sign extend the low part into the high part.
3021  InFlag =
3022  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
3023  } else {
3024  // Zero out the high part, effectively zero extending the input.
3025  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
3026  switch (NVT.SimpleTy) {
3027  case MVT::i16:
3028  ClrNode =
3029  SDValue(CurDAG->getMachineNode(
3030  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
3031  CurDAG->getTargetConstant(X86::sub_16bit, dl,
3032  MVT::i32)),
3033  0);
3034  break;
3035  case MVT::i32:
3036  break;
3037  case MVT::i64:
3038  ClrNode =
3039  SDValue(CurDAG->getMachineNode(
3040  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
3041  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
3042  CurDAG->getTargetConstant(X86::sub_32bit, dl,
3043  MVT::i32)),
3044  0);
3045  break;
3046  default:
3047  llvm_unreachable("Unexpected division source");
3048  }
3049 
3050  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
3051  ClrNode, InFlag).getValue(1);
3052  }
3053  }
3054 
3055  if (foldedLoad) {
3056  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3057  InFlag };
3058  MachineSDNode *CNode =
3059  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
3060  InFlag = SDValue(CNode, 1);
3061  // Update the chain.
3062  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
3063  // Record the mem-refs
3064  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3065  MemOp[0] = cast<LoadSDNode>(N1)->getMemOperand();
3066  CNode->setMemRefs(MemOp, MemOp + 1);
3067  } else {
3068  InFlag =
3069  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
3070  }
3071 
3072  // Prevent use of AH in a REX instruction by explicitly copying it to
3073  // an ABCD_L register.
3074  //
3075  // The current assumption of the register allocator is that isel
3076  // won't generate explicit references to the GR8_ABCD_H registers. If
3077  // the allocator and/or the backend get enhanced to be more robust in
3078  // that regard, this can be, and should be, removed.
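  // Background: an instruction carrying a REX prefix cannot encode AH/BH/CH/DH
  // at all (those encodings are reinterpreted as SPL/BPL/SIL/DIL), which is
  // why the copy below uses the _NOREX forms of the extending moves.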
3079  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
3080  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
3081  unsigned AHExtOpcode =
3082  isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
3083 
3084  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
3085  MVT::Glue, AHCopy, InFlag);
3086  SDValue Result(RNode, 0);
3087  InFlag = SDValue(RNode, 1);
3088 
3089  if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
3090  Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
3091  assert(Node->getValueType(1) == MVT::i32 && "Unexpected result type!");
3092  } else {
3093  Result =
3094  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
3095  }
3096  ReplaceUses(SDValue(Node, 1), Result);
3097  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
3098  }
3099  // Copy the division (low) result, if it is needed.
3100  if (!SDValue(Node, 0).use_empty()) {
3101  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
3102  LoReg, NVT, InFlag);
3103  InFlag = Result.getValue(2);
3104  ReplaceUses(SDValue(Node, 0), Result);
3105  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
3106  }
3107  // Copy the remainder (high) result, if it is needed.
3108  if (!SDValue(Node, 1).use_empty()) {
3109  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
3110  HiReg, NVT, InFlag);
3111  InFlag = Result.getValue(2);
3112  ReplaceUses(SDValue(Node, 1), Result);
3113  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
3114  }
3115  CurDAG->RemoveDeadNode(Node);
3116  return;
3117  }
3118 
3119  case X86ISD::CMP: {
3120  SDValue N0 = Node->getOperand(0);
3121  SDValue N1 = Node->getOperand(1);
3122 
3123  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
3124  hasNoSignedComparisonUses(Node))
3125  N0 = N0.getOperand(0);
3126 
3127  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
3128  // use a smaller encoding.
3129  // Look past the truncate if CMP is the only use of it.
3130  if (N0.getOpcode() == ISD::AND &&
3131  N0.getNode()->hasOneUse() &&
3132  N0.getValueType() != MVT::i8 &&
3133  X86::isZeroNode(N1)) {
3134  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3135  if (!C) break;
3136  uint64_t Mask = C->getZExtValue();
3137 
3138  MVT VT;
3139  int SubRegOp;
3140  unsigned Op;
3141 
3142  if (isUInt<8>(Mask) &&
3143  (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) {
3144  // For example, convert "testl %eax, $8" to "testb %al, $8"
3145  VT = MVT::i8;
3146  SubRegOp = X86::sub_8bit;
3147  Op = X86::TEST8ri;
3148  } else if (OptForMinSize && isUInt<16>(Mask) &&
3149  (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
3150  // For example, "testl %eax, $32776" to "testw %ax, $32776".
3151  // NOTE: We only want to form TESTW instructions if optimizing for
3152  // min size. Otherwise we only save one byte and possibly get a length
3153  // changing prefix penalty in the decoders.
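  // (The 16-bit immediate form carries a 0x66 operand-size prefix that changes
  // the immediate size, which many x86 decoders treat as a length-changing
  // prefix and stall on.)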
3154  VT = MVT::i16;
3155  SubRegOp = X86::sub_16bit;
3156  Op = X86::TEST16ri;
3157  } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
3158  (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
3159  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
3160  // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
3161  // Otherwise, we find ourselves in a position where we have to do
3162  // promotion. If previous passes did not promote the and, we assume
3163  // they had a good reason not to and do not promote here.
3164  VT = MVT::i32;
3165  SubRegOp = X86::sub_32bit;
3166  Op = X86::TEST32ri;
3167  } else {
3168  // No eligible transformation was found.
3169  break;
3170  }
3171 
3172  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
3173  SDValue Reg = N0.getOperand(0);
3174 
3175  // Extract the subregister if necessary.
3176  if (N0.getValueType() != VT)
3177  Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
3178 
3179  // Emit the narrower test instruction (testb, testw, or testl).
3180  SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm);
3181  // Replace CMP with TEST.
3182  ReplaceNode(Node, NewNode);
3183  return;
3184  }
3185  break;
3186  }
3187  case ISD::STORE:
3188  if (foldLoadStoreIntoMemOperand(Node))
3189  return;
3190  break;
3191  }
3192 
3193  SelectCode(Node);
3194 }
3195 
3196 bool X86DAGToDAGISel::
3197 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
3198  std::vector<SDValue> &OutOps) {
3199  SDValue Op0, Op1, Op2, Op3, Op4;
3200  switch (ConstraintID) {
3201  default:
3202  llvm_unreachable("Unexpected asm memory constraint");
3203  case InlineAsm::Constraint_i:
3204  // FIXME: It seems strange that 'i' is needed here since it's supposed to
3205  // be an immediate and not a memory constraint.
3206  LLVM_FALLTHROUGH;
3207  case InlineAsm::Constraint_o: // offsetable ??
3208  case InlineAsm::Constraint_v: // not offsetable ??
3209  case InlineAsm::Constraint_m: // memory
3210  case InlineAsm::Constraint_X:
3211  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
3212  return true;
3213  break;
3214  }
3215 
3216  OutOps.push_back(Op0);
3217  OutOps.push_back(Op1);
3218  OutOps.push_back(Op2);
3219  OutOps.push_back(Op3);
3220  OutOps.push_back(Op4);
3221  return false;
3222 }
3223 
3224 /// This pass converts a legalized DAG into a X86-specific DAG,
3225 /// ready for instruction scheduling.
3226 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
3227  CodeGenOpt::Level OptLevel) {
3228  return new X86DAGToDAGISel(TM, OptLevel);
3229 }
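// A minimal usage sketch (assuming the conventional X86PassConfig hook in
// X86TargetMachine.cpp): the target's pass configuration creates this pass
// when setting up instruction selection, roughly as:
//
//   bool X86PassConfig::addInstSelector() {
//     // Install the X86 DAG->DAG instruction selector.
//     addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
//     return false;
//   }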