//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized DAG to an X86 DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include <stdint.h>
using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
  /// numbers for the leaves of the matched tree.
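  /// The matched address has the x86 form Base + Scale*Index + Disp (plus an
  /// optional segment); for example, the memory operand of
  /// "movl 8(%rsi,%rdi,4), %eax" would match with Base_Reg = %rsi,
  /// IndexReg = %rdi, Scale = 4, and Disp = 8.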
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;
    int Base_FrameIndex;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    MCSymbol *MCSym;
    int JT;
    unsigned Align;             // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
        : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
          Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
          MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}

    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
    }

    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

    /// Return true if this addressing mode is already RIP-relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode())
        Base_Reg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
             << " Scale" << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode())
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " MCSym ";
      if (MCSym)
        dbgs() << MCSym;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Align << '\n';
    }
#endif
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86-specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel final : public SelectionDAGISel {
    /// Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// If true, selector should try to optimize for code size instead of
    /// performance.
    bool OptForSize;

    /// If true, selector should try to optimize for minimum code size.
    bool OptForMinSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), OptForSize(false),
          OptForMinSize(false) {}

    StringRef getPassName() const override {
      return "X86 DAG->DAG Instruction Selection";
    }

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Reset the subtarget each time through.
      Subtarget = &MF.getSubtarget<X86Subtarget>();
      SelectionDAGISel::runOnMachineFunction(MF);
      return true;
    }

    void EmitFunctionEntryCode() override;

    bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;

    void PreprocessISelDAG() override;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    void Select(SDNode *N) override;
    bool tryGather(SDNode *N, unsigned Opc);

    bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
    bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool matchAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
    bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                          SDValue &Scale, SDValue &Index, SDValue &Disp,
                          SDValue &Segment);
    bool selectMOV64Imm32(SDValue N, SDValue &Imm);
    bool selectLEAAddr(SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool selectLEA64_32Addr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
    bool selectTLSADDRAddr(SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool selectScalarSSELoad(SDNode *Root, SDValue N,
                             SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &NodeWithChain);
    bool selectRelocImm(SDValue N, SDValue &Op);

    bool tryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    /// Implement addressing mode selection for inline asm expressions.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override;

    void emitSpecialCodeForMain();

    inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
                                   SDValue &Base, SDValue &Scale,
                                   SDValue &Index, SDValue &Disp,
                                   SDValue &Segment) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
                 ? CurDAG->getTargetFrameIndex(
                       AM.Base_FrameIndex,
                       TLI->getPointerTy(CurDAG->getDataLayout()))
                 : AM.Base_Reg;
      Scale = getI8Imm(AM.Scale, DL);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP-relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES) {
        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.MCSym) {
        assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
        assert(AM.SymbolFlags == 0 && "oo");
        Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    // Utility function to determine whether we should avoid selecting
    // immediate forms of instructions for better code size.
    // At a high level, we'd like to avoid such instructions when
    // we have similar constants used within the same basic block
    // that can be kept in a register.
    //
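    // For example, when optimizing for size, storing the same immediate
    // twice, as in:
    //   movl $0x12345678, (%rdi)
    //   movl $0x12345678, (%rsi)
    // is usually smaller when selected through a register:
    //   movl $0x12345678, %eax
    //   movl %eax, (%rdi)
    //   movl %eax, (%rsi)
    // since the 4-byte immediate is then encoded only once.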
    bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
      uint32_t UseCount = 0;

      // Do not want to hoist if we're not optimizing for size.
      // TODO: We'd like to remove this restriction.
      // See the comment in X86InstrInfo.td for more info.
      if (!OptForSize)
        return false;

      // Walk all the users of the immediate.
      for (SDNode::use_iterator UI = N->use_begin(),
           UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {

        SDNode *User = *UI;

        // This user is already selected. Count it as a legitimate use and
        // move on.
        if (User->isMachineOpcode()) {
          UseCount++;
          continue;
        }

        // We want to count stores of immediates as real uses.
        if (User->getOpcode() == ISD::STORE &&
            User->getOperand(1).getNode() == N) {
          UseCount++;
          continue;
        }

        // We don't currently match users that have > 2 operands (except
        // for stores, which are handled above).
        // Those instructions won't match in ISel for now, and would
        // be counted incorrectly.
        // This may change in the future as we add additional instruction
        // types.
        if (User->getNumOperands() != 2)
          continue;

        // Immediates that are used for offsets as part of stack
        // manipulation should be left alone. These are typically
        // used to indicate SP offsets for argument passing and
        // will get pulled into stores/pushes (implicitly).
        if (User->getOpcode() == X86ISD::ADD ||
            User->getOpcode() == ISD::ADD    ||
            User->getOpcode() == X86ISD::SUB ||
            User->getOpcode() == ISD::SUB) {

          // Find the other operand of the add/sub.
          SDValue OtherOp = User->getOperand(0);
          if (OtherOp.getNode() == N)
            OtherOp = User->getOperand(1);

          // Don't count if the other operand is SP.
          RegisterSDNode *RegNode;
          if (OtherOp->getOpcode() == ISD::CopyFromReg &&
              (RegNode = dyn_cast_or_null<RegisterSDNode>(
                 OtherOp->getOperand(1).getNode())))
            if ((RegNode->getReg() == X86::ESP) ||
                (RegNode->getReg() == X86::RSP))
              continue;
        }

        // ... otherwise, count this and move on.
        UseCount++;
      }

      // If we have more than 1 use, then recommend for hoisting.
      return (UseCount > 1);
    }

    /// Return a target constant with the specified value of type i8.
    inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
    }

    /// Return a target constant with the specified value, of type i32.
    inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
    }

    /// Return an SDNode that returns the value of the global base register.
    /// Output instructions required to initialize the global base register,
    /// if necessary.
    SDNode *getGlobalBaseReg();

    /// Return a reference to the TargetMachine, casted to the target-specific
    /// type.
    const X86TargetMachine &getTargetMachine() const {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// Return a reference to the TargetInstrInfo, casted to the target-specific
    /// type.
    const X86InstrInfo *getInstrInfo() const {
      return Subtarget->getInstrInfo();
    }

    /// \brief Address-mode matching performs shift-of-and to and-of-shift
    /// reassociation in order to expose more scaled addressing
    /// opportunities.
    bool ComplexPatternFuncMutatesDAG() const override {
      return true;
    }
  };
}

bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (!N.hasOneUse())
    return false;

  if (N.getOpcode() != ISD::LOAD)
    return true;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::SUB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is an 8-bit immediate we should fold the
      // immediate instead. This reduces code size.
      // e.g.
      // movl 4(%esp), %eax
      // addl $4, %eax
      // vs.
      // movl $4, %eax
      // addl 4(%esp), %eax
      // The former is 2 bytes shorter. When the increment is 1, the saving
      // can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      // movl %gs:0, %eax
      // leal i@NTPOFF(%eax), %eax
      // instead of
      // movl $i@NTPOFF, %eax
      // addl %gs:0, %eax
      // if the block also has an access to a second TLS address this will save
      // a load.
      // FIXME: This is probably also true for non-TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }
  }

  return true;
}

/// Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
    Ops.clear();
    Ops.push_back(NewChain);
  }
  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  Ops.append(Call->op_begin() + 1, Call->op_end());
  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}

/// Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  // The transformation is somewhat dangerous if the call's chain was glued to
  // the call. After MoveBelowOrigChain the load is moved between the call and
  // the chain, this can create a cycle if the load is not folded. So it is
  // *really* important that we are sure the load will be folded.
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (!Chain.getNumOperands())
    return false;
  // Since we are not checking for AA here, conservatively abort if the chain
  // writes to memory. It's not safe to move the callee (a load) across a store.
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
    return false;
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}

void X86DAGToDAGISel::PreprocessISelDAG() {
  // OptFor[Min]Size are used in pattern predicates that isel is matching.
  OptForSize = MF->getFunction()->optForSize();
  OptForMinSize = MF->getFunction()->optForMinSize();
  assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
        // Only do this when the target doesn't favor register indirect
        // calls.
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          // Only do this if the load can be folded into TC_RETURN.
          (Subtarget->is64Bit() ||
           !getTargetMachine().isPositionIndependent())))) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to store and
    // load pairs through the stack. This is a gross hack. We would like to
    // simply mark these as being illegal, but when we do that, legalize
    // produces these when it expands calls, then expands these in the same
    // legalize pass. We would like dag combine to be able to hack on these
    // between the call expansion and the node legalization. As such this pass
    // basically does "really late" legalization of these inline with the
    // X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
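    // For example, an f64 -> f32 fpround whose operand lives in an x87
    // register is rewritten below as a 4-byte truncating store to a stack
    // temporary followed by an extending load, forcing the rounding to
    // happen through memory.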
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    MVT SrcVT = N->getOperand(0).getSimpleValueType();
    MVT DstVT = N->getSimpleValueType(0);

    // If any of the sources are vectors, no fp stack involved.
    if (SrcVT.isVector() || DstVT.isVector())
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    const X86TargetLowering *X86Lowering =
        static_cast<const X86TargetLowering *>(TLI);
    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore. SSE can fold direct loads into other
    // operations. Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    SDLoc dl(N);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store =
        CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
                              MemTmp, MachinePointerInfo(), MemVT);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        MachinePointerInfo(), MemVT);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created. This will cause general havoc on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}

/// Emit any code that needs to be executed only in the main function.
void X86DAGToDAGISel::emitSpecialCodeForMain() {
  if (Subtarget->isTargetCygMing()) {
    TargetLowering::ArgListTy Args;
    auto &DL = CurDAG->getDataLayout();

    TargetLowering::CallLoweringInfo CLI(*CurDAG);
    CLI.setChain(CurDAG->getRoot())
        .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
                   CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
                   std::move(Args));
    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
    CurDAG->setRoot(Result.second);
  }
}

void X86DAGToDAGISel::EmitFunctionEntryCode() {
  // If this is main, emit special code for main.
  if (const Function *Fn = MF->getFunction())
    if (Fn->hasExternalLinkage() && Fn->getName() == "main")
      emitSpecialCodeForMain();
}

static bool isDispSafeForFrameIndex(int64_t Val) {
  // On 64-bit platforms, we can run into an issue where a frame index
  // includes a displacement that, when added to the explicit displacement,
  // will overflow the displacement field. Assuming that the frame index
  // displacement fits into a 31-bit integer (which is only slightly more
  // aggressive than the current fundamental assumption that it fits into
  // a 32-bit integer), a 31-bit disp should always be safe.
  return isInt<31>(Val);
}

bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {
  // Cannot combine ExternalSymbol displacements with integer offsets.
  if (Offset != 0 && (AM.ES || AM.MCSym))
    return true;
  int64_t Val = AM.Disp + Offset;
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit()) {
    if (!X86::isOffsetSuitableForCodeModel(Val, M,
                                           AM.hasSymbolicDisplacement()))
      return true;
    // In addition to the checks required for a register base, check that
    // we do not try to use an unsafe Disp with a frame index.
    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
        !isDispSafeForFrameIndex(Val))
      return true;
  }
  AM.Disp = Val;
  return false;
}

bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM) {
  SDValue Address = N->getOperand(1);

  // load gs:0 -> GS segment register.
  // load fs:0 -> FS segment register.
  //
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
        Subtarget->isTargetGlibc())
      switch (N->getPointerInfo().getAddrSpace()) {
      case 256:
        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
        return false;
      case 257:
        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
        return false;
      // Address space 258 is not handled here, because it is not used to
      // address TLS areas.
      }

  return true;
}

/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
/// mode. These wrap things that will resolve down into a symbol reference.
/// If no match is possible, this returns true, otherwise it returns false.
bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  SDValue N0 = N.getOperand(0);
  CodeModel::Model M = TM.getCodeModel();

  // Handle X86-64 rip-relative addresses. We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      (M == CodeModel::Small || M == CodeModel::Kernel)) {
    // Base and index reg must be 0 in order to use %rip as base.
    if (AM.hasBaseOrIndexReg())
      return true;
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.GV = G->getGlobal();
      AM.SymbolFlags = G->getTargetFlags();
      if (foldOffsetIntoAddress(G->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.SymbolFlags = CP->getTargetFlags();
      if (foldOffsetIntoAddress(CP->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
      AM.MCSym = S->getMCSymbol();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.BlockAddr = BA->getBlockAddress();
      AM.SymbolFlags = BA->getTargetFlags();
      if (foldOffsetIntoAddress(BA->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else
      llvm_unreachable("Unhandled symbol reference node.");

    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit
  // mode, this only applies to a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      M == CodeModel::Small || M == CodeModel::Kernel) {
    assert(N.getOpcode() != X86ISD::WrapperRIP &&
           "RIP-relative addressing already handled");
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
      AM.MCSym = S->getMCSymbol();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      AM.BlockAddr = BA->getBlockAddress();
      AM.Disp += BA->getOffset();
      AM.SymbolFlags = BA->getTargetFlags();
    } else
      llvm_unreachable("Unhandled symbol reference node.");
    return false;
  }

  return true;
}

/// Add the specified node to the specified addressing mode, returning true if
/// it cannot be done. This just pattern matches for the addressing mode.
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (matchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr &&
      AM.IndexReg.getNode() == nullptr &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}

bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
                               unsigned Depth) {
  // Add an artificial use to this node so that we can keep track of
  // it if it gets CSE'd with a different node.
  HandleSDNode Handle(N);

  X86ISelAddressMode Backup = AM;
  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
    return false;
  AM = Backup;

  // Try again after commuting the operands.
  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
    return false;
  AM = Backup;

  // If we couldn't fold both operands into the address at the same time,
  // see if we can just put each operand into a register and fold at least
  // the add.
  if (AM.BaseType == X86ISelAddressMode::RegBase &&
      !AM.Base_Reg.getNode() &&
      !AM.IndexReg.getNode()) {
    N = Handle.getValue();
    AM.Base_Reg = N.getOperand(0);
    AM.IndexReg = N.getOperand(1);
    AM.Scale = 1;
    return false;
  }
  N = Handle.getValue();
  return true;
}

// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  if (N.getNode()->getNodeId() == -1 ||
      N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
    DAG.RepositionNode(Pos.getNode()->getIterator(), N.getNode());
    N.getNode()->setNodeId(Pos.getNode()->getNodeId());
  }
}

// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
// safe. This allows us to convert the shift and and into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed.
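// For example, with C1 == 2:
//   (X >> 6) & 0x3fc  -->  ((X >> 8) & 0xff) << 2
// where the trailing "<< 2" can then be folded into a scale-4 index.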
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask,
                                      SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
      !Shift.hasOneUse())
    return true;

  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, Eight);
  insertDAGNode(DAG, N, Srl);
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, And);
  insertDAGNode(DAG, N, ShlCount);
  insertDAGNode(DAG, N, Shl);
  DAG.ReplaceAllUsesWith(N, Shl);
  AM.IndexReg = And;
  AM.Scale = (1 << ScaleLog);
  return false;
}

// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
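// For example: (X << 2) & 0x3fc  -->  (X & 0xff) << 2, where the
// resulting shift can then be folded into a scale-4 index.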
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        uint64_t Mask,
                                        SDValue Shift, SDValue X,
                                        X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SHL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  // Not likely to be profitable if either the AND or SHIFT node has more
  // than one use (unless all uses are for address computation). Besides,
  // isel mechanism requires their node ids to be reused.
  if (!N.hasOneUse() || !Shift.hasOneUse())
    return true;

  // Verify that the shift amount is something we can fold.
  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, NewAnd);
  insertDAGNode(DAG, N, NewShift);
  DAG.ReplaceAllUsesWith(N, NewShift);

  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;
  return false;
}

// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %ecx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask,
                                    SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  unsigned MaskLZ = countLeadingZeros(Mask);
  unsigned MaskTZ = countTrailingZeros(Mask);

  // The amount of shift we're trying to fit into the addressing mode is taken
  // from the trailing zeros of the mask.
  unsigned AMShiftAmt = MaskTZ;

  // There is nothing we can do here unless the mask is removing some bits.
  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;

  // We also need to ensure that mask is a contiguous run of bits.
  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;

  // Scale the leading zero count down based on the actual size of the value.
  // Also scale it down based on the size of the shift.
  MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;

  // The final check is to ensure that any masked out high bits of X are
  // already known to be zero. Otherwise, the mask has a semantic impact
  // other than masking out a couple of low bits. Unfortunately, because of
  // the mask, zero extensions will be removed from operands in some cases.
  // This code works extra hard to look through extensions because we can
  // replace them with zero extensions cheaply if necessary.
  bool ReplacingAnyExtend = false;
  if (X.getOpcode() == ISD::ANY_EXTEND) {
    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();
    // Assume that we'll replace the any-extend with a zero-extend, and
    // narrow the search to the extended value.
    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;
  }
  APInt MaskedHighBits =
      APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
  APInt KnownZero, KnownOne;
  DAG.computeKnownBits(X, KnownZero, KnownOne);
  if (MaskedHighBits != KnownZero) return true;

  // We've identified a pattern that can be transformed into a single shift
  // and an addressing mode. Make it so.
  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);
    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
    insertDAGNode(DAG, N, NewX);
    X = NewX;
  }
  SDLoc DL(N);
  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewSRLAmt);
  insertDAGNode(DAG, N, NewSRL);
  insertDAGNode(DAG, N, NewSHLAmt);
  insertDAGNode(DAG, N, NewSHL);
  DAG.ReplaceAllUsesWith(N, NewSHL);

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewSRL;
  return false;
}

bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  SDLoc dl(N);
  DEBUG({
      dbgs() << "MatchAddress: ";
      AM.dump();
    });
  // Limit recursion.
  if (Depth > 5)
    return matchAddressBase(N, AM);

  // If this is already a %rip relative address, we can only merge immediates
  // into it. Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements. It isn't very important, but this should be fixed for
    // consistency.
    if (!(AM.ES || AM.MCSym) && AM.JT != -1)
      return true;

    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
      if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
        return false;
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::LOCAL_RECOVER: {
    if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
      if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
        // Use the symbol and don't prefix it.
        AM.MCSym = ESNode->getMCSymbol();
        return false;
      }
    break;
  }
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!foldOffsetIntoAddress(Val, AM))
      return false;
    break;
  }

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    if (!matchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
      return false;
    break;

  case ISD::FrameIndex:
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;

    if (ConstantSDNode
          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getNode()->getOperand(0);

        // Okay, we know that we have a scale by now. However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
          AM.IndexReg = ShVal.getNode()->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
          uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
          if (!foldOffsetIntoAddress(Disp, AM))
            return false;
        }

        AM.IndexReg = ShVal;
        return false;
      }
    }
    break;

  case ISD::SRL: {
    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue And = N.getOperand(0);
    if (And.getOpcode() != ISD::AND) break;
    SDValue X = And.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    // The mask used for the transform is expected to be post-shift, but we
    // found the shift first so just apply the shift to the mask before passing
    // it down.
    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
        !isa<ConstantSDNode>(And.getOperand(1)))
      break;
    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);

    // Try to fold the mask and shift into the scale, and return false if we
    // succeed.
    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
      return false;
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    LLVM_FALLTHROUGH;
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
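    // For example, X*5 can be selected as a single
    //   leal (%rdi,%rdi,4), %eax
    // with Base = Index = X and Scale = 4.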
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr) {
      if (ConstantSDNode
            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getNode()->getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now. However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
            Reg = MulVal.getNode()->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
            if (foldOffsetIntoAddress(Disp, AM))
              Reg = N.getNode()->getOperand(0);
          } else {
            Reg = N.getNode()->getOperand(0);
          }

          AM.IndexReg = AM.Base_Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address (leaving the
    // index field unused), use -B as the index.
    // This is a win if A has multiple parts that can be folded into
    // the address. Also, this saves a mov if the base register has
    // other uses, since it avoids a two-address sub instruction, however
    // it costs an additional mov if the index register has other uses.
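    // For example, "sym - B" can become
    //   negl %ecx                   // Neg = 0 - B; clobbers B, hence the
    //   leal sym(,%ecx,1), %eax     // cost accounting below.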

    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (matchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }

    int Cost = 0;
    SDValue RHS = Handle.getValue().getNode()->getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base_Reg.getNode() &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;
    AM.Scale = 1;

    // Insert the new nodes into the topological ordering.
    insertDAGNode(*CurDAG, N, Zero);
    insertDAGNode(*CurDAG, N, Neg);
    return false;
  }

  case ISD::ADD:
    if (!matchAdd(N, AM, Depth))
      return false;
    break;

  case ISD::OR:
    // We want to look through a transform in InstCombine and DAGCombiner that
    // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
    // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
    // An 'lea' can then be used to match the shift (multiply) and add:
    // and $1, %esi
    // lea (%rsi, %rdi, 8), %rax
    if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
        !matchAdd(N, AM, Depth))
      return false;
    break;

  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue Shift = N.getOperand(0);
    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
    SDValue X = Shift.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    if (!isa<ConstantSDNode>(N.getOperand(1)))
      break;
    uint64_t Mask = N.getConstantOperandVal(1);

    // Try to fold the mask and shift into an extract and scale.
    if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to fold the mask and shift directly into the scale.
    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to swap the mask and shift to place shifts which can be done as
    // a scale on the outside of the mask.
    if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
      return false;
    break;
  }
  }

  return matchAddressBase(N, AM);
}

/// Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (!AM.IndexReg.getNode()) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base_Reg = N;
  return false;
}

bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                       SDValue &Scale, SDValue &Index,
                                       SDValue &Disp, SDValue &Segment) {
  MaskedGatherScatterSDNode *Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent);
  if (!Mgs)
    return false;
  X86ISelAddressMode AM;
  unsigned AddrSpace = Mgs->getPointerInfo().getAddrSpace();
  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
  if (AddrSpace == 256)
    AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
  if (AddrSpace == 257)
    AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
  if (AddrSpace == 258)
    AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);

  SDLoc DL(N);
  Base = Mgs->getBasePtr();
  Index = Mgs->getIndex();
  unsigned ScalarSize = Mgs->getValue().getScalarValueSizeInBits();
  Scale = getI8Imm(ScalarSize/8, DL);

  // If Base is 0, the whole address is in index and the Scale is 1.
  if (isa<ConstantSDNode>(Base)) {
    assert(cast<ConstantSDNode>(Base)->isNullValue() &&
           "Unexpected base in gather/scatter");
    Scale = getI8Imm(1, DL);
    Base = CurDAG->getRegister(0, MVT::i32);
  }
  if (AM.Segment.getNode())
    Segment = AM.Segment;
  else
    Segment = CurDAG->getRegister(0, MVT::i32);
  Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

/// Returns true if it is able to pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
///
/// Parent is the parent node of the addr operand that is being matched. It
/// is always a load, store, atomic node, or null. It is only null when
/// checking memory operands for inline asm nodes.
bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;

  if (Parent &&
      // This list of opcodes are all the nodes that have an "addr:$ptr" operand
      // that are not a MemSDNode, and thus don't have proper addrspace info.
      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
      Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
      Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
      Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
    unsigned AddrSpace =
      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
    // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
    if (AddrSpace == 256)
      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    if (AddrSpace == 257)
      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
    if (AddrSpace == 258)
      AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
  }

  if (matchAddress(N, AM))
    return false;

  MVT VT = N.getSimpleValueType();
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base_Reg.getNode())
      AM.Base_Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
  return true;
}

/// Match a scalar SSE load. In particular, we want to match a load whose top
/// elements are either undef or zeros. The load flavor is derived from the
/// type of N, which is either v4f32 or v2f64.
///
/// We also return:
///   PatternChainNode: this is the matched node that has a chain input and
///   output.
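/// For example, this allows the load in
///   (v4f32 (scalar_to_vector (loadf32 addr)))
/// to be folded into the memory operand of a scalar SSE instruction such as
/// "addss (%rdi), %xmm0", since only the low element is read anyway.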
1502 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
1503  SDValue N, SDValue &Base,
1504  SDValue &Scale, SDValue &Index,
1505  SDValue &Disp, SDValue &Segment,
1506  SDValue &PatternNodeWithChain) {
1507  // We can allow a full vector load here since narrowing a load is ok.
1508  if (ISD::isNON_EXTLoad(N.getNode())) {
1509  PatternNodeWithChain = N;
1510  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1511  IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel)) {
1512  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1513  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1514  Segment);
1515  }
1516  }
1517 
1518  // We can also match the special zero extended load opcode.
1519  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
1520  PatternNodeWithChain = N;
1521  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1522  IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel)) {
1523  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
1524  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
1525  Segment);
1526  }
1527  }
1528 
1529  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
1530  // once. Otherwise the load might get duplicated and the chain output of the
1531  // duplicate load will not be observed by all dependencies.
1532  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
1533  PatternNodeWithChain = N.getOperand(0);
1534  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1535  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1536  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
1537  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1538  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1539  Segment);
1540  }
1541  }
1542 
1543  // Also handle the case where we explicitly require zeros in the top
1544  // elements. This is a vector shuffle from the zero vector.
1545  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
1546  // Check to see if the top elements are all zeros (or bitcast of zeros).
1547  N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
1548  N.getOperand(0).getNode()->hasOneUse()) {
1549  PatternNodeWithChain = N.getOperand(0).getOperand(0);
1550  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
1551  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1552  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
1553  // Okay, this is a zero extending load. Fold it.
1554  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1555  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1556  Segment);
1557  }
1558  }
1559 
1560  return false;
1561 }
1562 
1563 
1564 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
1565  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1566  uint64_t ImmVal = CN->getZExtValue();
1567  if ((uint32_t)ImmVal != (uint64_t)ImmVal)
1568  return false;
1569 
1570  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
1571  return true;
1572  }
1573 
1574  // In static codegen with small code model, we can get the address of a label
1575  // into a register with 'movl'. TableGen has already made sure we're looking
1576  // at a label of some kind.
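 // For example, in the small code model the 64-bit address of a label is
 // known to fit in 32 bits, so "movl $sym, %eax" (implicitly zero-extended
 // by the hardware) is a much shorter encoding than the 10-byte
 // "movabsq $sym, %rax".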
1577  assert(N->getOpcode() == X86ISD::Wrapper &&
1578  "Unexpected node type for MOV32ri64");
1579  N = N.getOperand(0);
1580 
1581  // At least GNU as does not accept 'movl' for TPOFF relocations.
1582  // FIXME: We could use 'movl' when we know we are targeting MC.
1583  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
1584  return false;
1585 
1586  Imm = N;
1587  if (N->getOpcode() != ISD::TargetGlobalAddress)
1588  return TM.getCodeModel() == CodeModel::Small;
1589 
1590  Optional<ConstantRange> CR =
1591  cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
1592  if (!CR)
1593  return TM.getCodeModel() == CodeModel::Small;
1594 
1595  return CR->getUnsignedMax().ult(1ull << 32);
1596 }
1597 
1598 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
1599  SDValue &Scale, SDValue &Index,
1600  SDValue &Disp, SDValue &Segment) {
1601  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
1602  SDLoc DL(N);
1603 
1604  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
1605  return false;
1606 
1607  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
1608  if (RN && RN->getReg() == 0)
1609  Base = CurDAG->getRegister(0, MVT::i64);
1610  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
1611  // Base could already be %rip, particularly in the x32 ABI.
1612  Base = SDValue(CurDAG->getMachineNode(
1613  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1614  CurDAG->getTargetConstant(0, DL, MVT::i64),
1615  Base,
1616  CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
1617  0);
1618  }
1619 
1620  RN = dyn_cast<RegisterSDNode>(Index);
1621  if (RN && RN->getReg() == 0)
1622  Index = CurDAG->getRegister(0, MVT::i64);
1623  else {
1624  assert(Index.getValueType() == MVT::i32 &&
1625  "Expect to be extending 32-bit registers for use in LEA");
1626  Index = SDValue(CurDAG->getMachineNode(
1627  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
1628  CurDAG->getTargetConstant(0, DL, MVT::i64),
1629  Index,
1630  CurDAG->getTargetConstant(X86::sub_32bit, DL,
1631  MVT::i32)),
1632  0);
1633  }
1634 
1635  return true;
1636 }
1637 
1638 /// Calls SelectAddr and determines if the maximal addressing
1639 /// mode it matches can be cost-effectively emitted as an LEA instruction.
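///
/// Illustrative examples of the complexity heuristic below:
/// "leaq 8(%rbx,%rcx,4), %rax" folds a shift and two adds into one
/// instruction and is accepted, while a bare "leaq (,%rcx,2), %rax" is
/// rejected because "addq %rcx, %rcx" or a simple shift is cheaper.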
1640 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
1641  SDValue &Base, SDValue &Scale,
1642  SDValue &Index, SDValue &Disp,
1643  SDValue &Segment) {
1644  X86ISelAddressMode AM;
1645 
1646  // Save the DL and VT before calling matchAddress; it can invalidate N.
1647  SDLoc DL(N);
1648  MVT VT = N.getSimpleValueType();
1649 
1650  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1651  // segments.
1652  SDValue Copy = AM.Segment;
1653  SDValue T = CurDAG->getRegister(0, MVT::i32);
1654  AM.Segment = T;
1655  if (matchAddress(N, AM))
1656  return false;
1657  assert (T == AM.Segment);
1658  AM.Segment = Copy;
1659 
1660  unsigned Complexity = 0;
1661  if (AM.BaseType == X86ISelAddressMode::RegBase)
1662  if (AM.Base_Reg.getNode())
1663  Complexity = 1;
1664  else
1665  AM.Base_Reg = CurDAG->getRegister(0, VT);
1666  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1667  Complexity = 4;
1668 
1669  if (AM.IndexReg.getNode())
1670  Complexity++;
1671  else
1672  AM.IndexReg = CurDAG->getRegister(0, VT);
1673 
1674  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg or a
1675  // simple shift.
1676  if (AM.Scale > 1)
1677  Complexity++;
1678 
1679  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1680  // to a LEA. This is determined with some experimentation but is by no means
1681  // optimal (especially for code size consideration). LEA is nice because of
1682  // its three-address nature. Tweak the cost function again when we can run
1683  // convertToThreeAddress() at register allocation time.
1684  if (AM.hasSymbolicDisplacement()) {
1685  // For X86-64, always use LEA to materialize RIP-relative addresses.
1686  if (Subtarget->is64Bit())
1687  Complexity = 4;
1688  else
1689  Complexity += 2;
1690  }
1691 
1692  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
1693  Complexity++;
1694 
1695  // If it isn't worth using an LEA, reject it.
1696  if (Complexity <= 2)
1697  return false;
1698 
1699  getAddressOperands(AM, DL, Base, Scale, Index, Disp, Segment);
1700  return true;
1701 }
1702 
1703 /// This is only run on TargetGlobalTLSAddress nodes.
1704 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
1705  SDValue &Scale, SDValue &Index,
1706  SDValue &Disp, SDValue &Segment) {
1707  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1708  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1709 
1710  X86ISelAddressMode AM;
1711  AM.GV = GA->getGlobal();
1712  AM.Disp += GA->getOffset();
1713  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
1714  AM.SymbolFlags = GA->getTargetFlags();
1715 
1716  if (N.getValueType() == MVT::i32) {
1717  AM.Scale = 1;
1718  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1719  } else {
1720  AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1721  }
1722 
1723  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1724  return true;
1725 }
1726 
1727 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
1728  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
1729  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
1730  N.getValueType());
1731  return true;
1732  }
1733 
1734  // Keep track of the original value type and whether this value was
1735  // truncated. If we see a truncation from pointer type to VT that truncates
1736  // bits that are known to be zero, we can use a narrow reference.
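 // For example (a hypothetical module, not from the test suite): a global
 // declared with !absolute_symbol metadata restricting its address to
 // [0, 0x10000) can be referenced through an i16 truncate, since the
 // dropped upper bits are provably zero; the range check below enforces
 // exactly this.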
1737  EVT VT = N.getValueType();
1738  bool WasTruncated = false;
1739  if (N.getOpcode() == ISD::TRUNCATE) {
1740  WasTruncated = true;
1741  N = N.getOperand(0);
1742  }
1743 
1744  if (N.getOpcode() != X86ISD::Wrapper)
1745  return false;
1746 
1747  // We can only use non-GlobalValues as immediates if they were not truncated,
1748  // as we do not have any range information. If we have a GlobalValue and the
1749  // address was not truncated, we can select it as an operand directly.
1750  unsigned Opc = N.getOperand(0)->getOpcode();
1751  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
1752  Op = N.getOperand(0);
1753  // We can only select the operand directly if we didn't have to look past a
1754  // truncate.
1755  return !WasTruncated;
1756  }
1757 
1758  // Check that the global's range fits into VT.
1759  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
1760  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
1761  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
1762  return false;
1763 
1764  // Okay, we can use a narrow reference.
1765  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
1766  GA->getOffset(), GA->getTargetFlags());
1767  return true;
1768 }
1769 
1770 bool X86DAGToDAGISel::tryFoldLoad(SDNode *P, SDValue N,
1771  SDValue &Base, SDValue &Scale,
1772  SDValue &Index, SDValue &Disp,
1773  SDValue &Segment) {
1774  if (!ISD::isNON_EXTLoad(N.getNode()) ||
1775  !IsProfitableToFold(N, P, P) ||
1776  !IsLegalToFold(N, P, P, OptLevel))
1777  return false;
1778 
1779  return selectAddr(N.getNode(),
1780  N.getOperand(1), Base, Scale, Index, Disp, Segment);
1781 }
1782 
1783 /// Return an SDNode that returns the value of the global base register.
1784 /// Output instructions required to initialize the global base register,
1785 /// if necessary.
1786 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1787  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1788  auto &DL = MF->getDataLayout();
1789  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
1790 }
1791 
1792 /// Test whether the given X86ISD::CMP node has any uses which require the SF
1793 /// or OF bits to be accurate.
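/// For example, a SETE or JE user only reads ZF and is acceptable, whereas
/// a signed-comparison user such as SETL or JL reads SF and OF and falls
/// into the conservative default case below.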
1794 static bool hasNoSignedComparisonUses(SDNode *N) {
1795  // Examine each user of the node.
1796  for (SDNode::use_iterator UI = N->use_begin(),
1797  UE = N->use_end(); UI != UE; ++UI) {
1798  // Only examine CopyToReg uses.
1799  if (UI->getOpcode() != ISD::CopyToReg)
1800  return false;
1801  // Only examine CopyToReg uses that copy to EFLAGS.
1802  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
1803  X86::EFLAGS)
1804  return false;
1805  // Examine each user of the CopyToReg use.
1806  for (SDNode::use_iterator FlagUI = UI->use_begin(),
1807  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
1808  // Only examine the Flag result.
1809  if (FlagUI.getUse().getResNo() != 1) continue;
1810  // Anything unusual: assume conservatively.
1811  if (!FlagUI->isMachineOpcode()) return false;
1812  // Examine the opcode of the user.
1813  switch (FlagUI->getMachineOpcode()) {
1814  // These comparisons don't treat the most significant bit specially.
1815  case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
1816  case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
1817  case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
1818  case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
1819  case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
1820  case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
1821  case X86::CMOVA16rr: case X86::CMOVA16rm:
1822  case X86::CMOVA32rr: case X86::CMOVA32rm:
1823  case X86::CMOVA64rr: case X86::CMOVA64rm:
1824  case X86::CMOVAE16rr: case X86::CMOVAE16rm:
1825  case X86::CMOVAE32rr: case X86::CMOVAE32rm:
1826  case X86::CMOVAE64rr: case X86::CMOVAE64rm:
1827  case X86::CMOVB16rr: case X86::CMOVB16rm:
1828  case X86::CMOVB32rr: case X86::CMOVB32rm:
1829  case X86::CMOVB64rr: case X86::CMOVB64rm:
1830  case X86::CMOVBE16rr: case X86::CMOVBE16rm:
1831  case X86::CMOVBE32rr: case X86::CMOVBE32rm:
1832  case X86::CMOVBE64rr: case X86::CMOVBE64rm:
1833  case X86::CMOVE16rr: case X86::CMOVE16rm:
1834  case X86::CMOVE32rr: case X86::CMOVE32rm:
1835  case X86::CMOVE64rr: case X86::CMOVE64rm:
1836  case X86::CMOVNE16rr: case X86::CMOVNE16rm:
1837  case X86::CMOVNE32rr: case X86::CMOVNE32rm:
1838  case X86::CMOVNE64rr: case X86::CMOVNE64rm:
1839  case X86::CMOVNP16rr: case X86::CMOVNP16rm:
1840  case X86::CMOVNP32rr: case X86::CMOVNP32rm:
1841  case X86::CMOVNP64rr: case X86::CMOVNP64rm:
1842  case X86::CMOVP16rr: case X86::CMOVP16rm:
1843  case X86::CMOVP32rr: case X86::CMOVP32rm:
1844  case X86::CMOVP64rr: case X86::CMOVP64rm:
1845  continue;
1846  // Anything else: assume conservatively.
1847  default: return false;
1848  }
1849  }
1850  }
1851  return true;
1852 }
1853 
1854 /// Check whether or not the chain ending in StoreNode is suitable for doing
1855 /// the {load; increment or decrement; store} to modify transformation.
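///
/// For example, a chain of the form (store (X86ISD::INC (load [M])), [M]),
/// where the loaded value feeds only the increment, can later be emitted as
/// a single "incl (%mem)" once all the checks below pass.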
1856 static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
1857  SDValue StoredVal, SelectionDAG *CurDAG,
1858  LoadSDNode* &LoadNode, SDValue &InputChain) {
1859 
1860  // Is the value stored the result of a DEC or INC?
1861  if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
1862 
1863  // Is the stored value result 0 of the load?
1864  if (StoredVal.getResNo() != 0) return false;
1865 
1866  // Are there other uses of the loaded value than the inc or dec?
1867  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
1868 
1869  // Is the store non-extending and non-indexed?
1870  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
1871  return false;
1872 
1873  SDValue Load = StoredVal->getOperand(0);
1874  // Is the stored value a non-extending and non-indexed load?
1875  if (!ISD::isNormalLoad(Load.getNode())) return false;
1876 
1877  // Return LoadNode by reference.
1878  LoadNode = cast<LoadSDNode>(Load);
1879  // Is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
1880  EVT LdVT = LoadNode->getMemoryVT();
1881  if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
1882  LdVT != MVT::i8)
1883  return false;
1884 
1885  // Is store the only read of the loaded value?
1886  if (!Load.hasOneUse())
1887  return false;
1888 
1889  // Is the address of the store the same as the load?
1890  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
1891  LoadNode->getOffset() != StoreNode->getOffset())
1892  return false;
1893 
1894  // Check if the chain is produced by the load or is a TokenFactor with
1895  // the load output chain as an operand. Return InputChain by reference.
1896  SDValue Chain = StoreNode->getChain();
1897 
1898  bool ChainCheck = false;
1899  if (Chain == Load.getValue(1)) {
1900  ChainCheck = true;
1901  InputChain = LoadNode->getChain();
1902  } else if (Chain.getOpcode() == ISD::TokenFactor) {
1903  SmallVector<SDValue, 4> ChainOps;
1904  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
1905  SDValue Op = Chain.getOperand(i);
1906  if (Op == Load.getValue(1)) {
1907  ChainCheck = true;
1908  continue;
1909  }
1910 
1911  // Make sure using Op as part of the chain would not cause a cycle here.
1912  // In theory, we could check whether the chain node is a predecessor of
1913  // the load. But that can be very expensive. Instead visit the uses and
1914  // make sure they all have smaller node id than the load.
1915  int LoadId = LoadNode->getNodeId();
1916  for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
1917  UE = UI->use_end(); UI != UE; ++UI) {
1918  if (UI.getUse().getResNo() != 0)
1919  continue;
1920  if (UI->getNodeId() > LoadId)
1921  return false;
1922  }
1923 
1924  ChainOps.push_back(Op);
1925  }
1926 
1927  if (ChainCheck)
1928  // Make a new TokenFactor with all the other input chains except
1929  // for the load.
1930  InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
1931  MVT::Other, ChainOps);
1932  }
1933  if (!ChainCheck)
1934  return false;
1935 
1936  return true;
1937 }
1938 
1939 /// Get the appropriate X86 opcode for an in-memory increment or decrement.
1940 /// Opc should be X86ISD::DEC or X86ISD::INC.
1941 static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
1942  if (Opc == X86ISD::DEC) {
1943  if (LdVT == MVT::i64) return X86::DEC64m;
1944  if (LdVT == MVT::i32) return X86::DEC32m;
1945  if (LdVT == MVT::i16) return X86::DEC16m;
1946  if (LdVT == MVT::i8) return X86::DEC8m;
1947  } else {
1948  assert(Opc == X86ISD::INC && "unrecognized opcode");
1949  if (LdVT == MVT::i64) return X86::INC64m;
1950  if (LdVT == MVT::i32) return X86::INC32m;
1951  if (LdVT == MVT::i16) return X86::INC16m;
1952  if (LdVT == MVT::i8) return X86::INC8m;
1953  }
1954  llvm_unreachable("unrecognized size for LdVT");
1955 }
1956 
1957 /// Customized ISel for GATHER operations.
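/// For example, the AVX2 intrinsic @llvm.x86.avx2.gather.d.ps.256 is routed
/// here and emitted as something like
/// "vgatherdps %ymm2, (%rax,%ymm1,4), %ymm0", where the mask operand
/// (%ymm2 here) is written back as well.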
1958 bool X86DAGToDAGISel::tryGather(SDNode *Node, unsigned Opc) {
1959  // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
1960  SDValue Chain = Node->getOperand(0);
1961  SDValue VSrc = Node->getOperand(2);
1962  SDValue Base = Node->getOperand(3);
1963  SDValue VIdx = Node->getOperand(4);
1964  SDValue VMask = Node->getOperand(5);
1965  ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
1966  if (!Scale)
1967  return false;
1968 
1969  SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
1970  MVT::Other);
1971 
1972  SDLoc DL(Node);
1973 
1974  // Memory Operands: Base, Scale, Index, Disp, Segment
1975  SDValue Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1976  SDValue Segment = CurDAG->getRegister(0, MVT::i32);
1977  const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue(), DL), VIdx,
1978  Disp, Segment, VMask, Chain};
1979  SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
1980  // Node has 2 outputs: VDst and MVT::Other.
1981  // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
1982  // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
1983  // of ResNode.
1984  ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
1985  ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
1986  CurDAG->RemoveDeadNode(Node);
1987  return true;
1988 }
1989 
1990 void X86DAGToDAGISel::Select(SDNode *Node) {
1991  MVT NVT = Node->getSimpleValueType(0);
1992  unsigned Opc, MOpc;
1993  unsigned Opcode = Node->getOpcode();
1994  SDLoc dl(Node);
1995 
1996  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
1997 
1998  if (Node->isMachineOpcode()) {
1999  DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
2000  Node->setNodeId(-1);
2001  return; // Already selected.
2002  }
2003 
2004  switch (Opcode) {
2005  default: break;
2006  case ISD::BRIND: {
2007  if (Subtarget->isTargetNaCl())
2008  // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We
2009  // leave the instruction alone.
2010  break;
2011  if (Subtarget->isTarget64BitILP32()) {
2012  // Converts a 32-bit register to a 64-bit, zero-extended version of
2013  // it. This is needed because x86-64 can do many things, but jmp %r32
2014  // ain't one of them.
2015  const SDValue &Target = Node->getOperand(1);
2016  assert(Target.getSimpleValueType() == llvm::MVT::i32);
2017  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
2018  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
2019  Node->getOperand(0), ZextTarget);
2020  ReplaceNode(Node, Brind.getNode());
2021  SelectCode(ZextTarget.getNode());
2022  SelectCode(Brind.getNode());
2023  return;
2024  }
2025  break;
2026  }
2027  case ISD::INTRINSIC_W_CHAIN: {
2028  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2029  switch (IntNo) {
2030  default: break;
2031  case Intrinsic::x86_avx2_gather_d_pd:
2032  case Intrinsic::x86_avx2_gather_d_pd_256:
2033  case Intrinsic::x86_avx2_gather_q_pd:
2034  case Intrinsic::x86_avx2_gather_q_pd_256:
2035  case Intrinsic::x86_avx2_gather_d_ps:
2036  case Intrinsic::x86_avx2_gather_d_ps_256:
2037  case Intrinsic::x86_avx2_gather_q_ps:
2038  case Intrinsic::x86_avx2_gather_q_ps_256:
2039  case Intrinsic::x86_avx2_gather_d_q:
2040  case Intrinsic::x86_avx2_gather_d_q_256:
2041  case Intrinsic::x86_avx2_gather_q_q:
2042  case Intrinsic::x86_avx2_gather_q_q_256:
2043  case Intrinsic::x86_avx2_gather_d_d:
2044  case Intrinsic::x86_avx2_gather_d_d_256:
2045  case Intrinsic::x86_avx2_gather_q_d:
2046  case Intrinsic::x86_avx2_gather_q_d_256: {
2047  if (!Subtarget->hasAVX2())
2048  break;
2049  unsigned Opc;
2050  switch (IntNo) {
2051  default: llvm_unreachable("Impossible intrinsic");
2052  case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break;
2053  case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
2054  case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break;
2055  case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
2056  case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break;
2057  case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
2058  case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break;
2059  case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
2060  case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break;
2061  case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break;
2062  case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break;
2063  case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break;
2064  case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break;
2065  case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break;
2066  case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
2067  case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
2068  }
2069  if (tryGather(Node, Opc))
2070  return;
2071  break;
2072  }
2073  }
2074  break;
2075  }
2076  case X86ISD::GlobalBaseReg:
2077  ReplaceNode(Node, getGlobalBaseReg());
2078  return;
2079 
2080  case X86ISD::SHRUNKBLEND: {
2081  // SHRUNKBLEND selects like a regular VSELECT.
2082  SDValue VSelect = CurDAG->getNode(
2083  ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
2084  Node->getOperand(1), Node->getOperand(2));
2085  ReplaceUses(SDValue(Node, 0), VSelect);
2086  SelectCode(VSelect.getNode());
2087  // We already called ReplaceUses.
2088  return;
2089  }
2090 
2091  case ISD::AND:
2092  case ISD::OR:
2093  case ISD::XOR: {
2094  // For operations of the form (x << C1) op C2, check if we can use a smaller
2095  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
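 // A worked example of the arithmetic below (hypothetical values): for
 // (x << 8) | 0x1200 we get Val = 0x1200 and ShlVal = 8, so
 // RemovedBitsMask = 0xff and (Val & RemovedBitsMask) == 0; the constant
 // shrinks to Val >> ShlVal = 0x12, which fits an i8 immediate, and we can
 // emit "orl $0x12" followed by "shll $8" instead of using a 32-bit
 // immediate.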
2096  SDValue N0 = Node->getOperand(0);
2097  SDValue N1 = Node->getOperand(1);
2098 
2099  if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
2100  break;
2101 
2102  // i8 is unshrinkable, i16 should be promoted to i32.
2103  if (NVT != MVT::i32 && NVT != MVT::i64)
2104  break;
2105 
2106  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
2107  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2108  if (!Cst || !ShlCst)
2109  break;
2110 
2111  int64_t Val = Cst->getSExtValue();
2112  uint64_t ShlVal = ShlCst->getZExtValue();
2113 
2114  // Make sure that we don't change the operation by removing bits.
2115  // This only matters for OR and XOR, AND is unaffected.
2116  uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
2117  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
2118  break;
2119 
2120  unsigned ShlOp, AddOp, Op;
2121  MVT CstVT = NVT;
2122 
2123  // Check the minimum bitwidth for the new constant.
2124  // TODO: AND32ri is the same as AND64ri32 with zext imm.
2125  // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
2126  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
2127  if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
2128  CstVT = MVT::i8;
2129  else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
2130  CstVT = MVT::i32;
2131 
2132  // Bail if there is no smaller encoding.
2133  if (NVT == CstVT)
2134  break;
2135 
2136  switch (NVT.SimpleTy) {
2137  default: llvm_unreachable("Unsupported VT!");
2138  case MVT::i32:
2139  assert(CstVT == MVT::i8);
2140  ShlOp = X86::SHL32ri;
2141  AddOp = X86::ADD32rr;
2142 
2143  switch (Opcode) {
2144  default: llvm_unreachable("Impossible opcode");
2145  case ISD::AND: Op = X86::AND32ri8; break;
2146  case ISD::OR: Op = X86::OR32ri8; break;
2147  case ISD::XOR: Op = X86::XOR32ri8; break;
2148  }
2149  break;
2150  case MVT::i64:
2151  assert(CstVT == MVT::i8 || CstVT == MVT::i32);
2152  ShlOp = X86::SHL64ri;
2153  AddOp = X86::ADD64rr;
2154 
2155  switch (Opcode) {
2156  default: llvm_unreachable("Impossible opcode");
2157  case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
2158  case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
2159  case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
2160  }
2161  break;
2162  }
2163 
2164  // Emit the smaller op and the shift.
2165  SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
2166  SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
2167  if (ShlVal == 1)
2168  CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
2169  SDValue(New, 0));
2170  else
2171  CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
2172  getI8Imm(ShlVal, dl));
2173  return;
2174  }
2175  case X86ISD::UMUL8:
2176  case X86ISD::SMUL8: {
2177  SDValue N0 = Node->getOperand(0);
2178  SDValue N1 = Node->getOperand(1);
2179 
2180  Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
2181 
2182  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
2183  N0, SDValue()).getValue(1);
2184 
2185  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
2186  SDValue Ops[] = {N1, InFlag};
2187  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2188 
2189  ReplaceNode(Node, CNode);
2190  return;
2191  }
2192 
2193  case X86ISD::UMUL: {
2194  SDValue N0 = Node->getOperand(0);
2195  SDValue N1 = Node->getOperand(1);
2196 
2197  unsigned LoReg;
2198  switch (NVT.SimpleTy) {
2199  default: llvm_unreachable("Unsupported VT!");
2200  case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break;
2201  case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
2202  case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
2203  case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
2204  }
2205 
2206  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
2207  N0, SDValue()).getValue(1);
2208 
2209  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
2210  SDValue Ops[] = {N1, InFlag};
2211  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2212 
2213  ReplaceNode(Node, CNode);
2214  return;
2215  }
2216 
2217  case ISD::SMUL_LOHI:
2218  case ISD::UMUL_LOHI: {
2219  SDValue N0 = Node->getOperand(0);
2220  SDValue N1 = Node->getOperand(1);
2221 
2222  bool isSigned = Opcode == ISD::SMUL_LOHI;
2223  bool hasBMI2 = Subtarget->hasBMI2();
2224  if (!isSigned) {
2225  switch (NVT.SimpleTy) {
2226  default: llvm_unreachable("Unsupported VT!");
2227  case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
2228  case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
2229  case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
2230  MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
2231  case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
2232  MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
2233  }
2234  } else {
2235  switch (NVT.SimpleTy) {
2236  default: llvm_unreachable("Unsupported VT!");
2237  case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
2238  case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
2239  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
2240  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
2241  }
2242  }
2243 
2244  unsigned SrcReg, LoReg, HiReg;
2245  switch (Opc) {
2246  default: llvm_unreachable("Unknown MUL opcode!");
2247  case X86::IMUL8r:
2248  case X86::MUL8r:
2249  SrcReg = LoReg = X86::AL; HiReg = X86::AH;
2250  break;
2251  case X86::IMUL16r:
2252  case X86::MUL16r:
2253  SrcReg = LoReg = X86::AX; HiReg = X86::DX;
2254  break;
2255  case X86::IMUL32r:
2256  case X86::MUL32r:
2257  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
2258  break;
2259  case X86::IMUL64r:
2260  case X86::MUL64r:
2261  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
2262  break;
2263  case X86::MULX32rr:
2264  SrcReg = X86::EDX; LoReg = HiReg = 0;
2265  break;
2266  case X86::MULX64rr:
2267  SrcReg = X86::RDX; LoReg = HiReg = 0;
2268  break;
2269  }
2270 
2271  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2272  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2273  // Multiply is commutative.
2274  if (!foldedLoad) {
2275  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2276  if (foldedLoad)
2277  std::swap(N0, N1);
2278  }
2279 
2280  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
2281  N0, SDValue()).getValue(1);
2282  SDValue ResHi, ResLo;
2283 
2284  if (foldedLoad) {
2285  SDValue Chain;
2286  MachineSDNode *CNode = nullptr;
2287  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2288  InFlag };
2289  if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
2290  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
2291  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2292  ResHi = SDValue(CNode, 0);
2293  ResLo = SDValue(CNode, 1);
2294  Chain = SDValue(CNode, 2);
2295  InFlag = SDValue(CNode, 3);
2296  } else {
2297  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2298  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2299  Chain = SDValue(CNode, 0);
2300  InFlag = SDValue(CNode, 1);
2301  }
2302 
2303  // Update the chain.
2304  ReplaceUses(N1.getValue(1), Chain);
2305  // Record the mem-refs
2306  LoadSDNode *LoadNode = cast<LoadSDNode>(N1);
2307  if (LoadNode) {
2308  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2309  MemOp[0] = LoadNode->getMemOperand();
2310  CNode->setMemRefs(MemOp, MemOp + 1);
2311  }
2312  } else {
2313  SDValue Ops[] = { N1, InFlag };
2314  if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
2315  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
2316  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2317  ResHi = SDValue(CNode, 0);
2318  ResLo = SDValue(CNode, 1);
2319  InFlag = SDValue(CNode, 2);
2320  } else {
2321  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
2322  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2323  InFlag = SDValue(CNode, 0);
2324  }
2325  }
2326 
2327  // Prevent use of AH in a REX instruction by referencing AX instead.
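 // (Background: AH cannot be encoded in any instruction that carries a REX
 // prefix; for example, there is no encoding for "movb %ah, %sil". On
 // 64-bit targets we therefore copy AX out and shift right by 8 instead.)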
2328  if (HiReg == X86::AH && Subtarget->is64Bit() &&
2329  !SDValue(Node, 1).use_empty()) {
2330  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2331  X86::AX, MVT::i16, InFlag);
2332  InFlag = Result.getValue(2);
2333  // Get the low part if needed. Don't use getCopyFromReg for aliasing
2334  // registers.
2335  if (!SDValue(Node, 0).use_empty())
2336  ReplaceUses(SDValue(Node, 1),
2337  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2338 
2339  // Shift AX down 8 bits.
2340  Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
2341  Result,
2342  CurDAG->getTargetConstant(8, dl, MVT::i8)),
2343  0);
2344  // Then truncate it down to i8.
2345  ReplaceUses(SDValue(Node, 1),
2346  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2347  }
2348  // Copy the low half of the result, if it is needed.
2349  if (!SDValue(Node, 0).use_empty()) {
2350  if (!ResLo.getNode()) {
2351  assert(LoReg && "Register for low half is not defined!");
2352  ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
2353  InFlag);
2354  InFlag = ResLo.getValue(2);
2355  }
2356  ReplaceUses(SDValue(Node, 0), ResLo);
2357  DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
2358  }
2359  // Copy the high half of the result, if it is needed.
2360  if (!SDValue(Node, 1).use_empty()) {
2361  if (!ResHi.getNode()) {
2362  assert(HiReg && "Register for high half is not defined!");
2363  ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
2364  InFlag);
2365  InFlag = ResHi.getValue(2);
2366  }
2367  ReplaceUses(SDValue(Node, 1), ResHi);
2368  DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
2369  }
2370 
2371  return;
2372  }
2373 
2374  case ISD::SDIVREM:
2375  case ISD::UDIVREM:
2376  case X86ISD::SDIVREM8_SEXT_HREG:
2377  case X86ISD::UDIVREM8_ZEXT_HREG: {
2378  SDValue N0 = Node->getOperand(0);
2379  SDValue N1 = Node->getOperand(1);
2380 
2381  bool isSigned = (Opcode == ISD::SDIVREM ||
2382  Opcode == X86ISD::SDIVREM8_SEXT_HREG);
2383  if (!isSigned) {
2384  switch (NVT.SimpleTy) {
2385  default: llvm_unreachable("Unsupported VT!");
2386  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
2387  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
2388  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
2389  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
2390  }
2391  } else {
2392  switch (NVT.SimpleTy) {
2393  default: llvm_unreachable("Unsupported VT!");
2394  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
2395  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
2396  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
2397  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
2398  }
2399  }
2400 
2401  unsigned LoReg, HiReg, ClrReg;
2402  unsigned SExtOpcode;
2403  switch (NVT.SimpleTy) {
2404  default: llvm_unreachable("Unsupported VT!");
2405  case MVT::i8:
2406  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
2407  SExtOpcode = X86::CBW;
2408  break;
2409  case MVT::i16:
2410  LoReg = X86::AX; HiReg = X86::DX;
2411  ClrReg = X86::DX;
2412  SExtOpcode = X86::CWD;
2413  break;
2414  case MVT::i32:
2415  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
2416  SExtOpcode = X86::CDQ;
2417  break;
2418  case MVT::i64:
2419  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
2420  SExtOpcode = X86::CQO;
2421  break;
2422  }
2423 
2424  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2425  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2426  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
2427 
2428  SDValue InFlag;
2429  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
2430  // Special case for div8, just use a move with zero extension to AX to
2431  // clear the upper 8 bits (AH).
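 // For example (hypothetical registers), an unsigned i8 divide becomes
 // "movzbl %cl, %eax" followed by "divb %bl": the zero extension clears AH,
 // and DIV then leaves the quotient in AL and the remainder in AH.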
2432  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
2433  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
2434  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
2435  Move =
2436  SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
2437  MVT::Other, Ops), 0);
2438  Chain = Move.getValue(1);
2439  ReplaceUses(N0.getValue(1), Chain);
2440  } else {
2441  Move =
2442  SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
2443  Chain = CurDAG->getEntryNode();
2444  }
2445  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
2446  InFlag = Chain.getValue(1);
2447  } else {
2448  InFlag =
2449  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
2450  LoReg, N0, SDValue()).getValue(1);
2451  if (isSigned && !signBitIsZero) {
2452  // Sign extend the low part into the high part.
2453  InFlag =
2454  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
2455  } else {
2456  // Zero out the high part, effectively zero extending the input.
2457  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
2458  switch (NVT.SimpleTy) {
2459  case MVT::i16:
2460  ClrNode =
2461  SDValue(CurDAG->getMachineNode(
2462  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
2463  CurDAG->getTargetConstant(X86::sub_16bit, dl,
2464  MVT::i32)),
2465  0);
2466  break;
2467  case MVT::i32:
2468  break;
2469  case MVT::i64:
2470  ClrNode =
2471  SDValue(CurDAG->getMachineNode(
2472  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
2473  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
2474  CurDAG->getTargetConstant(X86::sub_32bit, dl,
2475  MVT::i32)),
2476  0);
2477  break;
2478  default:
2479  llvm_unreachable("Unexpected division source");
2480  }
2481 
2482  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
2483  ClrNode, InFlag).getValue(1);
2484  }
2485  }
2486 
2487  if (foldedLoad) {
2488  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2489  InFlag };
2490  SDNode *CNode =
2491  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
2492  InFlag = SDValue(CNode, 1);
2493  // Update the chain.
2494  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
2495  } else {
2496  InFlag =
2497  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
2498  }
2499 
2500  // Prevent use of AH in a REX instruction by explicitly copying it to
2501  // an ABCD_L register.
2502  //
2503  // The current assumption of the register allocator is that isel
2504  // won't generate explicit references to the GR8_ABCD_H registers. If
2505  // the allocator and/or the backend get enhanced to be more robust in
2506  // that regard, this can be, and should be, removed.
2507  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
2508  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
2509  unsigned AHExtOpcode =
2510  isSigned ? X86::MOVSX32_NOREXrr8 : X86::MOVZX32_NOREXrr8;
2511 
2512  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
2513  MVT::Glue, AHCopy, InFlag);
2514  SDValue Result(RNode, 0);
2515  InFlag = SDValue(RNode, 1);
2516 
2517  if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
2518  Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
2519  if (Node->getValueType(1) == MVT::i64) {
2520  // It's not possible to directly movsx AH to a 64bit register, because
2521  // the latter needs the REX prefix, but the former can't have it.
2522  assert(Opcode != X86ISD::SDIVREM8_SEXT_HREG &&
2523  "Unexpected i64 sext of h-register");
2524  Result =
2525  SDValue(CurDAG->getMachineNode(
2526  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
2527  CurDAG->getTargetConstant(0, dl, MVT::i64), Result,
2528  CurDAG->getTargetConstant(X86::sub_32bit, dl,
2529  MVT::i32)),
2530  0);
2531  }
2532  } else {
2533  Result =
2534  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
2535  }
2536  ReplaceUses(SDValue(Node, 1), Result);
2537  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2538  }
2539  // Copy the division (low) result, if it is needed.
2540  if (!SDValue(Node, 0).use_empty()) {
2541  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2542  LoReg, NVT, InFlag);
2543  InFlag = Result.getValue(2);
2544  ReplaceUses(SDValue(Node, 0), Result);
2545  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2546  }
2547  // Copy the remainder (high) result, if it is needed.
2548  if (!SDValue(Node, 1).use_empty()) {
2549  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2550  HiReg, NVT, InFlag);
2551  InFlag = Result.getValue(2);
2552  ReplaceUses(SDValue(Node, 1), Result);
2553  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2554  }
2555  return;
2556  }
2557 
2558  case X86ISD::CMP:
2559  case X86ISD::SUB: {
2560  // Sometimes a SUB is used to perform a comparison.
2561  if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
2562  // This node is not a CMP.
2563  break;
2564  SDValue N0 = Node->getOperand(0);
2565  SDValue N1 = Node->getOperand(1);
2566 
2567  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
2568  hasNoSignedComparisonUses(Node))
2569  N0 = N0.getOperand(0);
2570 
2571  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
2572  // use a smaller encoding.
2573  // Look past the truncate if CMP is the only use of it.
2574  if ((N0.getNode()->getOpcode() == ISD::AND ||
2575  (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
2576  N0.getNode()->hasOneUse() &&
2577  N0.getValueType() != MVT::i8 &&
2578  X86::isZeroNode(N1)) {
2579  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
2580  if (!C) break;
2581 
2582  // For example, convert "testl %eax, $8" to "testb %al, $8"
2583  if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
2584  (!(C->getZExtValue() & 0x80) ||
2585  hasNoSignedComparisonUses(Node))) {
2586  SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i8);
2587  SDValue Reg = N0.getNode()->getOperand(0);
2588 
2589  // On x86-32, only the ABCD registers have 8-bit subregisters.
2590  if (!Subtarget->is64Bit()) {
2591  const TargetRegisterClass *TRC;
2592  switch (N0.getSimpleValueType().SimpleTy) {
2593  case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
2594  case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
2595  default: llvm_unreachable("Unsupported TEST operand type!");
2596  }
2597  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
2598  Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
2599  Reg.getValueType(), Reg, RC), 0);
2600  }
2601 
2602  // Extract the l-register.
2603  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
2604  MVT::i8, Reg);
2605 
2606  // Emit a testb.
2607  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
2608  Subreg, Imm);
2609  // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2610  // one, do not call ReplaceAllUsesWith.
2611  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2612  SDValue(NewNode, 0));
2613  return;
2614  }
2615 
2616  // For example, "testl %eax, $2048" to "testb %ah, $8".
2617  if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
2618  (!(C->getZExtValue() & 0x8000) ||
2619  hasNoSignedComparisonUses(Node))) {
2620  // Shift the immediate right by 8 bits.
2621  SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
2622  dl, MVT::i8);
2623  SDValue Reg = N0.getNode()->getOperand(0);
2624 
2625  // Put the value in an ABCD register.
2626  const TargetRegisterClass *TRC;
2627  switch (N0.getSimpleValueType().SimpleTy) {
2628  case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
2629  case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
2630  case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
2631  default: llvm_unreachable("Unsupported TEST operand type!");
2632  }
2633  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
2634  Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
2635  Reg.getValueType(), Reg, RC), 0);
2636 
2637  // Extract the h-register.
2638  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
2639  MVT::i8, Reg);
2640 
2641  // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
2642  // target GR8_NOREX registers, so make sure the register class is
2643  // forced.
2644  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
2645  MVT::i32, Subreg, ShiftedImm);
2646  // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2647  // one, do not call ReplaceAllUsesWith.
2648  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2649  SDValue(NewNode, 0));
2650  return;
2651  }
2652 
2653  // For example, "testl %eax, $32776" to "testw %ax, $32776".
2654  if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
2655  N0.getValueType() != MVT::i16 &&
2656  (!(C->getZExtValue() & 0x8000) ||
2657  hasNoSignedComparisonUses(Node))) {
2658  SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
2659  MVT::i16);
2660  SDValue Reg = N0.getNode()->getOperand(0);
2661 
2662  // Extract the 16-bit subregister.
2663  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
2664  MVT::i16, Reg);
2665 
2666  // Emit a testw.
2667  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
2668  Subreg, Imm);
2669  // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2670  // one, do not call ReplaceAllUsesWith.
2671  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2672  SDValue(NewNode, 0));
2673  return;
2674  }
2675 
2676  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
2677  if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
2678  N0.getValueType() == MVT::i64 &&
2679  (!(C->getZExtValue() & 0x80000000) ||
2680  hasNoSignedComparisonUses(Node))) {
2681  SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
2682  MVT::i32);
2683  SDValue Reg = N0.getNode()->getOperand(0);
2684 
2685  // Extract the 32-bit subregister.
2686  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
2687  MVT::i32, Reg);
2688 
2689  // Emit a testl.
2690  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
2691  Subreg, Imm);
2692  // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2693  // one, do not call ReplaceAllUsesWith.
2694  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2695  SDValue(NewNode, 0));
2696  return;
2697  }
2698  }
2699  break;
2700  }
2701  case ISD::STORE: {
2702  // Change a chain of {load; incr or dec; store} of the same value into
2703  // a simple increment or decrement through memory of that value, if the
2704  // uses of the modified value and its address are suitable.
2705  // The DEC64m tablegen pattern is currently not able to match the case where
2706  // the EFLAGS on the original DEC are used. (This also applies to
2707  // {INC,DEC}X{64,32,16,8}.)
2708  // We'll need to improve tablegen to allow flags to be transferred from a
2709  // node in the pattern to the result node, probably with a new keyword.
2710  // For example, we have this:
2711  // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2712  // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2713  // (implicit EFLAGS)]>;
2714  // but maybe we need something like this:
2715  // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2716  // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2717  // (transferrable EFLAGS)]>;
2718 
2719  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2720  SDValue StoredVal = StoreNode->getOperand(1);
2721  unsigned Opc = StoredVal->getOpcode();
2722 
2723  LoadSDNode *LoadNode = nullptr;
2724  SDValue InputChain;
2725  if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
2726  LoadNode, InputChain))
2727  break;
2728 
2729  SDValue Base, Scale, Index, Disp, Segment;
2730  if (!selectAddr(LoadNode, LoadNode->getBasePtr(),
2731  Base, Scale, Index, Disp, Segment))
2732  break;
2733 
2734  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
2735  MemOp[0] = StoreNode->getMemOperand();
2736  MemOp[1] = LoadNode->getMemOperand();
2737  const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
2738  EVT LdVT = LoadNode->getMemoryVT();
2739  unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
2740  MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
2741  SDLoc(Node),
2742  MVT::i32, MVT::Other, Ops);
2743  Result->setMemRefs(MemOp, MemOp + 2);
2744 
2745  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2746  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2747  CurDAG->RemoveDeadNode(Node);
2748  return;
2749  }
2750  }
2751 
2752  SelectCode(Node);
2753 }
2754 
2755 bool X86DAGToDAGISel::
2756 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
2757  std::vector<SDValue> &OutOps) {
2758  SDValue Op0, Op1, Op2, Op3, Op4;
2759  switch (ConstraintID) {
2760  default:
2761  llvm_unreachable("Unexpected asm memory constraint");
2762  case InlineAsm::Constraint_i:
2763  // FIXME: It seems strange that 'i' is needed here since it's supposed to
2764  // be an immediate and not a memory constraint.
2765  LLVM_FALLTHROUGH;
2766  case InlineAsm::Constraint_o: // offsetable ??
2767  case InlineAsm::Constraint_v: // not offsetable ??
2768  case InlineAsm::Constraint_m: // memory
2769  case InlineAsm::Constraint_X:
2770  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
2771  return true;
2772  break;
2773  }
2774 
2775  OutOps.push_back(Op0);
2776  OutOps.push_back(Op1);
2777  OutOps.push_back(Op2);
2778  OutOps.push_back(Op3);
2779  OutOps.push_back(Op4);
2780  return false;
2781 }
2782 
2783 /// This pass converts a legalized DAG into a X86-specific DAG,
2784 /// ready for instruction scheduling.
2785 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
2786  CodeGenOpt::Level OptLevel) {
2787  return new X86DAGToDAGISel(TM, OptLevel);
2788 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:467
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq)
Return true if call address is a load and it can be moved below CALLSEQ_START and the chains leading ...
SDValue getValue(unsigned R) const
const SDValue & getValue() const
STATISTIC(NumFunctions,"Total number of functions")
size_t i
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N)
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineSDNodes's memory reference descriptor list.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
Tail call return.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:39
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
unsigned getReg() const
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getID() const
Return the register class ID number.
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:268
unsigned getSizeInBits() const
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getNumOperands() const
const SDValue & getOperand(unsigned Num) const
void setNodeId(int Id)
Set unique node id.
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
const SDValue & getBasePtr() const
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:180
unsigned getResNo() const
get the index which selects a specific result in the SDNode
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:209
X86 compare and logical compare instructions.
The address of a basic block.
Definition: Constants.h:822
A description of a memory reference used in the backend.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Shift and rotation operations.
Definition: ISDOpcodes.h:344
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:452
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
Reg
All possible values of the reg field in the ModR/M byte.
SimpleValueType SimpleTy
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence, and carry arbitrary information that target might want to know.
Definition: ISDOpcodes.h:622
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, SDValue StoredVal, SelectionDAG *CurDAG, LoadSDNode *&LoadNode, SDValue &InputChain)
Check whether or not the chain ending in StoreNode is suitable for doing the {load; increment or decr...
#define T
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:363
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
Function Alias Analysis false
const SDValue & getBasePtr() const
EVT getMemoryVT() const
Return the type of the in-memory value.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.cpp:501
This class is used to represent ISD::STORE nodes.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:518
SDNode * getNode() const
get the SDNode which holds the desired result
#define P(N)
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:111
LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
Definition: ISDOpcodes.h:81
MVT - Machine Value Type.
const SDValue & getOperand(unsigned i) const
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM)
bool isNonTemporal() const
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an important base class in LLVM.
Definition: Constant.h:42
bool isVector() const
isVector - Return true if this is a vector value type.
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:228
This is a base class used to represent MGATHER and MSCATTER nodes.
unsigned getScalarValueSizeInBits() const
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:154
This class provides iterator support for SDUse operands that use a specific SDNode.
uint32_t Offset
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
void RepositionNode(allnodes_iterator Position, SDNode *N)
Move node N in the AllNodes list to be immediately before the given iterator Position.
unsigned getOpcode() const
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
const SDValue & getBasePtr() const
On Darwin, this node represents the result of the popl at function entry, used for PIC code...
self_iterator getIterator()
Definition: ilist_node.h:81
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isVolatile() const
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM)
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:392
EVT - Extended Value Type.
Definition: ValueTypes.h:31
std::vector< ArgListEntry > ArgListTy
This structure contains all information that is necessary for lowering calls.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
This class contains a discriminated union of information about pointers in memory operands...
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getOffset() const
These operations represent an abstract X86 call instruction, which includes a bunch of information...
TokenFactor - This node takes multiple tokens as input and produces a single token result. This is used to represent the fact that the operand operators are independent of each other.
Definition: ISDOpcodes.h:50
Iterator for intrusive lists based on ilist_node.
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:274
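This is the guard used for 32-bit displacement fields; a standalone sketch:

  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    assert(isInt<32>(0x7FFFFFFFLL));   // INT32_MAX fits
    assert(!isInt<32>(0x80000000LL));  // one past INT32_MAX does not
    return 0;
  }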
void dump() const
Dump this node, for debugging.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation suitable for instruction selection.
Definition: SelectionDAG.h:166
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:843
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
Represents one node in the SelectionDAG.
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruction selectors.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM)
Target - Wrapper for Target specific information.
Class for arbitrary precision integers.
Definition: APInt.h:77
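A brief sketch of fixed-width wrap-around arithmetic (the 12-bit width and the operand values are made up for the example):

  #include "llvm/ADT/APInt.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    APInt A(/*numBits=*/12, /*val=*/0xF00);
    APInt B(/*numBits=*/12, /*val=*/0x200);
    // Arithmetic wraps modulo 2^12: 0xF00 + 0x200 == 0x1100 -> 0x100.
    assert((A + B).getZExtValue() == 0x100);
    return 0;
  }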
int64_t getSExtValue() const
op_iterator op_begin() const
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:400
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:403
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1942
static bool isDispSafeForFrameIndex(int64_t Val)
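The listing attaches no brief here; in this file the helper conservatively requires the displacement to fit in 31 bits, leaving headroom so that adding the frame-index offset cannot overflow the 32-bit displacement field. A sketch to that effect (the name is adjusted to mark it as illustrative):

  #include "llvm/Support/MathExtras.h"

  // Sketch: leave one bit of headroom below the 32-bit displacement limit.
  static bool isDispSafeForFrameIndexSketch(int64_t Val) {
    return llvm::isInt<31>(Val);
  }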
uint64_t getConstantOperandVal(unsigned i) const
static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc)
Get the appropriate X86 opcode for an in-memory increment or decrement.
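Its selection can be sketched as a dispatch on operand width; the INC*m opcodes below are real X86 memory-form instruction definitions, but the structure shown is illustrative (assuming this file's backend headers) rather than the exact implementation:

  // Illustrative sketch: choose an in-memory increment opcode by width.
  static unsigned getMemIncOpcodeSketch(llvm::EVT VT) {
    if (VT == llvm::MVT::i64) return llvm::X86::INC64m;
    if (VT == llvm::MVT::i32) return llvm::X86::INC32m;
    if (VT == llvm::MVT::i16) return llvm::X86::INC16m;
    return llvm::X86::INC8m;
  }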
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:333
bool hasAnyUseOfValue(unsigned Value) const
Return true if there is any use of the indicated value.
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2*N], and return the full value as two results, each of type iN.
Definition: ISDOpcodes.h:205
A wrapper node for TargetConstantPool, TargetJumpTable, TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, MCSymbol and TargetBlockAddress.
FunctionPass * createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a X86-specific DAG, ready for instruction scheduling.
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store instruction, then an offset node that is added / subtracted from the base pointer to form the address (for indexed memory ops).
Definition: ISDOpcodes.h:536
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:256
void ReplaceAllUsesWith(SDValue From, SDValue Op)
Modify anything using 'From' to use 'To' instead.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
op_iterator op_end() const
const SDValue & getOffset() const
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
const SDValue & getIndex() const
int getNodeId() const
Return the unique node id.
static bool hasNoSignedComparisonUses(SDNode *N)
Test whether the given X86ISD::CMP node has any uses which require the SF or OF bits to be accurate.
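Its shape is roughly the following sketch; getCondCodeOf is a hypothetical helper standing in for the per-opcode extraction of each user's condition-code operand:

  // Sketch: succeed only if every user tests an unsigned condition.
  static bool usesOnlyUnsignedConditions(SDNode *CmpNode) {
    for (SDNode *User : CmpNode->uses()) {
      X86::CondCode CC = getCondCodeOf(User); // hypothetical helper
      switch (CC) {
      case X86::COND_E:  case X86::COND_NE:
      case X86::COND_A:  case X86::COND_AE:
      case X86::COND_B:  case X86::COND_BE:
        continue;     // equality/unsigned tests: SF and OF are not consulted
      default:
        return false; // signed condition or other flag use
      }
    }
    return true;
  }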
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defined outside of the scope of this SelectionDAG.
Definition: ISDOpcodes.h:175
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
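Typical use when building nodes during selection, as a sketch (CurDAG, DL, and VT are assumed to be in scope, as they would be inside a selector):

  // Sketch: materialize the constant 1 as a target constant of type VT.
  SDValue One = CurDAG->getConstant(1, DL, VT, /*isTarget=*/true);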
This class is used to form a handle around another node that is persistent and is updated across invocations of replaceAllUsesWith on its operand.
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layout.
Blend where the condition has been shrunk.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
#define DEBUG(X)
Definition: Debug.h:100
bool isZeroNode(SDValue Elt)
Returns true if Elt is a constant zero or floating point constant +0.0.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:47
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement=true)
Returns true if the given offset can fit into the displacement field of the instruction.
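Usage mirrors the displacement folding in this file; a sketch (Offset, M as the current code model, and AM as the address mode under construction are assumed in scope):

  // Sketch: only fold the offset when the code model can encode it.
  int64_t Val = AM.Disp + Offset;
  if (X86::isOffsetSuitableForCodeModel(Val, M, AM.hasSymbolicDisplacement()))
    AM.Disp = Val;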
APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:406
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating point stack.
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the specified vector type.
Definition: ISDOpcodes.h:321
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, SDValue Call, SDValue OrigChain)
Replace the original chain operand of the call with load's chain operand and move load below the call's chain operand.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
Special wrapper used under X86-64 PIC mode for RIP relative displacements.
uint64_t getZExtValue() const
BRIND - Indirect branch.
Definition: ISDOpcodes.h:556
This class is used to represent ISD::LOAD nodes.