LLVM 3.7.0
X86ISelDAGToDAG.cpp
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to an X86 dag.
//
//===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86InstrBuilder.h"
17 #include "X86MachineFunctionInfo.h"
18 #include "X86RegisterInfo.h"
19 #include "X86Subtarget.h"
20 #include "X86TargetMachine.h"
21 #include "llvm/ADT/Statistic.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/Type.h"
31 #include "llvm/Support/Debug.h"
37 #include <stdint.h>
38 using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDValues instead of register numbers for the leaves of the matched
  /// tree.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;
    int Base_FrameIndex;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    MCSymbol *MCSym;
    int JT;
    unsigned Align;             // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
        : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
          Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
          MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}

    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
    }

    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

    /// isRIPRelative - Return true if this addressing mode is already RIP
    /// relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode())
        Base_Reg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
             << " Scale " << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode())
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " MCSym ";
      if (MCSym)
        dbgs() << MCSym;
      else
        dbgs() << "nul";
      dbgs() << " JT " << JT << " Align " << Align << '\n';
    }
#endif
  };
}
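
// As a worked illustration of these fields: the x86 memory operand in
// "movl 8(%rbx,%rcx,4), %eax" corresponds to an X86ISelAddressMode with
//   BaseType = RegBase, Base_Reg = %rbx, Scale = 4, IndexReg = %rcx, Disp = 8,
// i.e. the canonical form Base + Scale*Index + Disp, with Segment and the
// symbolic fields (GV, CP, BlockAddr, ES, MCSym, JT) all left empty.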

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel final : public SelectionDAGISel {
    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// OptForSize - If true, selector should try to optimize for code size
    /// instead of performance.
    bool OptForSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), OptForSize(false) {}

    const char *getPassName() const override {
      return "X86 DAG->DAG Instruction Selection";
    }

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Reset the subtarget each time through.
      Subtarget = &MF.getSubtarget<X86Subtarget>();
      SelectionDAGISel::runOnMachineFunction(MF);
      return true;
    }

    void EmitFunctionEntryCode() override;

    bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;

    void PreprocessISelDAG() override;

    inline bool immSext8(SDNode *N) const {
      return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
    }

    // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
    // sign extended field.
    inline bool i64immSExt32(SDNode *N) const {
      uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
      return (int64_t)v == (int32_t)v;
    }
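
    // For example (values chosen for illustration): 0xFFFFFFFF80000000
    // satisfies i64immSExt32, since sign-extending its low 32 bits
    // (0x80000000) reproduces the full 64-bit value, while
    // 0x0000000080000000 does not.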

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDNode *N) override;
    SDNode *SelectGather(SDNode *N, unsigned Opc);
    SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);

    bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                          SDValue &Scale, SDValue &Index, SDValue &Disp,
                          SDValue &Segment);
    bool SelectMOV64Imm32(SDValue N, SDValue &Imm);
    bool SelectLEAAddr(SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool SelectLEA64_32Addr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
    bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool SelectScalarSSELoad(SDNode *Root, SDValue N,
                             SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &NodeWithChain);

    bool TryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override;

    void EmitSpecialCodeForMain();

    inline void getAddressOperands(X86ISelAddressMode &AM, SDLoc DL,
                                   SDValue &Base, SDValue &Scale,
                                   SDValue &Index, SDValue &Disp,
                                   SDValue &Segment) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
                 ? CurDAG->getTargetFrameIndex(
                       AM.Base_FrameIndex,
                       TLI->getPointerTy(CurDAG->getDataLayout()))
                 : AM.Base_Reg;
      Scale = getI8Imm(AM.Scale, DL);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES) {
        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.MCSym) {
        assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
        assert(AM.SymbolFlags == 0 && "oo");
        Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(unsigned Imm, SDLoc DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm, SDLoc DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
    }

    /// getGlobalBaseReg - Return an SDNode that returns the value of
    /// the global base register. Output instructions required to
    /// initialize the global base register, if necessary.
    ///
    SDNode *getGlobalBaseReg();

    /// getTargetMachine - Return a reference to the TargetMachine, casted
    /// to the target-specific type.
    const X86TargetMachine &getTargetMachine() const {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
    /// to the target-specific type.
    const X86InstrInfo *getInstrInfo() const {
      return Subtarget->getInstrInfo();
    }

    /// \brief Address-mode matching performs shift-of-and to and-of-shift
    /// reassociation in order to expose more scaled addressing
    /// opportunities.
    bool ComplexPatternFuncMutatesDAG() const override {
      return true;
    }
  };
}


bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (!N.hasOneUse())
    return false;

  if (N.getOpcode() != ISD::LOAD)
    return true;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::SUB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is an 8-bit immediate we should fold the
      // immediate instead. This reduces code size.
      // e.g.
      //   movl 4(%esp), %eax
      //   addl $4, %eax
      // vs.
      //   movl $4, %eax
      //   addl 4(%esp), %eax
      // The former is 2 bytes shorter. In the case where the increment is 1,
      // the saving can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      //   movl %gs:0, %eax
      //   leal i@NTPOFF(%eax), %eax
      // instead of
      //   movl $i@NTPOFF, %eax
      //   addl %gs:0, %eax
      // if the block also has an access to a second TLS address this will save
      // a load.
      // FIXME: This is probably also true for non-TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }
  }

  return true;
}

/// MoveBelowOrigChain - Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
    Ops.clear();
    Ops.push_back(NewChain);
  }
  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  Ops.append(Call->op_begin() + 1, Call->op_end());
  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}

/// isCalleeLoad - Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  // The transformation is somewhat dangerous if the call's chain was glued to
  // the call. After MoveBelowOrigChain the load is moved between the call and
  // the chain, this can create a cycle if the load is not folded. So it is
  // *really* important that we are sure the load will be folded.
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (!Chain.getNumOperands())
    return false;
  // Since we are not checking for AA here, conservatively abort if the chain
  // writes to memory. It's not safe to move the callee (a load) across a store.
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
    return false;
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}

void X86DAGToDAGISel::PreprocessISelDAG() {
  // OptForSize is used in pattern predicates that isel is matching.
  OptForSize = MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize);

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
        // Only do this when the target doesn't favor register-indirect calls.
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          // Only do this if the load can be folded into TC_RETURN.
          (Subtarget->is64Bit() ||
           getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to be store
    // and load to the stack. This is a gross hack. We would like to simply
    // mark these as being illegal, but when we do that, legalize produces
    // these when it expands calls, then expands these in the same legalize
    // pass. We would like dag combine to be able to hack on these between the
    // call expansion and the node legalization. As such this pass basically
    // does "really late" legalization of these inline with the X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    MVT SrcVT = N->getOperand(0).getSimpleValueType();
    MVT DstVT = N->getSimpleValueType(0);

    // If any of the sources are vectors, no fp stack involved.
    if (SrcVT.isVector() || DstVT.isVector())
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    const X86TargetLowering *X86Lowering =
        static_cast<const X86TargetLowering *>(TLI);
    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore. SSE can fold direct loads into other
    // operations. Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    SDLoc dl(N);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, MachinePointerInfo(), MemVT,
                                          false, false, 0);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        MachinePointerInfo(),
                                        MemVT, false, false, false, 0);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created. This will cause general havoc on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}
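
// As a concrete illustration of the store/load lowering above (assuming an
// x87-only target, no SSE): an f64 -> f32 FP_ROUND whose operand lives on the
// FP stack becomes a 4-byte truncating store to a stack temporary followed by
// a load of the same slot, because the round trip through a 32-bit memory
// slot is what actually performs the rounding.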


/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain() {
  if (Subtarget->isTargetCygMing()) {
    TargetLowering::ArgListTy Args;
    auto &DL = CurDAG->getDataLayout();

    TargetLowering::CallLoweringInfo CLI(*CurDAG);
    CLI.setChain(CurDAG->getRoot())
        .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
                   CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
                   std::move(Args), 0);
    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
    CurDAG->setRoot(Result.second);
  }
}

void X86DAGToDAGISel::EmitFunctionEntryCode() {
  // If this is main, emit special code for main.
  if (const Function *Fn = MF->getFunction())
    if (Fn->hasExternalLinkage() && Fn->getName() == "main")
      EmitSpecialCodeForMain();
}

static bool isDispSafeForFrameIndex(int64_t Val) {
  // On 64-bit platforms, we can run into an issue where a frame index
  // includes a displacement that, when added to the explicit displacement,
  // will overflow the displacement field. Assuming that the frame index
  // displacement fits into a 31-bit integer (which is only slightly more
  // aggressive than the current fundamental assumption that it fits into
  // a 32-bit integer), a 31-bit disp should always be safe.
  return isInt<31>(Val);
}

bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {
  // Cannot combine ExternalSymbol displacements with integer offsets.
  if (Offset != 0 && (AM.ES || AM.MCSym))
    return true;
  int64_t Val = AM.Disp + Offset;
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit()) {
    if (!X86::isOffsetSuitableForCodeModel(Val, M,
                                           AM.hasSymbolicDisplacement()))
      return true;
    // In addition to the checks required for a register base, check that
    // we do not try to use an unsafe Disp with a frame index.
    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
        !isDispSafeForFrameIndex(Val))
      return true;
  }
  AM.Disp = Val;
  return false;
}

bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
  SDValue Address = N->getOperand(1);

  // load gs:0 -> GS segment register.
  // load fs:0 -> FS segment register.
  //
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
        Subtarget->isTargetLinux())
      switch (N->getPointerInfo().getAddrSpace()) {
      case 256:
        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
        return false;
      case 257:
        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
        return false;
      }

  return true;
}
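
// For instance (an illustrative initial-exec TLS access on 32-bit Linux), IR
// such as
//   %self = load i32, i32 addrspace(256)* null
// is a load of gs:0, so the zero address folds into AM.Segment and the access
// selects to a plain "movl %gs:0, %eax" with no other address components.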

/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
/// into an addressing mode. These wrap things that will resolve down into a
/// symbol reference. If no match is possible, this returns true, otherwise it
/// returns false.
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  SDValue N0 = N.getOperand(0);
  CodeModel::Model M = TM.getCodeModel();

  // Handle X86-64 rip-relative addresses. We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      (M == CodeModel::Small || M == CodeModel::Kernel)) {
    // Base and index reg must be 0 in order to use %rip as base.
    if (AM.hasBaseOrIndexReg())
      return true;
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.GV = G->getGlobal();
      AM.SymbolFlags = G->getTargetFlags();
      if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.SymbolFlags = CP->getTargetFlags();
      if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
      AM.MCSym = S->getMCSymbol();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.BlockAddr = BA->getBlockAddress();
      AM.SymbolFlags = BA->getTargetFlags();
      if (FoldOffsetIntoAddress(BA->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else
      llvm_unreachable("Unhandled symbol reference node.");

    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit
  // mode, this only applies to a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      M == CodeModel::Small || M == CodeModel::Kernel) {
    assert(N.getOpcode() != X86ISD::WrapperRIP &&
           "RIP-relative addressing already handled");
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
      AM.MCSym = S->getMCSymbol();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      AM.BlockAddr = BA->getBlockAddress();
      AM.Disp += BA->getOffset();
      AM.SymbolFlags = BA->getTargetFlags();
    } else
      llvm_unreachable("Unhandled symbol reference node.");
    return false;
  }

  return true;
}

/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (MatchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr &&
      AM.IndexReg.getNode() == nullptr &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}
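
// Encoding note on the first post-processing step above: an index with no
// base, as in "leal (,%eax,2), %ecx", needs a SIB byte with no base register,
// and that encoding forces a 32-bit displacement field; the equivalent
// "leal (%eax,%eax), %ecx" computes the same value in a shorter encoding and
// without occupying a scaled index.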

// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  if (N.getNode()->getNodeId() == -1 ||
      N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
    DAG.RepositionNode(Pos.getNode(), N.getNode());
    N.getNode()->setNodeId(Pos.getNode()->getNodeId());
  }
}

// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
// safe. This allows us to convert the shift and and into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed.
static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask,
                                      SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
      !Shift.hasOneUse())
    return true;

  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  InsertDAGNode(DAG, N, Eight);
  InsertDAGNode(DAG, N, Srl);
  InsertDAGNode(DAG, N, NewMask);
  InsertDAGNode(DAG, N, And);
  InsertDAGNode(DAG, N, ShlCount);
  InsertDAGNode(DAG, N, Shl);
  DAG.ReplaceAllUsesWith(N, Shl);
  AM.IndexReg = And;
  AM.Scale = (1 << ScaleLog);
  return false;
}
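
// A concrete instance of the transform above (numbers chosen for
// illustration): with C1 == 2, the pattern (X >> 6) & 0x3fc becomes
// ((X >> 8) & 0xff) << 2; both keep exactly bits 8..15 of X, left-shifted to
// bit 2. The (X >> 8) & 0xff piece can then be selected as an h-register
// extract (e.g. "movzbl %ah, %ecx" when X lives in %eax), and the << 2 is
// absorbed as Scale == 4 on the index register.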

// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        uint64_t Mask,
                                        SDValue Shift, SDValue X,
                                        X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SHL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  // Not likely to be profitable if either the AND or SHIFT node has more
  // than one use (unless all uses are for address computation). Besides,
  // isel mechanism requires their node ids to be reused.
  if (!N.hasOneUse() || !Shift.hasOneUse())
    return true;

  // Verify that the shift amount is something we can fold.
  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  InsertDAGNode(DAG, N, NewMask);
  InsertDAGNode(DAG, N, NewAnd);
  InsertDAGNode(DAG, N, NewShift);
  DAG.ReplaceAllUsesWith(N, NewShift);

  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;
  return false;
}
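
// A concrete instance (illustrative values): (X << 2) & 0xfc becomes
// (X & 0x3f) << 2, since masking before or after the shift keeps the same six
// bits. The new AND becomes AM.IndexReg and the << 2 is absorbed as
// AM.Scale == 4, so the shift disappears into the addressing mode.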

// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %rcx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask,
                                    SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  unsigned MaskLZ = countLeadingZeros(Mask);
  unsigned MaskTZ = countTrailingZeros(Mask);

  // The amount of shift we're trying to fit into the addressing mode is taken
  // from the trailing zeros of the mask.
  unsigned AMShiftAmt = MaskTZ;

  // There is nothing we can do here unless the mask is removing some bits.
  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;

  // We also need to ensure that the mask is a contiguous run of bits.
  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;

  // Scale the leading zero count down based on the actual size of the value.
  // Also scale it down based on the size of the shift.
  MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;

  // The final check is to ensure that any masked out high bits of X are
  // already known to be zero. Otherwise, the mask has a semantic impact
  // other than masking out a couple of low bits. Unfortunately, because of
  // the mask, zero extensions will be removed from operands in some cases.
  // This code works extra hard to look through extensions because we can
  // replace them with zero extensions cheaply if necessary.
  bool ReplacingAnyExtend = false;
  if (X.getOpcode() == ISD::ANY_EXTEND) {
    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();
    // Assume that we'll replace the any-extend with a zero-extend, and
    // narrow the search to the extended value.
    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;
  }
  APInt MaskedHighBits =
      APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
  APInt KnownZero, KnownOne;
  DAG.computeKnownBits(X, KnownZero, KnownOne);
  if (MaskedHighBits != KnownZero) return true;

  // We've identified a pattern that can be transformed into a single shift
  // and an addressing mode. Make it so.
  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);
    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
    InsertDAGNode(DAG, N, NewX);
    X = NewX;
  }
  SDLoc DL(N);
  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  InsertDAGNode(DAG, N, NewSRLAmt);
  InsertDAGNode(DAG, N, NewSRL);
  InsertDAGNode(DAG, N, NewSHLAmt);
  InsertDAGNode(DAG, N, NewSHL);
  DAG.ReplaceAllUsesWith(N, NewSHL);

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewSRL;
  return false;
}
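
// Tying this to the example in the comment above: (x >> 9) & 124 has
// ShiftAmt == 9 and a mask with two trailing zeros, so AMShiftAmt == 2 and
// the pattern is rebuilt as ((x >> 11) << 2): a single "shrl $11" whose << 2
// folds into the addressing mode as Scale == 4.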

bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  SDLoc dl(N);
  DEBUG({
      dbgs() << "MatchAddress: ";
      AM.dump();
    });
  // Limit recursion.
  if (Depth > 5)
    return MatchAddressBase(N, AM);

  // If this is already a %rip relative address, we can only merge immediates
  // into it. Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements. It isn't very important, but this should be fixed for
    // consistency.
    if (!(AM.ES || AM.MCSym) && AM.JT != -1)
      return true;

    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
      if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
        return false;
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::LOCAL_RECOVER: {
    if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
      if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
        // Use the symbol and don't prefix it.
        AM.MCSym = ESNode->getMCSymbol();
        return false;
      }
    break;
  }
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!FoldOffsetIntoAddress(Val, AM))
      return false;
    break;
  }

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    if (!MatchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
      return false;
    break;

  case ISD::FrameIndex:
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;

    if (ConstantSDNode
          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getNode()->getOperand(0);

        // Okay, we know that we have a scale by now. However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (CurDAG->isBaseWithConstantOffset(ShVal)) {
          AM.IndexReg = ShVal.getNode()->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
          uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
          if (!FoldOffsetIntoAddress(Disp, AM))
            return false;
        }

        AM.IndexReg = ShVal;
        return false;
      }
    }
    break;

  case ISD::SRL: {
    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue And = N.getOperand(0);
    if (And.getOpcode() != ISD::AND) break;
    SDValue X = And.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    // The mask used for the transform is expected to be post-shift, but we
    // found the shift first so just apply the shift to the mask before passing
    // it down.
    if (!isa<ConstantSDNode>(N.getOperand(1)) ||
        !isa<ConstantSDNode>(And.getOperand(1)))
      break;
    uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);

    // Try to fold the mask and shift into the scale, and return false if we
    // succeed.
    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
      return false;
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    // FALL THROUGH
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr) {
      if (ConstantSDNode
            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getNode()->getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now. However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
            Reg = MulVal.getNode()->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
            if (FoldOffsetIntoAddress(Disp, AM))
              Reg = N.getNode()->getOperand(0);
          } else {
            Reg = N.getNode()->getOperand(0);
          }

          AM.IndexReg = AM.Base_Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address and
    // the index field with the index field unused, use -B as the index.
    // This is a win if A has multiple parts that can be folded into
    // the address. Also, this saves a mov if the base register has
    // other uses, since it avoids a two-address sub instruction; however,
    // it costs an additional mov if the index register has other uses.

    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }

    int Cost = 0;
    SDValue RHS = Handle.getValue().getNode()->getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base_Reg.getNode() &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;
    AM.Scale = 1;

    // Insert the new nodes into the topological ordering.
    InsertDAGNode(*CurDAG, N, Zero);
    InsertDAGNode(*CurDAG, N, Neg);
    return false;
  }

  case ISD::ADD: {
    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    X86ISelAddressMode Backup = AM;
    if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
        !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
      return false;
    AM = Backup;

    // Try again after commuting the operands.
    if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
        !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
      return false;
    AM = Backup;

    // If we couldn't fold both operands into the address at the same time,
    // see if we can just put each operand into a register and fold at least
    // the add.
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        !AM.Base_Reg.getNode() &&
        !AM.IndexReg.getNode()) {
      N = Handle.getValue();
      AM.Base_Reg = N.getOperand(0);
      AM.IndexReg = N.getOperand(1);
      AM.Scale = 1;
      return false;
    }
    N = Handle.getValue();
    break;
  }

  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (CurDAG->isBaseWithConstantOffset(N)) {
      X86ISelAddressMode Backup = AM;
      ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));

      // Start with the LHS as an addr mode.
      if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
          !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
        return false;
      AM = Backup;
    }
    break;

  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    SDValue Shift = N.getOperand(0);
    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
    SDValue X = Shift.getOperand(0);

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    if (X.getSimpleValueType().getSizeInBits() > 64) break;

    if (!isa<ConstantSDNode>(N.getOperand(1)))
      break;
    uint64_t Mask = N.getConstantOperandVal(1);

    // Try to fold the mask and shift into an extract and scale.
    if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to fold the mask and shift directly into the scale.
    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
      return false;

    // Try to swap the mask and shift to place shifts which can be done as
    // a scale on the outside of the mask.
    if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
      return false;
    break;
  }
  }

  return MatchAddressBase(N, AM);
}

/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (!AM.IndexReg.getNode()) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base_Reg = N;
  return false;
}

bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                       SDValue &Scale, SDValue &Index,
                                       SDValue &Disp, SDValue &Segment) {

  MaskedGatherScatterSDNode *Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent);
  if (!Mgs)
    return false;
  X86ISelAddressMode AM;
  unsigned AddrSpace = Mgs->getPointerInfo().getAddrSpace();
  // AddrSpace 256 -> GS, 257 -> FS.
  if (AddrSpace == 256)
    AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
  if (AddrSpace == 257)
    AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);

  SDLoc DL(N);
  Base = Mgs->getBasePtr();
  Index = Mgs->getIndex();
  unsigned ScalarSize = Mgs->getValue().getValueType().getScalarSizeInBits();
  Scale = getI8Imm(ScalarSize/8, DL);

  // If Base is 0, the whole address is in index and the Scale is 1.
  if (isa<ConstantSDNode>(Base)) {
    assert(dyn_cast<ConstantSDNode>(Base)->isNullValue() &&
           "Unexpected base in gather/scatter");
    Scale = getI8Imm(1, DL);
    Base = CurDAG->getRegister(0, MVT::i32);
  }
  if (AM.Segment.getNode())
    Segment = AM.Segment;
  else
    Segment = CurDAG->getRegister(0, MVT::i32);
  Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

/// SelectAddr - Returns true if it is able to pattern match an addressing
/// mode. It returns the operands which make up the maximal addressing mode it
/// can match by reference.
///
/// Parent is the parent node of the addr operand that is being matched. It
/// is always a load, store, atomic node, or null. It is only null when
/// checking memory operands for inline asm nodes.
bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;

  if (Parent &&
      // This list of opcodes covers all the nodes that have an "addr:$ptr"
      // operand but are not a MemSDNode, and thus don't have proper addrspace
      // info.
      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
      Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
      Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
      Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
    unsigned AddrSpace =
      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
    // AddrSpace 256 -> GS, 257 -> FS.
    if (AddrSpace == 256)
      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    if (AddrSpace == 257)
      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
  }

  if (MatchAddress(N, AM))
    return false;

  MVT VT = N.getSimpleValueType();
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base_Reg.getNode())
      AM.Base_Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
///
/// We also return:
///   PatternNodeWithChain: this is the matched node that has a chain input
///   and output.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
                                          SDValue N, SDValue &Base,
                                          SDValue &Scale, SDValue &Index,
                                          SDValue &Disp, SDValue &Segment,
                                          SDValue &PatternNodeWithChain) {
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    PatternNodeWithChain = N.getOperand(0);
    if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
        PatternNodeWithChain.hasOneUse() &&
        IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
        IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
      LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
      if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
        return false;
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements. This is a vector shuffle from the zero vector.
  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(0).getNode()->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
      N.getOperand(0).getOperand(0).hasOneUse() &&
      IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
      IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
    // Okay, this is a zero extending load. Fold it.
    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
    if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
      return false;
    PatternNodeWithChain = SDValue(LD, 0);
    return true;
  }
  return false;
}


bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CN->getZExtValue();
    if ((uint32_t)ImmVal != (uint64_t)ImmVal)
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
    return true;
  }

  // In static codegen with small code model, we can get the address of a label
  // into a register with 'movl'. TableGen has already made sure we're looking
  // at a label of some kind.
  assert(N->getOpcode() == X86ISD::Wrapper &&
         "Unexpected node type for MOV32ri64");
  N = N.getOperand(0);

  if (N->getOpcode() != ISD::TargetConstantPool &&
      N->getOpcode() != ISD::TargetJumpTable &&
      N->getOpcode() != ISD::TargetGlobalAddress &&
      N->getOpcode() != ISD::TargetExternalSymbol &&
      N->getOpcode() != ISD::MCSymbol &&
      N->getOpcode() != ISD::TargetBlockAddress)
    return false;

  Imm = N;
  return TM.getCodeModel() == CodeModel::Small;
}
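
// The shorter form is justified by an x86-64 ISA fact: a 32-bit mov such as
// "movl $label, %eax" always clears bits 63:32 of %rax, so an address known
// to fit in 32 bits can be materialized with MOV32ri64 instead of the 10-byte
// "movabsq".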
1497 
1498 bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
1499  SDValue &Scale, SDValue &Index,
1500  SDValue &Disp, SDValue &Segment) {
1501  if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment))
1502  return false;
1503 
1504  SDLoc DL(N);
1506  if (RN && RN->getReg() == 0)
1507  Base = CurDAG->getRegister(0, MVT::i64);
1508  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
1509  // Base could already be %rip, particularly in the x32 ABI.
1510  Base = SDValue(CurDAG->getMachineNode(
1512  CurDAG->getTargetConstant(0, DL, MVT::i64),
1513  Base,
1514  CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
1515  0);
1516  }
1517 
1518  RN = dyn_cast<RegisterSDNode>(Index);
1519  if (RN && RN->getReg() == 0)
1520  Index = CurDAG->getRegister(0, MVT::i64);
1521  else {
1522  assert(Index.getValueType() == MVT::i32 &&
1523  "Expect to be extending 32-bit registers for use in LEA");
1524  Index = SDValue(CurDAG->getMachineNode(
1526  CurDAG->getTargetConstant(0, DL, MVT::i64),
1527  Index,
1528  CurDAG->getTargetConstant(X86::sub_32bit, DL,
1529  MVT::i32)),
1530  0);
1531  }
1532 
1533  return true;
1534 }
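// Illustrative sketch (hypothetical registers): for a 32-bit base %ebx, the
// SUBREG_TO_REG above asserts that the upper 32 bits are zero (writes to a
// 32-bit register implicitly clear them), producing a 64-bit value suitable
// for "lea disp(%rbx,%rcx,scale), %r64".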
1535 
1536 /// SelectLEAAddr - Call SelectAddr and determine if the maximal addressing
1537 /// mode it matches can be cost-effectively emitted as an LEA instruction.
1538 bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
1539  SDValue &Base, SDValue &Scale,
1540  SDValue &Index, SDValue &Disp,
1541  SDValue &Segment) {
1542  X86ISelAddressMode AM;
1543 
1544  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1545  // segments.
1546  SDValue Copy = AM.Segment;
1547  SDValue T = CurDAG->getRegister(0, MVT::i32);
1548  AM.Segment = T;
1549  if (MatchAddress(N, AM))
1550  return false;
1551  assert(T == AM.Segment);
1552  AM.Segment = Copy;
1553 
1554  MVT VT = N.getSimpleValueType();
1555  unsigned Complexity = 0;
1556  if (AM.BaseType == X86ISelAddressMode::RegBase)
1557  if (AM.Base_Reg.getNode())
1558  Complexity = 1;
1559  else
1560  AM.Base_Reg = CurDAG->getRegister(0, VT);
1561  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1562  Complexity = 4;
1563 
1564  if (AM.IndexReg.getNode())
1565  Complexity++;
1566  else
1567  AM.IndexReg = CurDAG->getRegister(0, VT);
1568 
1569  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1570  // a simple shift.
1571  if (AM.Scale > 1)
1572  Complexity++;
1573 
1574  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1575  // to a LEA. This is determined with some experimentation but is by no means
1576  // optimal (especially for code size consideration). LEA is nice because of
1577  // its three-address nature. Tweak the cost function again when we can run
1578  // convertToThreeAddress() at register allocation time.
1579  if (AM.hasSymbolicDisplacement()) {
1580  // For X86-64, we should always use lea to materialize RIP relative
1581  // addresses.
1582  if (Subtarget->is64Bit())
1583  Complexity = 4;
1584  else
1585  Complexity += 2;
1586  }
1587 
1588  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
1589  Complexity++;
1590 
1591  // If it isn't worth using an LEA, reject it.
1592  if (Complexity <= 2)
1593  return false;
1594 
1595  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1596  return true;
1597 }
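// Illustrative scoring (hypothetical operands): "base + index*4 + 8" earns
// 1 (base reg) + 1 (index reg) + 1 (scale > 1) + 1 (disp with regs) = 4 > 2,
// so it becomes an LEA; a bare "reg + reg" scores only 2 and is rejected in
// favor of a plain ADD.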
1598 
1599 /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
1600 bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
1601  SDValue &Scale, SDValue &Index,
1602  SDValue &Disp, SDValue &Segment) {
1603  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
1604  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
1605 
1606  X86ISelAddressMode AM;
1607  AM.GV = GA->getGlobal();
1608  AM.Disp += GA->getOffset();
1609  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
1610  AM.SymbolFlags = GA->getTargetFlags();
1611 
1612  if (N.getValueType() == MVT::i32) {
1613  AM.Scale = 1;
1614  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
1615  } else {
1616  AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
1617  }
1618 
1619  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1620  return true;
1621 }
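// Illustrative sketch (hypothetical symbol): on 32-bit targets the GOT
// pointer lives in %ebx, so the mode built above folds it in as a scale-1
// index, roughly "leal sym@TLSGD(,%ebx,1), %eax"; 64-bit targets need no
// index register since the TLS address is typically formed RIP-relative.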
1622 
1623 
1624 bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
1625  SDValue &Base, SDValue &Scale,
1626  SDValue &Index, SDValue &Disp,
1627  SDValue &Segment) {
1628  if (!ISD::isNON_EXTLoad(N.getNode()) ||
1629  !IsProfitableToFold(N, P, P) ||
1630  !IsLegalToFold(N, P, P, OptLevel))
1631  return false;
1632 
1633  return SelectAddr(N.getNode(),
1634  N.getOperand(1), Base, Scale, Index, Disp, Segment);
1635 }
1636 
1637 /// getGlobalBaseReg - Return an SDNode that returns the value of
1638 /// the global base register. Output instructions required to
1639 /// initialize the global base register, if necessary.
1640 ///
1641 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
1642  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
1643  auto &DL = MF->getDataLayout();
1644  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
1645 }
1646 
1647 /// Atomic opcode table
1648 ///
1649 enum AtomicOpc {
1650  ADD,
1651  SUB,
1652  INC,
1653  DEC,
1654  OR,
1655  AND,
1656  XOR,
1657  AtomicOpcEnd
1658 };
1659 
1660 enum AtomicSz {
1661  ConstantI8,
1662  I8,
1663  SextConstantI16,
1664  ConstantI16,
1665  I16,
1666  SextConstantI32,
1667  ConstantI32,
1668  I32,
1669  SextConstantI64,
1670  ConstantI64,
1671  I64,
1672  AtomicSzEnd
1673 };
1674 
1675 static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
1676  {
1677  X86::LOCK_ADD8mi,
1678  X86::LOCK_ADD8mr,
1679  X86::LOCK_ADD16mi8,
1680  X86::LOCK_ADD16mi,
1681  X86::LOCK_ADD16mr,
1682  X86::LOCK_ADD32mi8,
1683  X86::LOCK_ADD32mi,
1684  X86::LOCK_ADD32mr,
1685  X86::LOCK_ADD64mi8,
1686  X86::LOCK_ADD64mi32,
1687  X86::LOCK_ADD64mr,
1688  },
1689  {
1690  X86::LOCK_SUB8mi,
1691  X86::LOCK_SUB8mr,
1692  X86::LOCK_SUB16mi8,
1693  X86::LOCK_SUB16mi,
1694  X86::LOCK_SUB16mr,
1695  X86::LOCK_SUB32mi8,
1696  X86::LOCK_SUB32mi,
1697  X86::LOCK_SUB32mr,
1698  X86::LOCK_SUB64mi8,
1699  X86::LOCK_SUB64mi32,
1700  X86::LOCK_SUB64mr,
1701  },
1702  {
1703  0,
1704  X86::LOCK_INC8m,
1705  0,
1706  0,
1707  X86::LOCK_INC16m,
1708  0,
1709  0,
1710  X86::LOCK_INC32m,
1711  0,
1712  0,
1713  X86::LOCK_INC64m,
1714  },
1715  {
1716  0,
1717  X86::LOCK_DEC8m,
1718  0,
1719  0,
1720  X86::LOCK_DEC16m,
1721  0,
1722  0,
1723  X86::LOCK_DEC32m,
1724  0,
1725  0,
1726  X86::LOCK_DEC64m,
1727  },
1728  {
1729  X86::LOCK_OR8mi,
1730  X86::LOCK_OR8mr,
1731  X86::LOCK_OR16mi8,
1732  X86::LOCK_OR16mi,
1733  X86::LOCK_OR16mr,
1734  X86::LOCK_OR32mi8,
1735  X86::LOCK_OR32mi,
1736  X86::LOCK_OR32mr,
1737  X86::LOCK_OR64mi8,
1738  X86::LOCK_OR64mi32,
1739  X86::LOCK_OR64mr,
1740  },
1741  {
1742  X86::LOCK_AND8mi,
1743  X86::LOCK_AND8mr,
1744  X86::LOCK_AND16mi8,
1745  X86::LOCK_AND16mi,
1746  X86::LOCK_AND16mr,
1747  X86::LOCK_AND32mi8,
1748  X86::LOCK_AND32mi,
1749  X86::LOCK_AND32mr,
1750  X86::LOCK_AND64mi8,
1751  X86::LOCK_AND64mi32,
1752  X86::LOCK_AND64mr,
1753  },
1754  {
1755  X86::LOCK_XOR8mi,
1756  X86::LOCK_XOR8mr,
1757  X86::LOCK_XOR16mi8,
1758  X86::LOCK_XOR16mi,
1759  X86::LOCK_XOR16mr,
1760  X86::LOCK_XOR32mi8,
1761  X86::LOCK_XOR32mi,
1762  X86::LOCK_XOR32mr,
1763  X86::LOCK_XOR64mi8,
1764  X86::LOCK_XOR64mi32,
1765  X86::LOCK_XOR64mr,
1766  }
1767 };
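// Illustrative lookup (hypothetical operands): an atomic 32-bit add of the
// constant 16 indexes AtomicOpcTbl[ADD][SextConstantI32] and yields
// X86::LOCK_ADD32mi8, since 16 fits a sign-extended 8-bit immediate; the
// same add with a register operand indexes AtomicOpcTbl[ADD][I32] and
// yields X86::LOCK_ADD32mr.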
1768 
1769 // Return the target constant operand for atomic-load-op and do simple
1770 // translations, such as from atomic-load-add to lock-sub. The return value is
1771 // one of the following 3 cases:
1772 // + target-constant, the operand could be supported as a target constant.
1773 // + empty, the operand is not needed any more with the new op selected.
1774 // + non-empty, otherwise.
1775 static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
1776  SDLoc dl,
1777  enum AtomicOpc &Op, MVT NVT,
1778  SDValue Val,
1779  const X86Subtarget *Subtarget) {
1780  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
1781  int64_t CNVal = CN->getSExtValue();
1782  // Quit if not 32-bit imm.
1783  if ((int32_t)CNVal != CNVal)
1784  return Val;
1785  // Quit if INT32_MIN: it would be negated as it is negative and overflow,
1786  // producing an immediate that does not fit in the 32 bits available for
1787  // an immediate operand to sub. However, it still fits in 32 bits for the
1788  // add (since it is not negated) so we can return target-constant.
1789  if (CNVal == INT32_MIN)
1790  return CurDAG->getTargetConstant(CNVal, dl, NVT);
1791  // For atomic-load-add, we could do some optimizations.
1792  if (Op == ADD) {
1793  // Translate to INC/DEC if ADD by 1 or -1.
1794  if (((CNVal == 1) || (CNVal == -1)) && !Subtarget->slowIncDec()) {
1795  Op = (CNVal == 1) ? INC : DEC;
1796  // No more constant operand after being translated into INC/DEC.
1797  return SDValue();
1798  }
1799  // Translate to SUB if ADD by negative value.
1800  if (CNVal < 0) {
1801  Op = SUB;
1802  CNVal = -CNVal;
1803  }
1804  }
1805  return CurDAG->getTargetConstant(CNVal, dl, NVT);
1806  }
1807 
1808  // If the value operand is single-used, try to optimize it.
1809  if (Op == ADD && Val.hasOneUse()) {
1810  // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x).
1811  if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) {
1812  Op = SUB;
1813  return Val.getOperand(1);
1814  }
1815  // A special case for i16, which needs truncating as, in most cases, it's
1816  // promoted to i32. We will translate
1817  // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x))
1818  if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 &&
1819  Val.getOperand(0).getOpcode() == ISD::SUB &&
1820  X86::isZeroNode(Val.getOperand(0).getOperand(0))) {
1821  Op = SUB;
1822  Val = Val.getOperand(0);
1823  return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT,
1824  Val.getOperand(1));
1825  }
1826  }
1827 
1828  return Val;
1829 }
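// Illustrative cases (hypothetical values): atomic-load-add of +1 becomes a
// lock INC with no immediate operand; add of -8 is rewritten as a lock SUB
// of 8; add of INT32_MIN is left as a lock ADD, since negating it would
// overflow the 32-bit immediate as noted above.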
1830 
1831 SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
1832  if (Node->hasAnyUseOfValue(0))
1833  return nullptr;
1834 
1835  SDLoc dl(Node);
1836 
1837  // Optimize common patterns for __sync_or_and_fetch and similar arith
1838  // operations where the result is not used. This allows us to use the "lock"
1839  // version of the arithmetic instruction.
1840  SDValue Chain = Node->getOperand(0);
1841  SDValue Ptr = Node->getOperand(1);
1842  SDValue Val = Node->getOperand(2);
1843  SDValue Base, Scale, Index, Disp, Segment;
1844  if (!SelectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment))
1845  return nullptr;
1846 
1847  // Which index into the table.
1848  enum AtomicOpc Op;
1849  switch (Node->getOpcode()) {
1850  default:
1851  return nullptr;
1852  case ISD::ATOMIC_LOAD_OR:
1853  Op = OR;
1854  break;
1855  case ISD::ATOMIC_LOAD_AND:
1856  Op = AND;
1857  break;
1858  case ISD::ATOMIC_LOAD_XOR:
1859  Op = XOR;
1860  break;
1861  case ISD::ATOMIC_LOAD_ADD:
1862  Op = ADD;
1863  break;
1864  }
1865 
1866  Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val, Subtarget);
1867  bool isUnOp = !Val.getNode();
1868  bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
1869 
1870  unsigned Opc = 0;
1871  switch (NVT.SimpleTy) {
1872  default: return nullptr;
1873  case MVT::i8:
1874  if (isCN)
1875  Opc = AtomicOpcTbl[Op][ConstantI8];
1876  else
1877  Opc = AtomicOpcTbl[Op][I8];
1878  break;
1879  case MVT::i16:
1880  if (isCN) {
1881  if (immSext8(Val.getNode()))
1882  Opc = AtomicOpcTbl[Op][SextConstantI16];
1883  else
1884  Opc = AtomicOpcTbl[Op][ConstantI16];
1885  } else
1886  Opc = AtomicOpcTbl[Op][I16];
1887  break;
1888  case MVT::i32:
1889  if (isCN) {
1890  if (immSext8(Val.getNode()))
1891  Opc = AtomicOpcTbl[Op][SextConstantI32];
1892  else
1893  Opc = AtomicOpcTbl[Op][ConstantI32];
1894  } else
1895  Opc = AtomicOpcTbl[Op][I32];
1896  break;
1897  case MVT::i64:
1898  if (isCN) {
1899  if (immSext8(Val.getNode()))
1900  Opc = AtomicOpcTbl[Op][SextConstantI64];
1901  else if (i64immSExt32(Val.getNode()))
1902  Opc = AtomicOpcTbl[Op][ConstantI64];
1903  else
1904  llvm_unreachable("True 64 bits constant in SelectAtomicLoadArith");
1905  } else
1906  Opc = AtomicOpcTbl[Op][I64];
1907  break;
1908  }
1909 
1910  assert(Opc != 0 && "Invalid arith lock transform!");
1911 
1912  // Building the new node.
1913  SDValue Ret;
1914  if (isUnOp) {
1915  SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Chain };
1916  Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
1917  } else {
1918  SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Val, Chain };
1919  Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
1920  }
1921 
1922  // Copying the MachineMemOperand.
1923  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1924  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
1925  cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
1926 
1927  // We need to have two outputs as that is what the original instruction had.
1928  // So we add a dummy, undefined output. This is safe as we checked first
1929  // that no-one uses our output anyway.
1930  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
1931  dl, NVT), 0);
1932  SDValue RetVals[] = { Undef, Ret };
1933  return CurDAG->getMergeValues(RetVals, dl).getNode();
1934 }
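// Illustrative result (hypothetical IR): for "__sync_fetch_and_or(p, 4)"
// whose return value is unused, the code above emits a single
// "lock orl $4, (%reg)" and pairs it with an IMPLICIT_DEF standing in for
// the never-read loaded value.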
1935 
1936 /// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
1937 /// any uses which require the SF or OF bits to be accurate.
1938 static bool HasNoSignedComparisonUses(SDNode *N) {
1939  // Examine each user of the node.
1940  for (SDNode::use_iterator UI = N->use_begin(),
1941  UE = N->use_end(); UI != UE; ++UI) {
1942  // Only examine CopyToReg uses.
1943  if (UI->getOpcode() != ISD::CopyToReg)
1944  return false;
1945  // Only examine CopyToReg uses that copy to EFLAGS.
1946  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
1947  X86::EFLAGS)
1948  return false;
1949  // Examine each user of the CopyToReg use.
1950  for (SDNode::use_iterator FlagUI = UI->use_begin(),
1951  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
1952  // Only examine the Flag result.
1953  if (FlagUI.getUse().getResNo() != 1) continue;
1954  // Anything unusual: assume conservatively.
1955  if (!FlagUI->isMachineOpcode()) return false;
1956  // Examine the opcode of the user.
1957  switch (FlagUI->getMachineOpcode()) {
1958  // These comparisons don't treat the most significant bit specially.
1959  case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
1960  case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
1961  case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
1962  case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
1963  case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
1964  case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
1965  case X86::CMOVA16rr: case X86::CMOVA16rm:
1966  case X86::CMOVA32rr: case X86::CMOVA32rm:
1967  case X86::CMOVA64rr: case X86::CMOVA64rm:
1968  case X86::CMOVAE16rr: case X86::CMOVAE16rm:
1969  case X86::CMOVAE32rr: case X86::CMOVAE32rm:
1970  case X86::CMOVAE64rr: case X86::CMOVAE64rm:
1971  case X86::CMOVB16rr: case X86::CMOVB16rm:
1972  case X86::CMOVB32rr: case X86::CMOVB32rm:
1973  case X86::CMOVB64rr: case X86::CMOVB64rm:
1974  case X86::CMOVBE16rr: case X86::CMOVBE16rm:
1975  case X86::CMOVBE32rr: case X86::CMOVBE32rm:
1976  case X86::CMOVBE64rr: case X86::CMOVBE64rm:
1977  case X86::CMOVE16rr: case X86::CMOVE16rm:
1978  case X86::CMOVE32rr: case X86::CMOVE32rm:
1979  case X86::CMOVE64rr: case X86::CMOVE64rm:
1980  case X86::CMOVNE16rr: case X86::CMOVNE16rm:
1981  case X86::CMOVNE32rr: case X86::CMOVNE32rm:
1982  case X86::CMOVNE64rr: case X86::CMOVNE64rm:
1983  case X86::CMOVNP16rr: case X86::CMOVNP16rm:
1984  case X86::CMOVNP32rr: case X86::CMOVNP32rm:
1985  case X86::CMOVNP64rr: case X86::CMOVNP64rm:
1986  case X86::CMOVP16rr: case X86::CMOVP16rm:
1987  case X86::CMOVP32rr: case X86::CMOVP32rm:
1988  case X86::CMOVP64rr: case X86::CMOVP64rm:
1989  continue;
1990  // Anything else: assume conservatively.
1991  default: return false;
1992  }
1993  }
1994  }
1995  return true;
1996 }
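// Illustrative distinction (hypothetical users): an unsigned consumer such
// as SETAr reads only CF/ZF and is on the safe list above, so callers may
// narrow a CMP into a smaller TEST; a signed consumer such as SETLr reads
// SF/OF, hits the default case, and blocks the narrowing.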
1997 
1998 /// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
1999 /// is suitable for doing the {load; increment or decrement; store} to modify
2000 /// transformation.
2001 static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
2002  SDValue StoredVal, SelectionDAG *CurDAG,
2003  LoadSDNode* &LoadNode, SDValue &InputChain) {
2004 
2005  // Is the value stored the result of a DEC or INC?
2006  if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
2007 
2008  // Is the stored value result 0 of the load?
2009  if (StoredVal.getResNo() != 0) return false;
2010 
2011  // Are there other uses of the loaded value than the inc or dec?
2012  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2013 
2014  // Is the store non-extending and non-indexed?
2015  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2016  return false;
2017 
2018  SDValue Load = StoredVal->getOperand(0);
2019  // Is the stored value a non-extending and non-indexed load?
2020  if (!ISD::isNormalLoad(Load.getNode())) return false;
2021 
2022  // Return LoadNode by reference.
2023  LoadNode = cast<LoadSDNode>(Load);
2024  // Is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
2025  EVT LdVT = LoadNode->getMemoryVT();
2026  if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
2027  LdVT != MVT::i8)
2028  return false;
2029 
2030  // Is store the only read of the loaded value?
2031  if (!Load.hasOneUse())
2032  return false;
2033 
2034  // Is the address of the store the same as the load?
2035  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2036  LoadNode->getOffset() != StoreNode->getOffset())
2037  return false;
2038 
2039  // Check if the chain is produced by the load or is a TokenFactor with
2040  // the load output chain as an operand. Return InputChain by reference.
2041  SDValue Chain = StoreNode->getChain();
2042 
2043  bool ChainCheck = false;
2044  if (Chain == Load.getValue(1)) {
2045  ChainCheck = true;
2046  InputChain = LoadNode->getChain();
2047  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2048  SmallVector<SDValue, 4> ChainOps;
2049  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2050  SDValue Op = Chain.getOperand(i);
2051  if (Op == Load.getValue(1)) {
2052  ChainCheck = true;
2053  continue;
2054  }
2055 
2056  // Make sure using Op as part of the chain would not cause a cycle here.
2057  // In theory, we could check whether the chain node is a predecessor of
2058  // the load. But that can be very expensive. Instead visit the uses and
2059  // make sure they all have smaller node id than the load.
2060  int LoadId = LoadNode->getNodeId();
2061  for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
2062  UE = Op.getNode()->use_end(); UI != UE; ++UI) {
2063  if (UI.getUse().getResNo() != 0)
2064  continue;
2065  if (UI->getNodeId() > LoadId)
2066  return false;
2067  }
2068 
2069  ChainOps.push_back(Op);
2070  }
2071 
2072  if (ChainCheck)
2073  // Make a new TokenFactor with all the other input chains except
2074  // for the load.
2075  InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
2076  MVT::Other, ChainOps);
2077  }
2078  if (!ChainCheck)
2079  return false;
2080 
2081  return true;
2082 }
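// Illustrative match (hypothetical DAG): the chain
//   t1 = load (%p); t2 = X86ISD::INC t1; store t2, (%p)
// passes every check above (same address, normal load/store, single-use
// load), so the caller can fuse it into one read-modify-write "incl (%p)".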
2083 
2084 /// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory
2085 /// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC.
2086 static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
2087  if (Opc == X86ISD::DEC) {
2088  if (LdVT == MVT::i64) return X86::DEC64m;
2089  if (LdVT == MVT::i32) return X86::DEC32m;
2090  if (LdVT == MVT::i16) return X86::DEC16m;
2091  if (LdVT == MVT::i8) return X86::DEC8m;
2092  } else {
2093  assert(Opc == X86ISD::INC && "unrecognized opcode");
2094  if (LdVT == MVT::i64) return X86::INC64m;
2095  if (LdVT == MVT::i32) return X86::INC32m;
2096  if (LdVT == MVT::i16) return X86::INC16m;
2097  if (LdVT == MVT::i8) return X86::INC8m;
2098  }
2099  llvm_unreachable("unrecognized size for LdVT");
2100 }
2101 
2102 /// SelectGather - Customized ISel for GATHER operations.
2103 ///
2104 SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
2105  // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
2106  SDValue Chain = Node->getOperand(0);
2107  SDValue VSrc = Node->getOperand(2);
2108  SDValue Base = Node->getOperand(3);
2109  SDValue VIdx = Node->getOperand(4);
2110  SDValue VMask = Node->getOperand(5);
2111  ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
2112  if (!Scale)
2113  return nullptr;
2114 
2115  SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
2116  MVT::Other);
2117 
2118  SDLoc DL(Node);
2119 
2120  // Memory Operands: Base, Scale, Index, Disp, Segment
2121  SDValue Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
2122  SDValue Segment = CurDAG->getRegister(0, MVT::i32);
2123  const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue(), DL), VIdx,
2124  Disp, Segment, VMask, Chain};
2125  SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
2126  // Node has 2 outputs: VDst and MVT::Other.
2127  // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
2128  // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
2129  // of ResNode.
2130  ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
2131  ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
2132  return ResNode;
2133 }
2134 
2135 SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
2136  MVT NVT = Node->getSimpleValueType(0);
2137  unsigned Opc, MOpc;
2138  unsigned Opcode = Node->getOpcode();
2139  SDLoc dl(Node);
2140 
2141  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
2142 
2143  if (Node->isMachineOpcode()) {
2144  DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
2145  Node->setNodeId(-1);
2146  return nullptr; // Already selected.
2147  }
2148 
2149  switch (Opcode) {
2150  default: break;
2151  case ISD::INTRINSIC_W_CHAIN: {
2152  unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
2153  switch (IntNo) {
2154  default: break;
2155  case Intrinsic::x86_avx2_gather_d_pd:
2156  case Intrinsic::x86_avx2_gather_d_pd_256:
2157  case Intrinsic::x86_avx2_gather_q_pd:
2158  case Intrinsic::x86_avx2_gather_q_pd_256:
2159  case Intrinsic::x86_avx2_gather_d_ps:
2160  case Intrinsic::x86_avx2_gather_d_ps_256:
2161  case Intrinsic::x86_avx2_gather_q_ps:
2162  case Intrinsic::x86_avx2_gather_q_ps_256:
2163  case Intrinsic::x86_avx2_gather_d_q:
2164  case Intrinsic::x86_avx2_gather_d_q_256:
2165  case Intrinsic::x86_avx2_gather_q_q:
2166  case Intrinsic::x86_avx2_gather_q_q_256:
2167  case Intrinsic::x86_avx2_gather_d_d:
2168  case Intrinsic::x86_avx2_gather_d_d_256:
2169  case Intrinsic::x86_avx2_gather_q_d:
2170  case Intrinsic::x86_avx2_gather_q_d_256: {
2171  if (!Subtarget->hasAVX2())
2172  break;
2173  unsigned Opc;
2174  switch (IntNo) {
2175  default: llvm_unreachable("Impossible intrinsic");
2176  case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break;
2177  case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
2178  case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break;
2179  case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
2180  case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break;
2181  case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
2182  case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break;
2183  case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
2184  case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break;
2185  case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break;
2186  case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break;
2187  case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break;
2188  case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break;
2189  case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break;
2190  case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
2191  case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
2192  }
2193  SDNode *RetVal = SelectGather(Node, Opc);
2194  if (RetVal)
2195  // We already called ReplaceUses inside SelectGather.
2196  return nullptr;
2197  break;
2198  }
2199  }
2200  break;
2201  }
2202  case X86ISD::GlobalBaseReg:
2203  return getGlobalBaseReg();
2204 
2205  case X86ISD::SHRUNKBLEND: {
2206  // SHRUNKBLEND selects like a regular VSELECT.
2207  SDValue VSelect = CurDAG->getNode(
2208  ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
2209  Node->getOperand(1), Node->getOperand(2));
2210  ReplaceUses(SDValue(Node, 0), VSelect);
2211  SelectCode(VSelect.getNode());
2212  // We already called ReplaceUses.
2213  return nullptr;
2214  }
2215 
2216  case ISD::ATOMIC_LOAD_XOR:
2217  case ISD::ATOMIC_LOAD_AND:
2218  case ISD::ATOMIC_LOAD_OR:
2219  case ISD::ATOMIC_LOAD_ADD: {
2220  SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
2221  if (RetVal)
2222  return RetVal;
2223  break;
2224  }
2225  case ISD::AND:
2226  case ISD::OR:
2227  case ISD::XOR: {
2228  // For operations of the form (x << C1) op C2, check if we can use a smaller
2229  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
2230  SDValue N0 = Node->getOperand(0);
2231  SDValue N1 = Node->getOperand(1);
2232 
2233  if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
2234  break;
2235 
2236  // i8 is unshrinkable, i16 should be promoted to i32.
2237  if (NVT != MVT::i32 && NVT != MVT::i64)
2238  break;
2239 
2240  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
2241  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2242  if (!Cst || !ShlCst)
2243  break;
2244 
2245  int64_t Val = Cst->getSExtValue();
2246  uint64_t ShlVal = ShlCst->getZExtValue();
2247 
2248  // Make sure that we don't change the operation by removing bits.
2249  // This only matters for OR and XOR, AND is unaffected.
2250  uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
2251  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
2252  break;
2253 
2254  unsigned ShlOp, AddOp, Op;
2255  MVT CstVT = NVT;
2256 
2257  // Check the minimum bitwidth for the new constant.
2258  // TODO: AND32ri is the same as AND64ri32 with zext imm.
2259  // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
2260  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
2261  if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
2262  CstVT = MVT::i8;
2263  else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
2264  CstVT = MVT::i32;
2265 
2266  // Bail if there is no smaller encoding.
2267  if (NVT == CstVT)
2268  break;
2269 
2270  switch (NVT.SimpleTy) {
2271  default: llvm_unreachable("Unsupported VT!");
2272  case MVT::i32:
2273  assert(CstVT == MVT::i8);
2274  ShlOp = X86::SHL32ri;
2275  AddOp = X86::ADD32rr;
2276 
2277  switch (Opcode) {
2278  default: llvm_unreachable("Impossible opcode");
2279  case ISD::AND: Op = X86::AND32ri8; break;
2280  case ISD::OR: Op = X86::OR32ri8; break;
2281  case ISD::XOR: Op = X86::XOR32ri8; break;
2282  }
2283  break;
2284  case MVT::i64:
2285  assert(CstVT == MVT::i8 || CstVT == MVT::i32);
2286  ShlOp = X86::SHL64ri;
2287  AddOp = X86::ADD64rr;
2288 
2289  switch (Opcode) {
2290  default: llvm_unreachable("Impossible opcode");
2291  case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
2292  case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
2293  case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
2294  }
2295  break;
2296  }
2297 
2298  // Emit the smaller op and the shift.
2299  SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
2300  SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
2301  if (ShlVal == 1)
2302  return CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
2303  SDValue(New, 0));
2304  return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
2305  getI8Imm(ShlVal, dl));
2306  }
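// Illustrative shrink (hypothetical values): for (x << 4) ^ 0x100, the
// immediate 0x100 needs 32 bits, but 0x100 >> 4 = 0x10 fits in 8, and the
// low 4 bits of 0x100 are zero, so XOR32ri8 followed by SHL32ri computes the
// same value with a shorter encoding.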
2307  case X86ISD::UMUL8:
2308  case X86ISD::SMUL8: {
2309  SDValue N0 = Node->getOperand(0);
2310  SDValue N1 = Node->getOperand(1);
2311 
2312  Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
2313 
2314  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
2315  N0, SDValue()).getValue(1);
2316 
2317  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
2318  SDValue Ops[] = {N1, InFlag};
2319  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2320 
2321  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
2322  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
2323  return nullptr;
2324  }
2325 
2326  case X86ISD::UMUL: {
2327  SDValue N0 = Node->getOperand(0);
2328  SDValue N1 = Node->getOperand(1);
2329 
2330  unsigned LoReg;
2331  switch (NVT.SimpleTy) {
2332  default: llvm_unreachable("Unsupported VT!");
2333  case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break;
2334  case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
2335  case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
2336  case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
2337  }
2338 
2339  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
2340  N0, SDValue()).getValue(1);
2341 
2342  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
2343  SDValue Ops[] = {N1, InFlag};
2344  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2345 
2346  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
2347  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
2348  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
2349  return nullptr;
2350  }
2351 
2352  case ISD::SMUL_LOHI:
2353  case ISD::UMUL_LOHI: {
2354  SDValue N0 = Node->getOperand(0);
2355  SDValue N1 = Node->getOperand(1);
2356 
2357  bool isSigned = Opcode == ISD::SMUL_LOHI;
2358  bool hasBMI2 = Subtarget->hasBMI2();
2359  if (!isSigned) {
2360  switch (NVT.SimpleTy) {
2361  default: llvm_unreachable("Unsupported VT!");
2362  case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
2363  case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
2364  case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
2365  MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
2366  case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
2367  MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
2368  }
2369  } else {
2370  switch (NVT.SimpleTy) {
2371  default: llvm_unreachable("Unsupported VT!");
2372  case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
2373  case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
2374  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
2375  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
2376  }
2377  }
2378 
2379  unsigned SrcReg, LoReg, HiReg;
2380  switch (Opc) {
2381  default: llvm_unreachable("Unknown MUL opcode!");
2382  case X86::IMUL8r:
2383  case X86::MUL8r:
2384  SrcReg = LoReg = X86::AL; HiReg = X86::AH;
2385  break;
2386  case X86::IMUL16r:
2387  case X86::MUL16r:
2388  SrcReg = LoReg = X86::AX; HiReg = X86::DX;
2389  break;
2390  case X86::IMUL32r:
2391  case X86::MUL32r:
2392  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
2393  break;
2394  case X86::IMUL64r:
2395  case X86::MUL64r:
2396  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
2397  break;
2398  case X86::MULX32rr:
2399  SrcReg = X86::EDX; LoReg = HiReg = 0;
2400  break;
2401  case X86::MULX64rr:
2402  SrcReg = X86::RDX; LoReg = HiReg = 0;
2403  break;
2404  }
2405 
2406  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2407  bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2408  // Multiply is commutative.
2409  if (!foldedLoad) {
2410  foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2411  if (foldedLoad)
2412  std::swap(N0, N1);
2413  }
2414 
2415  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
2416  N0, SDValue()).getValue(1);
2417  SDValue ResHi, ResLo;
2418 
2419  if (foldedLoad) {
2420  SDValue Chain;
2421  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2422  InFlag };
2423  if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
2424  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
2425  SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2426  ResHi = SDValue(CNode, 0);
2427  ResLo = SDValue(CNode, 1);
2428  Chain = SDValue(CNode, 2);
2429  InFlag = SDValue(CNode, 3);
2430  } else {
2431  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2432  SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
2433  Chain = SDValue(CNode, 0);
2434  InFlag = SDValue(CNode, 1);
2435  }
2436 
2437  // Update the chain.
2438  ReplaceUses(N1.getValue(1), Chain);
2439  } else {
2440  SDValue Ops[] = { N1, InFlag };
2441  if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
2442  SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
2443  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2444  ResHi = SDValue(CNode, 0);
2445  ResLo = SDValue(CNode, 1);
2446  InFlag = SDValue(CNode, 2);
2447  } else {
2448  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
2449  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
2450  InFlag = SDValue(CNode, 0);
2451  }
2452  }
2453 
2454  // Prevent use of AH in a REX instruction by referencing AX instead.
2455  if (HiReg == X86::AH && Subtarget->is64Bit() &&
2456  !SDValue(Node, 1).use_empty()) {
2457  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2458  X86::AX, MVT::i16, InFlag);
2459  InFlag = Result.getValue(2);
2460  // Get the low part if needed. Don't use getCopyFromReg for aliasing
2461  // registers.
2462  if (!SDValue(Node, 0).use_empty())
2463  ReplaceUses(SDValue(Node, 1),
2464  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2465 
2466  // Shift AX down 8 bits.
2467  Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
2468  Result,
2469  CurDAG->getTargetConstant(8, dl, MVT::i8)),
2470  0);
2471  // Then truncate it down to i8.
2472  ReplaceUses(SDValue(Node, 1),
2473  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
2474  }
2475  // Copy the low half of the result, if it is needed.
2476  if (!SDValue(Node, 0).use_empty()) {
2477  if (!ResLo.getNode()) {
2478  assert(LoReg && "Register for low half is not defined!");
2479  ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
2480  InFlag);
2481  InFlag = ResLo.getValue(2);
2482  }
2483  ReplaceUses(SDValue(Node, 0), ResLo);
2484  DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
2485  }
2486  // Copy the high half of the result, if it is needed.
2487  if (!SDValue(Node, 1).use_empty()) {
2488  if (!ResHi.getNode()) {
2489  assert(HiReg && "Register for high half is not defined!");
2490  ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
2491  InFlag);
2492  InFlag = ResHi.getValue(2);
2493  }
2494  ReplaceUses(SDValue(Node, 1), ResHi);
2495  DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
2496  }
2497 
2498  return nullptr;
2499  }
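// Illustrative AH workaround (hypothetical i8 multiply): the high half of an
// i8 multiply lands in AH, which cannot be encoded alongside a REX prefix;
// the code above instead copies AX, extracts the low byte, then shifts AX
// right by 8 and extracts again to recover the high half without ever
// naming AH in 64-bit mode.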
2500 
2501  case ISD::SDIVREM:
2502  case ISD::UDIVREM:
2503  case X86ISD::SDIVREM8_SEXT_HREG:
2504  case X86ISD::UDIVREM8_ZEXT_HREG: {
2505  SDValue N0 = Node->getOperand(0);
2506  SDValue N1 = Node->getOperand(1);
2507 
2508  bool isSigned = (Opcode == ISD::SDIVREM ||
2509  Opcode == X86ISD::SDIVREM8_SEXT_HREG);
2510  if (!isSigned) {
2511  switch (NVT.SimpleTy) {
2512  default: llvm_unreachable("Unsupported VT!");
2513  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
2514  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
2515  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
2516  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
2517  }
2518  } else {
2519  switch (NVT.SimpleTy) {
2520  default: llvm_unreachable("Unsupported VT!");
2521  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
2522  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
2523  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
2524  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
2525  }
2526  }
2527 
2528  unsigned LoReg, HiReg, ClrReg;
2529  unsigned SExtOpcode;
2530  switch (NVT.SimpleTy) {
2531  default: llvm_unreachable("Unsupported VT!");
2532  case MVT::i8:
2533  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
2534  SExtOpcode = X86::CBW;
2535  break;
2536  case MVT::i16:
2537  LoReg = X86::AX; HiReg = X86::DX;
2538  ClrReg = X86::DX;
2539  SExtOpcode = X86::CWD;
2540  break;
2541  case MVT::i32:
2542  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
2543  SExtOpcode = X86::CDQ;
2544  break;
2545  case MVT::i64:
2546  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
2547  SExtOpcode = X86::CQO;
2548  break;
2549  }
2550 
2551  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
2552  bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
2553  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
2554 
2555  SDValue InFlag;
2556  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
2557  // Special case for div8, just use a move with zero extension to AX to
2558  // clear the upper 8 bits (AH).
2559  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
2560  if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
2561  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
2562  Move =
2563  SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
2564  MVT::Other, Ops), 0);
2565  Chain = Move.getValue(1);
2566  ReplaceUses(N0.getValue(1), Chain);
2567  } else {
2568  Move =
2569  SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
2570  Chain = CurDAG->getEntryNode();
2571  }
2572  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
2573  InFlag = Chain.getValue(1);
2574  } else {
2575  InFlag =
2576  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
2577  LoReg, N0, SDValue()).getValue(1);
2578  if (isSigned && !signBitIsZero) {
2579  // Sign extend the low part into the high part.
2580  InFlag =
2581  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
2582  } else {
2583  // Zero out the high part, effectively zero extending the input.
2584  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
2585  switch (NVT.SimpleTy) {
2586  case MVT::i16:
2587  ClrNode =
2588  SDValue(CurDAG->getMachineNode(
2589  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
2590  CurDAG->getTargetConstant(X86::sub_16bit, dl,
2591  MVT::i32)),
2592  0);
2593  break;
2594  case MVT::i32:
2595  break;
2596  case MVT::i64:
2597  ClrNode =
2598  SDValue(CurDAG->getMachineNode(
2599  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
2600  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
2601  CurDAG->getTargetConstant(X86::sub_32bit, dl,
2602  MVT::i32)),
2603  0);
2604  break;
2605  default:
2606  llvm_unreachable("Unexpected division source");
2607  }
2608 
2609  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
2610  ClrNode, InFlag).getValue(1);
2611  }
2612  }
2613 
2614  if (foldedLoad) {
2615  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
2616  InFlag };
2617  SDNode *CNode =
2618  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
2619  InFlag = SDValue(CNode, 1);
2620  // Update the chain.
2621  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
2622  } else {
2623  InFlag =
2624  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
2625  }
2626 
2627  // Prevent use of AH in a REX instruction by explicitly copying it to
2628  // an ABCD_L register.
2629  //
2630  // The current assumption of the register allocator is that isel
2631  // won't generate explicit references to the GR8_ABCD_H registers. If
2632  // the allocator and/or the backend get enhanced to be more robust in
2633  // that regard, this can be, and should be, removed.
2634  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
2635  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
2636  unsigned AHExtOpcode =
2637  isSigned ? X86::MOVSX32_NOREXrr8 : X86::MOVZX32_NOREXrr8;
2638 
2639  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
2640  MVT::Glue, AHCopy, InFlag);
2641  SDValue Result(RNode, 0);
2642  InFlag = SDValue(RNode, 1);
2643 
2644  if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
2645  Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
2646  if (Node->getValueType(1) == MVT::i64) {
2647  // It's not possible to directly movsx AH to a 64bit register, because
2648  // the latter needs the REX prefix, but the former can't have it.
2649  assert(Opcode != X86ISD::SDIVREM8_SEXT_HREG &&
2650  "Unexpected i64 sext of h-register");
2651  Result =
2652  SDValue(CurDAG->getMachineNode(
2653  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
2654  CurDAG->getTargetConstant(0, dl, MVT::i64), Result,
2655  CurDAG->getTargetConstant(X86::sub_32bit, dl,
2656  MVT::i32)),
2657  0);
2658  }
2659  } else {
2660  Result =
2661  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
2662  }
2663  ReplaceUses(SDValue(Node, 1), Result);
2664  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2665  }
2666  // Copy the division (low) result, if it is needed.
2667  if (!SDValue(Node, 0).use_empty()) {
2668  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2669  LoReg, NVT, InFlag);
2670  InFlag = Result.getValue(2);
2671  ReplaceUses(SDValue(Node, 0), Result);
2672  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2673  }
2674  // Copy the remainder (high) result, if it is needed.
2675  if (!SDValue(Node, 1).use_empty()) {
2676  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2677  HiReg, NVT, InFlag);
2678  InFlag = Result.getValue(2);
2679  ReplaceUses(SDValue(Node, 1), Result);
2680  DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
2681  }
2682  return nullptr;
2683  }
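// Illustrative i8 division (hypothetical operands): an unsigned i8 udivrem
// zero-extends the dividend into EAX (MOVZX32rr8 clears AH), runs DIV8r,
// then reads the quotient from AL and, via the no-REX MOVZX copy above, the
// remainder from AH.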
2684 
2685  case X86ISD::CMP:
2686  case X86ISD::SUB: {
2687  // Sometimes a SUB is used to perform comparison.
2688  if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
2689  // This node is not a CMP.
2690  break;
2691  SDValue N0 = Node->getOperand(0);
2692  SDValue N1 = Node->getOperand(1);
2693 
2694  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
2695  HasNoSignedComparisonUses(Node))
2696  N0 = N0.getOperand(0);
2697 
2698  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
2699  // use a smaller encoding.
2700  // Look past the truncate if CMP is the only use of it.
2701  if ((N0.getNode()->getOpcode() == ISD::AND ||
2702  (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
2703  N0.getNode()->hasOneUse() &&
2704  N0.getValueType() != MVT::i8 &&
2705  X86::isZeroNode(N1)) {
2706  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
2707  if (!C) break;
2708 
2709  // For example, convert "testl %eax, $8" to "testb %al, $8"
2710  if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
2711  (!(C->getZExtValue() & 0x80) ||
2712  HasNoSignedComparisonUses(Node))) {
2713  SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i8);
2714  SDValue Reg = N0.getNode()->getOperand(0);
2715 
2716  // On x86-32, only the ABCD registers have 8-bit subregisters.
2717  if (!Subtarget->is64Bit()) {
2718  const TargetRegisterClass *TRC;
2719  switch (N0.getSimpleValueType().SimpleTy) {
2720  case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
2721  case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
2722  default: llvm_unreachable("Unsupported TEST operand type!");
2723  }
2724  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
2725  Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
2726  Reg.getValueType(), Reg, RC), 0);
2727  }
2728 
2729  // Extract the l-register.
2730  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
2731  MVT::i8, Reg);
2732 
2733  // Emit a testb.
2734  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
2735  Subreg, Imm);
2736  // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2737  // one, do not call ReplaceAllUsesWith.
2738  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2739  SDValue(NewNode, 0));
2740  return nullptr;
2741  }
2742 
2743  // For example, "testl %eax, $2048" to "testb %ah, $8".
2744  if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
2745  (!(C->getZExtValue() & 0x8000) ||
2746  HasNoSignedComparisonUses(Node))) {
2747  // Shift the immediate right by 8 bits.
2748  SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
2749  dl, MVT::i8);
2750  SDValue Reg = N0.getNode()->getOperand(0);
2751 
2752  // Put the value in an ABCD register.
2753  const TargetRegisterClass *TRC;
2754  switch (N0.getSimpleValueType().SimpleTy) {
2755  case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
2756  case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
2757  case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
2758  default: llvm_unreachable("Unsupported TEST operand type!");
2759  }
2760  SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
2761  Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
2762  Reg.getValueType(), Reg, RC), 0);
2763 
2764  // Extract the h-register.
2765  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
2766  MVT::i8, Reg);
2767 
2768  // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
2769  // target GR8_NOREX registers, so make sure the register class is
2770  // forced.
2771  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
2772  MVT::i32, Subreg, ShiftedImm);
2773  // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2774  // one, do not call ReplaceAllUsesWith.
2775  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2776  SDValue(NewNode, 0));
2777  return nullptr;
2778  }
2779 
2780  // For example, "testl %eax, $32776" to "testw %ax, $32776".
2781  if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
2782  N0.getValueType() != MVT::i16 &&
2783  (!(C->getZExtValue() & 0x8000) ||
2784  HasNoSignedComparisonUses(Node))) {
2785  SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
2786  MVT::i16);
2787  SDValue Reg = N0.getNode()->getOperand(0);
2788 
2789  // Extract the 16-bit subregister.
2790  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
2791  MVT::i16, Reg);
2792 
2793  // Emit a testw.
2794  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
2795  Subreg, Imm);
2796  // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2797  // one, do not call ReplaceAllUsesWith.
2798  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2799  SDValue(NewNode, 0));
2800  return nullptr;
2801  }
2802 
2803  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
2804  if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
2805  N0.getValueType() == MVT::i64 &&
2806  (!(C->getZExtValue() & 0x80000000) ||
2807  HasNoSignedComparisonUses(Node))) {
2808  SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
2809  MVT::i32);
2810  SDValue Reg = N0.getNode()->getOperand(0);
2811 
2812  // Extract the 32-bit subregister.
2813  SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
2814  MVT::i32, Reg);
2815 
2816  // Emit a testl.
2817  SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
2818  Subreg, Imm);
2819  // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
2820  // one, do not call ReplaceAllUsesWith.
2821  ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
2822  SDValue(NewNode, 0));
2823  return nullptr;
2824  }
2825  }
2826  break;
2827  }
2828  case ISD::STORE: {
2829  // Change a chain of {load; incr or dec; store} of the same value into
2830  // a simple increment or decrement through memory of that value, if the
2831  // uses of the modified value and its address are suitable.
2832  // The DEC64m tablegen pattern is currently not able to match the case where
2833  // the EFLAGS on the original DEC are used. (This also applies to
2834  // {INC,DEC}X{64,32,16,8}.)
2835  // We'll need to improve tablegen to allow flags to be transferred from a
2836  // node in the pattern to the result node, probably with a new keyword.
2837  // For example, we currently have this:
2838  // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2839  // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2840  // (implicit EFLAGS)]>;
2841  // but we may need something like this:
2842  // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2843  // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2844  // (transferrable EFLAGS)]>;
2845 
2846  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2847  SDValue StoredVal = StoreNode->getOperand(1);
2848  unsigned Opc = StoredVal->getOpcode();
2849 
2850  LoadSDNode *LoadNode = nullptr;
2851  SDValue InputChain;
2852  if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
2853  LoadNode, InputChain))
2854  break;
2855 
2856  SDValue Base, Scale, Index, Disp, Segment;
2857  if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
2858  Base, Scale, Index, Disp, Segment))
2859  break;
2860 
2861  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
2862  MemOp[0] = StoreNode->getMemOperand();
2863  MemOp[1] = LoadNode->getMemOperand();
2864  const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
2865  EVT LdVT = LoadNode->getMemoryVT();
2866  unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
2867  MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
2868  SDLoc(Node),
2869  MVT::i32, MVT::Other, Ops);
2870  Result->setMemRefs(MemOp, MemOp + 2);
2871 
2872  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2873  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2874 
2875  return Result;
2876  }
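// Illustrative fusion (hypothetical assembly): a matched
// {load; X86ISD::INC; store} chain over an i32 slot collapses into the
// single memory-form instruction INC32m, i.e. "incl (%mem)", carrying both
// the load's and the store's memory operands.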
2877  }
2878 
2879  SDNode *ResNode = SelectCode(Node);
2880 
2881  DEBUG(dbgs() << "=> ";
2882  if (ResNode == nullptr || ResNode == Node)
2883  Node->dump(CurDAG);
2884  else
2885  ResNode->dump(CurDAG);
2886  dbgs() << '\n');
2887 
2888  return ResNode;
2889 }
2890 
2891 bool X86DAGToDAGISel::
2892 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
2893  std::vector<SDValue> &OutOps) {
2894  SDValue Op0, Op1, Op2, Op3, Op4;
2895  switch (ConstraintID) {
2896  default:
2897  llvm_unreachable("Unexpected asm memory constraint");
2898  case InlineAsm::Constraint_i:
2899  // FIXME: It seems strange that 'i' is needed here since it's supposed to
2900  // be an immediate and not a memory constraint.
2901  // Fallthrough.
2902  case InlineAsm::Constraint_o: // offsetable ??
2903  case InlineAsm::Constraint_v: // not offsetable ??
2904  case InlineAsm::Constraint_m: // memory
2905  case InlineAsm::Constraint_X:
2906  if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
2907  return true;
2908  break;
2909  }
2910 
2911  OutOps.push_back(Op0);
2912  OutOps.push_back(Op1);
2913  OutOps.push_back(Op2);
2914  OutOps.push_back(Op3);
2915  OutOps.push_back(Op4);
2916  return false;
2917 }
2918 
2919 /// createX86ISelDag - This pass converts a legalized DAG into a
2920 /// X86-specific DAG, ready for instruction scheduling.
2921 ///
2922 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
2923  CodeGenOpt::Level OptLevel) {
2924  return new X86DAGToDAGISel(TM, OptLevel);
2925 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:276
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:450
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq)
isCalleeLoad - Return true if call address is a load and it can be moved below CALLSEQ_START and the ...
SDValue getValue(unsigned R) const
const SDValue & getValue() const
void dump() const
Dump this node, for debugging.
STATISTIC(NumFunctions,"Total number of functions")
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineSDNodes's memory reference descriptor list.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
Tail call return.
static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:39
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool slowIncDec() const
Definition: X86Subtarget.h:371
unsigned getReg() const
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:210
AtomicOpc
Atomic opcode table.
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getID() const
getID() - Return the register class ID number.
unsigned getSizeInBits() const
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:122
unsigned getNumOperands() const
const SDValue & getOperand(unsigned Num) const
void setNodeId(int Id)
Set unique node id.
const SDValue & getBasePtr() const
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:178
unsigned getResNo() const
get the index which selects a specific result in the SDNode
static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, SDLoc dl, enum AtomicOpc &Op, MVT NVT, SDValue Val, const X86Subtarget *Subtarget)
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:150
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:200
X86 compare and logical compare instructions.
BlockAddress - The address of a basic block.
Definition: Constants.h:802
MachineMemOperand - A description of a memory reference used in the backend.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Shift and rotation operations.
Definition: ISDOpcodes.h:332
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:407
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:268
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:161
Reg
All possible values of the reg field in the ModR/M byte.
static bool HasNoSignedComparisonUses(SDNode *N)
HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has any uses which require the SF...
SimpleValueType SimpleTy
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence, and carry arbitrary information that target might want to know.
Definition: ISDOpcodes.h:592
#define false
Definition: ConvertUTF.c:65
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
#define G(x, y, z)
Definition: MD5.cpp:52
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM)
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, SDValue StoredVal, SelectionDAG *CurDAG, LoadSDNode *&LoadNode, SDValue &InputChain)
isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode is suitable for doing the {l...
#define T
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:351
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:191
const SDValue & getBasePtr() const
static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd]
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:116
EVT getMemoryVT() const
Return the type of the in-memory value.
This class is used to represent ISD::STORE nodes.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:513
SDNode * getNode() const
get the SDNode which holds the desired result
IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef.
Definition: TargetOpcodes.h:52
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:239
#define P(N)
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:157
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:109
LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
Definition: ISDOpcodes.h:81
MVT - Machine Value Type.
const SDValue & getOperand(unsigned i) const
bool isNonTemporal() const
This is an important base class in LLVM.
Definition: Constant.h:41
bool isVector() const
isVector - Return true if this is a vector value type.
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
This is a base class used to represent MGATHER and MSCATTER nodes.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:225
This class provides iterator support for SDUse operands that use a specific SDNode.
SDValue getTargetConstant(uint64_t Val, SDLoc DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:436
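A small hedged fragment (makeImm32 is a hypothetical helper, not part of this file) showing the usual reason to prefer getTargetConstant over getConstant: the result is opaque to DAG folding, so it survives as an immediate operand of the emitted machine instruction:

    #include "llvm/CodeGen/SelectionDAG.h"

    // Hypothetical: build an i32 immediate suitable as a MachineInstr
    // operand; a plain getConstant could be simplified away first.
    llvm::SDValue makeImm32(llvm::SelectionDAG &DAG, llvm::SDLoc DL,
                            uint64_t V) {
      return DAG.getTargetConstant(V, DL, llvm::MVT::i32);
    }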
void RepositionNode(allnodes_iterator Position, SDNode *N)
Move node N in the AllNodes list to be immediately before the given iterator Position.
unsigned getOpcode() const
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:294
const SDValue & getBasePtr() const
On Darwin, this node represents the result of the popl at function entry, used for PIC code...
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
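A hedged sketch of this iteration pattern; hasOnlyNonCopyUses is hypothetical, but HasNoSignedComparisonUses above walks uses the same way:

    #include "llvm/CodeGen/SelectionDAGNodes.h"

    // Hypothetical predicate: true if no user of N is a CopyToReg node.
    bool hasOnlyNonCopyUses(llvm::SDNode *N) {
      for (llvm::SDNode::use_iterator UI = N->use_begin(),
                                      UE = N->use_end();
           UI != UE; ++UI) {
        llvm::SDNode *User = *UI;
        if (User->getOpcode() == llvm::ISD::CopyToReg)
          return false;
      }
      return true;
    }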
bool isVolatile() const
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:468
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:416
EVT - Extended Value Type.
Definition: ValueTypes.h:31
std::vector< ArgListEntry > ArgListTy
This structure contains all information that is necessary for lowering calls.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
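For example, the "normal" load/store predicates above amount to checks of this shape (isPlainStore is a hypothetical name for the sketch):

    #include "llvm/CodeGen/SelectionDAGNodes.h"

    // Hypothetical: mirrors what isNormalStore checks for a store.
    bool isPlainStore(const llvm::StoreSDNode *St) {
      return St->getAddressingMode() == llvm::ISD::UNINDEXED &&
             !St->isTruncatingStore();
    }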
MachinePointerInfo - This class contains a discriminated union of information about pointers in memor...
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getOffset() const
These operations represent an abstract X86 call instruction, which includes a bunch of information...
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
EXTRACT_SUBREG - This instruction takes two operands: a register that has subregisters, and a subregister index.
Definition: TargetOpcodes.h:41
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:179
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by the operation.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
unsigned getAddrSpace() const
getAddrSpace - Return the LLVM IR address space number that this pointer points into.
Represents one node in the SelectionDAG.
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N)
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Class for arbitrary precision integers.
Definition: APInt.h:73
int64_t getSExtValue() const
op_iterator op_begin() const
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:383
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
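A hedged example of the dominant dyn_cast pattern in selection code (getImmIfConstant is hypothetical):

    #include "llvm/CodeGen/SelectionDAGNodes.h"

    // Hypothetical: read an operand as an immediate if it is a
    // ConstantSDNode; any other node kind yields false.
    bool getImmIfConstant(llvm::SDValue Op, uint64_t &Imm) {
      if (auto *C = llvm::dyn_cast<llvm::ConstantSDNode>(Op)) {
        Imm = C->getZExtValue();
        return true;
      }
      return false;
    }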
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:386
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1890
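A standalone sketch, assuming this helper lives in the llvm::APIntOps namespace alongside the other APInt free functions; And() is equivalent to operator& on APInt:

    #include "llvm/ADT/APInt.h"

    // Keep only the top byte of V.
    llvm::APInt topByte(const llvm::APInt &V) {
      return llvm::APIntOps::And(
          V, llvm::APInt::getHighBitsSet(V.getBitWidth(), 8));
    }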
static bool isDispSafeForFrameIndex(int64_t Val)
uint64_t getConstantOperandVal(unsigned i) const
static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc)
getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory increment or decrement...
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:321
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any uses of the indicated value.
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:196
A wrapper node for TargetConstantPool, TargetExternalSymbol, and TargetGlobalAddress.
FunctionPass * createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel)
createX86ISelDag - This pass converts a legalized DAG into a X86-specific DAG, ready for instruction ...
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:518
void ReplaceAllUsesWith(SDValue From, SDValue Op)
Modify anything using 'From' to use 'To' instead.
COPY_TO_REGCLASS - This instruction is a placeholder for a plain register-to-register copy into a spe...
Definition: TargetOpcodes.h:66
bool isOperandOf(const SDNode *N) const
isOperand - Return true if this node is an operand of N.
static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM)
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, SDValue Call, SDValue OrigChain)
MoveBelowCallOrigChain - Replace the original chain operand of the call with load's chain operand and...
op_iterator op_end() const
const SDValue & getOffset() const
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
const SDValue & getIndex() const
int getNodeId() const
Return the unique node id.
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:166
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isTarget=false, bool isOpaque=false)
This class is used to form a handle around another node that is persistent and is updated across invo...
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
SDValue getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
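A hedged fragment (getLow8 is hypothetical, and X86::sub_8bit is assumed to be the generated x86 sub-register index for the low byte, available via the target's register-info headers):

    #include "X86RegisterInfo.h"           // for X86::sub_8bit (assumed)
    #include "llvm/CodeGen/SelectionDAG.h"

    // Extract the low 8 bits of a 32-bit value as an i8 node.
    llvm::SDValue getLow8(llvm::SelectionDAG &DAG, llvm::SDLoc DL,
                          llvm::SDValue V) {
      return DAG.getTargetExtractSubreg(X86::sub_8bit, DL,
                                        llvm::MVT::i8, V);
    }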
Blend where the condition has been shrunk.
#define DEBUG(X)
Definition: Debug.h:92
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isZeroNode(SDValue Elt)
Returns true if Elt is a constant zero or a floating-point constant +0.0.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement=true)
Returns true if the given offset can fit into the displacement field of the instruction.
SUBREG_TO_REG - This instruction is similar to INSERT_SUBREG except that the first operand is an imme...
Definition: TargetOpcodes.h:58
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:389
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:309
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
Special wrapper used under X86-64 PIC mode for RIP relative displacements.
uint64_t getZExtValue() const
This class is used to represent ISD::LOAD nodes.