LLVM 9.0.0svn
X86ISelDAGToDAG.cpp
1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a DAG pattern matching instruction selector for X86,
10 // converting from a legalized dag to an X86 dag.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86.h"
15 #include "X86MachineFunctionInfo.h"
16 #include "X86RegisterInfo.h"
17 #include "X86Subtarget.h"
18 #include "X86TargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/IR/ConstantRange.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/Type.h"
29 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/KnownBits.h"
36 #include <stdint.h>
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "x86-isel"
40 
41 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
42 
43 static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
44  cl::desc("Enable setting constant bits to reduce size of mask immediates"),
45  cl::Hidden);
46 
47 //===----------------------------------------------------------------------===//
48 // Pattern Matcher Implementation
49 //===----------------------------------------------------------------------===//
50 
51 namespace {
52  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
53  /// numbers for the leaves of the matched tree.
54  struct X86ISelAddressMode {
55  enum {
56  RegBase,
57  FrameIndexBase
58  } BaseType;
59 
60  // This is really a union, discriminated by BaseType!
61  SDValue Base_Reg;
62  int Base_FrameIndex;
63 
64  unsigned Scale;
65  SDValue IndexReg;
66  int32_t Disp;
67  SDValue Segment;
68  const GlobalValue *GV;
69  const Constant *CP;
70  const BlockAddress *BlockAddr;
71  const char *ES;
72  MCSymbol *MCSym;
73  int JT;
74  unsigned Align; // CP alignment.
75  unsigned char SymbolFlags; // X86II::MO_*
76 
77  X86ISelAddressMode()
78  : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
79  Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
80  MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
81 
82  bool hasSymbolicDisplacement() const {
83  return GV != nullptr || CP != nullptr || ES != nullptr ||
84  MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
85  }
86 
87  bool hasBaseOrIndexReg() const {
88  return BaseType == FrameIndexBase ||
89  IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
90  }
91 
92  /// Return true if this addressing mode is already RIP-relative.
93  bool isRIPRelative() const {
94  if (BaseType != RegBase) return false;
95  if (RegisterSDNode *RegNode =
96  dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
97  return RegNode->getReg() == X86::RIP;
98  return false;
99  }
100 
101  void setBaseReg(SDValue Reg) {
102  BaseType = RegBase;
103  Base_Reg = Reg;
104  }
105 
106 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
107  void dump(SelectionDAG *DAG = nullptr) {
108  dbgs() << "X86ISelAddressMode " << this << '\n';
109  dbgs() << "Base_Reg ";
110  if (Base_Reg.getNode())
111  Base_Reg.getNode()->dump(DAG);
112  else
113  dbgs() << "nul\n";
114  if (BaseType == FrameIndexBase)
115  dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
116  dbgs() << " Scale " << Scale << '\n'
117  << "IndexReg ";
118  if (IndexReg.getNode())
119  IndexReg.getNode()->dump(DAG);
120  else
121  dbgs() << "nul\n";
122  dbgs() << " Disp " << Disp << '\n'
123  << "GV ";
124  if (GV)
125  GV->dump();
126  else
127  dbgs() << "nul";
128  dbgs() << " CP ";
129  if (CP)
130  CP->dump();
131  else
132  dbgs() << "nul";
133  dbgs() << '\n'
134  << "ES ";
135  if (ES)
136  dbgs() << ES;
137  else
138  dbgs() << "nul";
139  dbgs() << " MCSym ";
140  if (MCSym)
141  dbgs() << MCSym;
142  else
143  dbgs() << "nul";
144  dbgs() << " JT" << JT << " Align" << Align << '\n';
145  }
146 #endif
147  };
148 }
149 
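For orientation: the fields of X86ISelAddressMode above mirror the x86 memory-operand form segment:[Base + Index*Scale + Disp]. The following is a minimal standalone sketch (not LLVM code; the names are illustrative) of how a matched address decomposes onto those fields.

// Toy model of the x86 addressing form the struct describes:
//   effective address = Base + Index * Scale + Disp
// where Scale is 1, 2, 4 or 8 and Disp is a signed 32-bit value.
#include <cassert>
#include <cstdint>

struct ToyAddressMode {
  uint64_t Base;   // stands in for Base_Reg / Base_FrameIndex
  uint64_t Index;  // stands in for IndexReg
  unsigned Scale;  // 1, 2, 4 or 8
  int32_t Disp;    // 32-bit displacement, as in X86ISelAddressMode
};

static uint64_t effectiveAddress(const ToyAddressMode &AM) {
  assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
  return AM.Base + AM.Index * AM.Scale + uint64_t(int64_t(AM.Disp));
}

int main() {
  // Roughly models "p->field[i]" for a 4-byte element: the base register
  // holds p, the index register holds i, the scale is 4, and Disp is the
  // field offset.
  ToyAddressMode AM = {0x1000, 3, 4, 8};
  assert(effectiveAddress(AM) == 0x1000 + 3 * 4 + 8);
  return 0;
}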
150 namespace {
151  //===--------------------------------------------------------------------===//
152  /// ISel - X86-specific code to select X86 machine instructions for
153  /// SelectionDAG operations.
154  ///
155  class X86DAGToDAGISel final : public SelectionDAGISel {
156  /// Keep a pointer to the X86Subtarget around so that we can
157  /// make the right decision when generating code for different targets.
158  const X86Subtarget *Subtarget;
159 
160  /// If true, selector should try to optimize for code size instead of
161  /// performance.
162  bool OptForSize;
163 
164  /// If true, selector should try to optimize for minimum code size.
165  bool OptForMinSize;
166 
167  /// Disable direct TLS access through segment registers.
168  bool IndirectTlsSegRefs;
169 
170  public:
171  explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
172  : SelectionDAGISel(tm, OptLevel), OptForSize(false),
173  OptForMinSize(false) {}
174 
175  StringRef getPassName() const override {
176  return "X86 DAG->DAG Instruction Selection";
177  }
178 
179  bool runOnMachineFunction(MachineFunction &MF) override {
180  // Reset the subtarget each time through.
181  Subtarget = &MF.getSubtarget<X86Subtarget>();
182  IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
183  "indirect-tls-seg-refs");
184 
185  // OptFor[Min]Size are used in pattern predicates that isel is matching.
186  OptForSize = MF.getFunction().hasOptSize();
187  OptForMinSize = MF.getFunction().hasMinSize();
188  assert((!OptForMinSize || OptForSize) &&
189  "OptForMinSize implies OptForSize");
190 
 191  SelectionDAGISel::runOnMachineFunction(MF);
 192  return true;
193  }
194 
195  void EmitFunctionEntryCode() override;
196 
197  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
198 
199  void PreprocessISelDAG() override;
200  void PostprocessISelDAG() override;
201 
202 // Include the pieces autogenerated from the target description.
203 #include "X86GenDAGISel.inc"
204 
205  private:
206  void Select(SDNode *N) override;
207 
208  bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
209  bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
210  bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
211  bool matchAddress(SDValue N, X86ISelAddressMode &AM);
212  bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
213  bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
214  bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
215  unsigned Depth);
216  bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
217  bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
218  SDValue &Scale, SDValue &Index, SDValue &Disp,
219  SDValue &Segment);
220  bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
221  SDValue &Scale, SDValue &Index, SDValue &Disp,
222  SDValue &Segment);
223  bool selectMOV64Imm32(SDValue N, SDValue &Imm);
224  bool selectLEAAddr(SDValue N, SDValue &Base,
225  SDValue &Scale, SDValue &Index, SDValue &Disp,
226  SDValue &Segment);
227  bool selectLEA64_32Addr(SDValue N, SDValue &Base,
228  SDValue &Scale, SDValue &Index, SDValue &Disp,
229  SDValue &Segment);
230  bool selectTLSADDRAddr(SDValue N, SDValue &Base,
231  SDValue &Scale, SDValue &Index, SDValue &Disp,
232  SDValue &Segment);
233  bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
234  SDValue &Base, SDValue &Scale,
235  SDValue &Index, SDValue &Disp,
236  SDValue &Segment,
237  SDValue &NodeWithChain);
238  bool selectRelocImm(SDValue N, SDValue &Op);
239 
240  bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
241  SDValue &Base, SDValue &Scale,
242  SDValue &Index, SDValue &Disp,
243  SDValue &Segment);
244 
245  // Convenience method where P is also root.
246  bool tryFoldLoad(SDNode *P, SDValue N,
247  SDValue &Base, SDValue &Scale,
248  SDValue &Index, SDValue &Disp,
249  SDValue &Segment) {
250  return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
251  }
252 
253  /// Implement addressing mode selection for inline asm expressions.
254  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
255  unsigned ConstraintID,
256  std::vector<SDValue> &OutOps) override;
257 
258  void emitSpecialCodeForMain();
259 
260  inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
261  SDValue &Base, SDValue &Scale,
262  SDValue &Index, SDValue &Disp,
263  SDValue &Segment) {
264  Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
265  ? CurDAG->getTargetFrameIndex(
266  AM.Base_FrameIndex,
267  TLI->getPointerTy(CurDAG->getDataLayout()))
268  : AM.Base_Reg;
269  Scale = getI8Imm(AM.Scale, DL);
270  Index = AM.IndexReg;
271  // These are 32-bit even in 64-bit mode since RIP-relative offset
272  // is 32-bit.
273  if (AM.GV)
274  Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
275  MVT::i32, AM.Disp,
276  AM.SymbolFlags);
277  else if (AM.CP)
278  Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
279  AM.Align, AM.Disp, AM.SymbolFlags);
280  else if (AM.ES) {
281  assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
282  Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
283  } else if (AM.MCSym) {
284  assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
285  assert(AM.SymbolFlags == 0 && "oo");
286  Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
287  } else if (AM.JT != -1) {
288  assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
289  Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
290  } else if (AM.BlockAddr)
291  Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
292  AM.SymbolFlags);
293  else
294  Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
295 
296  if (AM.Segment.getNode())
297  Segment = AM.Segment;
298  else
299  Segment = CurDAG->getRegister(0, MVT::i32);
300  }
301 
302  // Utility function to determine whether we should avoid selecting
303  // immediate forms of instructions for better code size or not.
304  // At a high level, we'd like to avoid such instructions when
305  // we have similar constants used within the same basic block
306  // that can be kept in a register.
307  //
308  bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
309  uint32_t UseCount = 0;
310 
311  // Do not want to hoist if we're not optimizing for size.
312  // TODO: We'd like to remove this restriction.
313  // See the comment in X86InstrInfo.td for more info.
314  if (!OptForSize)
315  return false;
316 
317  // Walk all the users of the immediate.
318  for (SDNode::use_iterator UI = N->use_begin(),
319  UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
320 
321  SDNode *User = *UI;
322 
323  // This user is already selected. Count it as a legitimate use and
324  // move on.
325  if (User->isMachineOpcode()) {
326  UseCount++;
327  continue;
328  }
329 
330  // We want to count stores of immediates as real uses.
331  if (User->getOpcode() == ISD::STORE &&
332  User->getOperand(1).getNode() == N) {
333  UseCount++;
334  continue;
335  }
336 
337  // We don't currently match users that have > 2 operands (except
 338  // for stores, which are handled above).
 339  // Those instructions won't match in ISel, for now, and would
340  // be counted incorrectly.
341  // This may change in the future as we add additional instruction
342  // types.
343  if (User->getNumOperands() != 2)
344  continue;
345 
346  // Immediates that are used for offsets as part of stack
347  // manipulation should be left alone. These are typically
348  // used to indicate SP offsets for argument passing and
349  // will get pulled into stores/pushes (implicitly).
350  if (User->getOpcode() == X86ISD::ADD ||
351  User->getOpcode() == ISD::ADD ||
352  User->getOpcode() == X86ISD::SUB ||
353  User->getOpcode() == ISD::SUB) {
354 
355  // Find the other operand of the add/sub.
356  SDValue OtherOp = User->getOperand(0);
357  if (OtherOp.getNode() == N)
358  OtherOp = User->getOperand(1);
359 
360  // Don't count if the other operand is SP.
361  RegisterSDNode *RegNode;
362  if (OtherOp->getOpcode() == ISD::CopyFromReg &&
363  (RegNode = dyn_cast_or_null<RegisterSDNode>(
364  OtherOp->getOperand(1).getNode())))
365  if ((RegNode->getReg() == X86::ESP) ||
366  (RegNode->getReg() == X86::RSP))
367  continue;
368  }
369 
370  // ... otherwise, count this and move on.
371  UseCount++;
372  }
373 
374  // If we have more than 1 use, then recommend for hoisting.
375  return (UseCount > 1);
376  }
377 
378  /// Return a target constant with the specified value of type i8.
379  inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
380  return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
381  }
382 
383  /// Return a target constant with the specified value, of type i32.
384  inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
385  return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
386  }
387 
388  /// Return a target constant with the specified value, of type i64.
389  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
390  return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
391  }
392 
393  SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
394  const SDLoc &DL) {
395  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
396  uint64_t Index = N->getConstantOperandVal(1);
397  MVT VecVT = N->getOperand(0).getSimpleValueType();
398  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
399  }
400 
401  SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
402  const SDLoc &DL) {
403  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
404  uint64_t Index = N->getConstantOperandVal(2);
405  MVT VecVT = N->getSimpleValueType(0);
406  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
407  }
408 
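The lane immediate computed by the two helpers above is simply the element index rescaled to 128-bit (or 256-bit) subvector units. A standalone sketch of that arithmetic, not LLVM code:

#include <cassert>

// (Index * ScalarSizeInBits) / VecWidth, as in getExtractVEXTRACTImmediate
// and getInsertVINSERTImmediate above.
static unsigned laneImmediate(unsigned ElemIndex, unsigned ScalarBits,
                              unsigned VecWidth) {
  return (ElemIndex * ScalarBits) / VecWidth;
}

int main() {
  // Extracting a 128-bit subvector starting at element 4 of a v8i32
  // (256-bit) vector: elements 0-3 are lane 0, elements 4-7 are lane 1.
  assert(laneImmediate(4, 32, 128) == 1);
  // Inserting at element 0 always targets lane 0, whatever the types.
  assert(laneImmediate(0, 64, 256) == 0);
  return 0;
}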
409  // Helper to detect unneeded and instructions on shift amounts. Called
410  // from PatFrags in tablegen.
411  bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
412  assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
413  const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
414 
415  if (Val.countTrailingOnes() >= Width)
416  return true;
417 
418  APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
419  return Mask.countTrailingOnes() >= Width;
420  }
421 
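isUnneededShiftMask relies on the fact that x86 shift instructions only consume the low log2(bit width) bits of the count, so an AND that preserves at least that many low bits cannot change the result. A standalone sketch (not LLVM code) of the same check:

#include <cassert>
#include <cstdint>

// Same idea as Val.countTrailingOnes() >= Width above: the mask keeps at
// least Width low bits, so it cannot affect a shift that only reads them.
static bool maskKeepsLowBits(uint64_t Mask, unsigned Width) {
  unsigned TrailingOnes = 0;
  while (Mask & 1) {
    ++TrailingOnes;
    Mask >>= 1;
  }
  return TrailingOnes >= Width;
}

int main() {
  // A 32-bit shift uses only the low 5 bits of its amount (amount mod 32).
  assert(maskKeepsLowBits(31, 5));   // (amt & 31) is redundant
  assert(!maskKeepsLowBits(15, 5));  // (amt & 15) can change the shift
  return 0;
}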
422  /// Return an SDNode that returns the value of the global base register.
423  /// Output instructions required to initialize the global base register,
424  /// if necessary.
425  SDNode *getGlobalBaseReg();
426 
427  /// Return a reference to the TargetMachine, casted to the target-specific
428  /// type.
429  const X86TargetMachine &getTargetMachine() const {
430  return static_cast<const X86TargetMachine &>(TM);
431  }
432 
433  /// Return a reference to the TargetInstrInfo, casted to the target-specific
434  /// type.
435  const X86InstrInfo *getInstrInfo() const {
436  return Subtarget->getInstrInfo();
437  }
438 
439  /// Address-mode matching performs shift-of-and to and-of-shift
440  /// reassociation in order to expose more scaled addressing
441  /// opportunities.
442  bool ComplexPatternFuncMutatesDAG() const override {
443  return true;
444  }
445 
446  bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
447 
448  /// Returns whether this is a relocatable immediate in the range
 449  /// [-2^(Width-1) .. 2^(Width-1)-1].
450  template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
451  if (auto *CN = dyn_cast<ConstantSDNode>(N))
452  return isInt<Width>(CN->getSExtValue());
453  return isSExtAbsoluteSymbolRef(Width, N);
454  }
455 
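A standalone sketch (not LLVM code) of the signed-range test that isInt<Width> performs for the constant case above:

#include <cassert>
#include <cstdint>

// A value is a sign-extended Width-bit immediate when it lies in
// [-2^(Width-1), 2^(Width-1) - 1].
template <unsigned Width> static bool fitsSignedBits(int64_t V) {
  const int64_t Bound = int64_t(1) << (Width - 1);
  return V >= -Bound && V < Bound;
}

int main() {
  assert(fitsSignedBits<8>(-128) && fitsSignedBits<8>(127));
  assert(!fitsSignedBits<8>(128));
  assert(fitsSignedBits<32>(INT32_MIN) && !fitsSignedBits<32>(1LL << 31));
  return 0;
}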
456  // Indicates we should prefer to use a non-temporal load for this load.
457  bool useNonTemporalLoad(LoadSDNode *N) const {
458  if (!N->isNonTemporal())
459  return false;
460 
461  unsigned StoreSize = N->getMemoryVT().getStoreSize();
462 
463  if (N->getAlignment() < StoreSize)
464  return false;
465 
466  switch (StoreSize) {
467  default: llvm_unreachable("Unsupported store size");
468  case 4:
469  case 8:
470  return false;
471  case 16:
472  return Subtarget->hasSSE41();
473  case 32:
474  return Subtarget->hasAVX2();
475  case 64:
476  return Subtarget->hasAVX512();
477  }
478  }
479 
480  bool foldLoadStoreIntoMemOperand(SDNode *Node);
481  MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
482  bool matchBitExtract(SDNode *Node);
483  bool shrinkAndImmediate(SDNode *N);
484  bool isMaskZeroExtended(SDNode *N) const;
485  bool tryShiftAmountMod(SDNode *N);
486  bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
487 
488  MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
489  const SDLoc &dl, MVT VT, SDNode *Node);
490  MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
491  const SDLoc &dl, MVT VT, SDNode *Node,
492  SDValue &InFlag);
493 
494  bool tryOptimizeRem8Extend(SDNode *N);
495 
496  bool onlyUsesZeroFlag(SDValue Flags) const;
497  bool hasNoSignFlagUses(SDValue Flags) const;
498  bool hasNoCarryFlagUses(SDValue Flags) const;
499  };
500 }
501 
502 
503 // Returns true if this masked compare can be implemented legally with this
504 // type.
505 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
506  unsigned Opcode = N->getOpcode();
507  if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
508  Opcode == X86ISD::CMPM_SAE || Opcode == X86ISD::VFPCLASS) {
509  // We can get 256-bit 8 element types here without VLX being enabled. When
510  // this happens we will use 512-bit operations and the mask will not be
511  // zero extended.
512  EVT OpVT = N->getOperand(0).getValueType();
513  if (OpVT.is256BitVector() || OpVT.is128BitVector())
514  return Subtarget->hasVLX();
515 
516  return true;
517  }
518  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
519  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
520  Opcode == X86ISD::FSETCCM_SAE)
521  return true;
522 
523  return false;
524 }
525 
526 // Returns true if we can assume the writer of the mask has zero extended it
527 // for us.
528 bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
529  // If this is an AND, check if we have a compare on either side. As long as
530  // one side guarantees the mask is zero extended, the AND will preserve those
531  // zeros.
532  if (N->getOpcode() == ISD::AND)
533  return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
534  isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
535 
536  return isLegalMaskCompare(N, Subtarget);
537 }
538 
539 bool
540 X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
541  if (OptLevel == CodeGenOpt::None) return false;
542 
543  if (!N.hasOneUse())
544  return false;
545 
546  if (N.getOpcode() != ISD::LOAD)
547  return true;
548 
549  // Don't fold non-temporal loads if we have an instruction for them.
550  if (useNonTemporalLoad(cast<LoadSDNode>(N)))
551  return false;
552 
553  // If N is a load, do additional profitability checks.
554  if (U == Root) {
555  switch (U->getOpcode()) {
556  default: break;
557  case X86ISD::ADD:
558  case X86ISD::ADC:
559  case X86ISD::SUB:
560  case X86ISD::SBB:
561  case X86ISD::AND:
562  case X86ISD::XOR:
563  case X86ISD::OR:
564  case ISD::ADD:
565  case ISD::ADDCARRY:
566  case ISD::AND:
567  case ISD::OR:
568  case ISD::XOR: {
569  SDValue Op1 = U->getOperand(1);
570 
 571  // If the other operand is an 8-bit immediate we should fold the immediate
572  // instead. This reduces code size.
573  // e.g.
574  // movl 4(%esp), %eax
575  // addl $4, %eax
576  // vs.
577  // movl $4, %eax
578  // addl 4(%esp), %eax
579  // The former is 2 bytes shorter. In case where the increment is 1, then
580  // the saving can be 4 bytes (by using incl %eax).
581  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
582  if (Imm->getAPIntValue().isSignedIntN(8))
583  return false;
584 
585  // If this is a 64-bit AND with an immediate that fits in 32-bits,
586  // prefer using the smaller and over folding the load. This is needed to
587  // make sure immediates created by shrinkAndImmediate are always folded.
588  // Ideally we would narrow the load during DAG combine and get the
589  // best of both worlds.
590  if (U->getOpcode() == ISD::AND &&
591  Imm->getAPIntValue().getBitWidth() == 64 &&
592  Imm->getAPIntValue().isIntN(32))
593  return false;
594 
 595  // ADD/SUB can negate the immediate and use the opposite operation
596  // to fit 128 into a sign extended 8 bit immediate.
597  if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
598  (-Imm->getAPIntValue()).isSignedIntN(8))
599  return false;
600  }
601 
602  // If the other operand is a TLS address, we should fold it instead.
603  // This produces
604  // movl %gs:0, %eax
605  // leal i@NTPOFF(%eax), %eax
606  // instead of
607  // movl $i@NTPOFF, %eax
608  // addl %gs:0, %eax
609  // if the block also has an access to a second TLS address this will save
610  // a load.
611  // FIXME: This is probably also true for non-TLS addresses.
612  if (Op1.getOpcode() == X86ISD::Wrapper) {
613  SDValue Val = Op1.getOperand(0);
 614  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
 615  return false;
616  }
617 
618  // Don't fold load if this matches the BTS/BTR/BTC patterns.
619  // BTS: (or X, (shl 1, n))
620  // BTR: (and X, (rotl -2, n))
621  // BTC: (xor X, (shl 1, n))
622  if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
623  if (U->getOperand(0).getOpcode() == ISD::SHL &&
 624  isOneConstant(U->getOperand(0).getOperand(0)))
 625  return false;
626 
627  if (U->getOperand(1).getOpcode() == ISD::SHL &&
 628  isOneConstant(U->getOperand(1).getOperand(0)))
 629  return false;
630  }
631  if (U->getOpcode() == ISD::AND) {
632  SDValue U0 = U->getOperand(0);
633  SDValue U1 = U->getOperand(1);
634  if (U0.getOpcode() == ISD::ROTL) {
635  auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
636  if (C && C->getSExtValue() == -2)
637  return false;
638  }
639 
640  if (U1.getOpcode() == ISD::ROTL) {
641  auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
642  if (C && C->getSExtValue() == -2)
643  return false;
644  }
645  }
646 
647  break;
648  }
649  case ISD::SHL:
650  case ISD::SRA:
651  case ISD::SRL:
652  // Don't fold a load into a shift by immediate. The BMI2 instructions
653  // support folding a load, but not an immediate. The legacy instructions
654  // support folding an immediate, but can't fold a load. Folding an
655  // immediate is preferable to folding a load.
656  if (isa<ConstantSDNode>(U->getOperand(1)))
657  return false;
658 
659  break;
660  }
661  }
662 
 663  // Prevent folding a load if this can be implemented with an insert_subreg or
664  // a move that implicitly zeroes.
665  if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
666  isNullConstant(Root->getOperand(2)) &&
667  (Root->getOperand(0).isUndef() ||
 668  ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
 669  return false;
670 
671  return true;
672 }
673 
674 /// Replace the original chain operand of the call with
675 /// load's chain operand and move load below the call's chain operand.
 676 static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
 677  SDValue Call, SDValue OrigChain) {
 678  SmallVector<SDValue, 8> Ops;
 679  SDValue Chain = OrigChain.getOperand(0);
680  if (Chain.getNode() == Load.getNode())
681  Ops.push_back(Load.getOperand(0));
682  else {
683  assert(Chain.getOpcode() == ISD::TokenFactor &&
684  "Unexpected chain operand");
685  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
686  if (Chain.getOperand(i).getNode() == Load.getNode())
687  Ops.push_back(Load.getOperand(0));
688  else
689  Ops.push_back(Chain.getOperand(i));
690  SDValue NewChain =
691  CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
692  Ops.clear();
693  Ops.push_back(NewChain);
694  }
695  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
696  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
697  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
698  Load.getOperand(1), Load.getOperand(2));
699 
700  Ops.clear();
701  Ops.push_back(SDValue(Load.getNode(), 1));
702  Ops.append(Call->op_begin() + 1, Call->op_end());
703  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
704 }
705 
706 /// Return true if call address is a load and it can be
707 /// moved below CALLSEQ_START and the chains leading up to the call.
708 /// Return the CALLSEQ_START by reference as a second output.
709 /// In the case of a tail call, there isn't a callseq node between the call
710 /// chain and the load.
711 static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
712  // The transformation is somewhat dangerous if the call's chain was glued to
713  // the call. After MoveBelowOrigChain the load is moved between the call and
714  // the chain, this can create a cycle if the load is not folded. So it is
715  // *really* important that we are sure the load will be folded.
716  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
717  return false;
718  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
719  if (!LD ||
720  LD->isVolatile() ||
 721  LD->getAddressingMode() != ISD::UNINDEXED ||
 722  LD->getExtensionType() != ISD::NON_EXTLOAD)
 723  return false;
724 
725  // Now let's find the callseq_start.
726  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
727  if (!Chain.hasOneUse())
728  return false;
729  Chain = Chain.getOperand(0);
730  }
731 
732  if (!Chain.getNumOperands())
733  return false;
734  // Since we are not checking for AA here, conservatively abort if the chain
735  // writes to memory. It's not safe to move the callee (a load) across a store.
736  if (isa<MemSDNode>(Chain.getNode()) &&
737  cast<MemSDNode>(Chain.getNode())->writeMem())
738  return false;
739  if (Chain.getOperand(0).getNode() == Callee.getNode())
740  return true;
741  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
742  Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
743  Callee.getValue(1).hasOneUse())
744  return true;
745  return false;
746 }
747 
748 void X86DAGToDAGISel::PreprocessISelDAG() {
749  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
750  E = CurDAG->allnodes_end(); I != E; ) {
751  SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
752 
753  // If this is a target specific AND node with no flag usages, turn it back
754  // into ISD::AND to enable test instruction matching.
755  if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
756  SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
757  N->getOperand(0), N->getOperand(1));
758  --I;
759  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
760  ++I;
761  CurDAG->DeleteNode(N);
762  continue;
763  }
764 
765  // Replace vector shifts with their X86 specific equivalent so we don't
766  // need 2 sets of patterns.
767  switch (N->getOpcode()) {
768  case ISD::SHL:
769  case ISD::SRA:
770  case ISD::SRL:
771  if (N->getValueType(0).isVector()) {
772  unsigned NewOpc;
773  switch (N->getOpcode()) {
774  default: llvm_unreachable("Unexpected opcode!");
775  case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
776  case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
777  case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
778  }
779  SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
780  N->getOperand(0), N->getOperand(1));
781  --I;
782  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
783  ++I;
784  CurDAG->DeleteNode(N);
785  continue;
786  }
787  }
788 
789  if (OptLevel != CodeGenOpt::None &&
790  // Only do this when the target can fold the load into the call or
791  // jmp.
792  !Subtarget->useRetpolineIndirectCalls() &&
793  ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
794  (N->getOpcode() == X86ISD::TC_RETURN &&
795  (Subtarget->is64Bit() ||
796  !getTargetMachine().isPositionIndependent())))) {
797  /// Also try moving call address load from outside callseq_start to just
798  /// before the call to allow it to be folded.
799  ///
800  /// [Load chain]
801  /// ^
802  /// |
803  /// [Load]
804  /// ^ ^
805  /// | |
806  /// / \--
807  /// / |
808  ///[CALLSEQ_START] |
809  /// ^ |
810  /// | |
811  /// [LOAD/C2Reg] |
812  /// | |
813  /// \ /
814  /// \ /
815  /// [CALL]
816  bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
817  SDValue Chain = N->getOperand(0);
818  SDValue Load = N->getOperand(1);
819  if (!isCalleeLoad(Load, Chain, HasCallSeq))
820  continue;
821  moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
822  ++NumLoadMoved;
823  continue;
824  }
825 
826  // Lower fpround and fpextend nodes that target the FP stack to be store and
827  // load to the stack. This is a gross hack. We would like to simply mark
828  // these as being illegal, but when we do that, legalize produces these when
829  // it expands calls, then expands these in the same legalize pass. We would
830  // like dag combine to be able to hack on these between the call expansion
831  // and the node legalization. As such this pass basically does "really
832  // late" legalization of these inline with the X86 isel pass.
833  // FIXME: This should only happen when not compiled with -O0.
834  if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
835  continue;
836 
837  MVT SrcVT = N->getOperand(0).getSimpleValueType();
838  MVT DstVT = N->getSimpleValueType(0);
839 
840  // If any of the sources are vectors, no fp stack involved.
841  if (SrcVT.isVector() || DstVT.isVector())
842  continue;
843 
844  // If the source and destination are SSE registers, then this is a legal
845  // conversion that should not be lowered.
846  const X86TargetLowering *X86Lowering =
847  static_cast<const X86TargetLowering *>(TLI);
848  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
849  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
850  if (SrcIsSSE && DstIsSSE)
851  continue;
852 
853  if (!SrcIsSSE && !DstIsSSE) {
854  // If this is an FPStack extension, it is a noop.
855  if (N->getOpcode() == ISD::FP_EXTEND)
856  continue;
857  // If this is a value-preserving FPStack truncation, it is a noop.
858  if (N->getConstantOperandVal(1))
859  continue;
860  }
861 
862  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
863  // FPStack has extload and truncstore. SSE can fold direct loads into other
864  // operations. Based on this, decide what we want to do.
865  MVT MemVT;
866  if (N->getOpcode() == ISD::FP_ROUND)
867  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
868  else
869  MemVT = SrcIsSSE ? SrcVT : DstVT;
870 
871  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
872  SDLoc dl(N);
873 
874  // FIXME: optimize the case where the src/dest is a load or store?
875  SDValue Store =
876  CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
877  MemTmp, MachinePointerInfo(), MemVT);
878  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
879  MachinePointerInfo(), MemVT);
880 
881  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
882  // extload we created. This will cause general havok on the dag because
883  // anything below the conversion could be folded into other existing nodes.
884  // To avoid invalidating 'I', back it up to the convert node.
885  --I;
886  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
887 
888  // Now that we did that, the node is dead. Increment the iterator to the
889  // next node to process, then delete N.
890  ++I;
891  CurDAG->DeleteNode(N);
892  }
893 }
894 
895 // Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
896 bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
897  unsigned Opc = N->getMachineOpcode();
898  if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
899  Opc != X86::MOVSX64rr8)
900  return false;
901 
902  SDValue N0 = N->getOperand(0);
903 
904  // We need to be extracting the lower bit of an extend.
905  if (!N0.isMachineOpcode() ||
906  N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
907  N0.getConstantOperandVal(1) != X86::sub_8bit)
908  return false;
909 
910  // We're looking for either a movsx or movzx to match the original opcode.
911  unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
912  : X86::MOVSX32rr8_NOREX;
913  SDValue N00 = N0.getOperand(0);
914  if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
915  return false;
916 
917  if (Opc == X86::MOVSX64rr8) {
918  // If we had a sign extend from 8 to 64 bits. We still need to go from 32
919  // to 64.
920  MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
921  MVT::i64, N00);
922  ReplaceUses(N, Extend);
923  } else {
924  // Ok we can drop this extend and just use the original extend.
925  ReplaceUses(N, N00.getNode());
926  }
927 
928  return true;
929 }
930 
931 void X86DAGToDAGISel::PostprocessISelDAG() {
932  // Skip peepholes at -O0.
933  if (TM.getOptLevel() == CodeGenOpt::None)
934  return;
935 
936  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
937 
938  bool MadeChange = false;
939  while (Position != CurDAG->allnodes_begin()) {
940  SDNode *N = &*--Position;
941  // Skip dead nodes and any non-machine opcodes.
942  if (N->use_empty() || !N->isMachineOpcode())
943  continue;
944 
945  if (tryOptimizeRem8Extend(N)) {
946  MadeChange = true;
947  continue;
948  }
949 
950  // Look for a TESTrr+ANDrr pattern where both operands of the test are
951  // the same. Rewrite to remove the AND.
952  unsigned Opc = N->getMachineOpcode();
953  if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
954  Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
955  N->getOperand(0) == N->getOperand(1) &&
956  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
957  N->getOperand(0).isMachineOpcode()) {
958  SDValue And = N->getOperand(0);
959  unsigned N0Opc = And.getMachineOpcode();
960  if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
961  N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
962  MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
963  MVT::i32,
964  And.getOperand(0),
965  And.getOperand(1));
966  ReplaceUses(N, Test);
967  MadeChange = true;
968  continue;
969  }
970  if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
971  N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
972  unsigned NewOpc;
973  switch (N0Opc) {
974  case X86::AND8rm: NewOpc = X86::TEST8mr; break;
975  case X86::AND16rm: NewOpc = X86::TEST16mr; break;
976  case X86::AND32rm: NewOpc = X86::TEST32mr; break;
977  case X86::AND64rm: NewOpc = X86::TEST64mr; break;
978  }
979 
980  // Need to swap the memory and register operand.
981  SDValue Ops[] = { And.getOperand(1),
982  And.getOperand(2),
983  And.getOperand(3),
984  And.getOperand(4),
985  And.getOperand(5),
986  And.getOperand(0),
987  And.getOperand(6) /* Chain */ };
988  MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
989  MVT::i32, MVT::Other, Ops);
990  ReplaceUses(N, Test);
991  MadeChange = true;
992  continue;
993  }
994  }
995 
996  // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
997  // used. We're doing this late so we can prefer to fold the AND into masked
998  // comparisons. Doing that can be better for the live range of the mask
999  // register.
1000  if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
1001  Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
1002  N->getOperand(0) == N->getOperand(1) &&
1003  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
1004  N->getOperand(0).isMachineOpcode() &&
1005  onlyUsesZeroFlag(SDValue(N, 0))) {
1006  SDValue And = N->getOperand(0);
1007  unsigned N0Opc = And.getMachineOpcode();
1008  // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
1009  // KAND instructions and KTEST use the same ISA feature.
1010  if (N0Opc == X86::KANDBrr ||
1011  (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
1012  N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
1013  unsigned NewOpc;
1014  switch (Opc) {
1015  default: llvm_unreachable("Unexpected opcode!");
1016  case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
1017  case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
1018  case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
1019  case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
1020  }
1021  MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1022  MVT::i32,
1023  And.getOperand(0),
1024  And.getOperand(1));
1025  ReplaceUses(N, KTest);
1026  MadeChange = true;
1027  continue;
1028  }
1029  }
1030 
1031  // Attempt to remove vectors moves that were inserted to zero upper bits.
1032  if (Opc != TargetOpcode::SUBREG_TO_REG)
1033  continue;
1034 
1035  unsigned SubRegIdx = N->getConstantOperandVal(2);
1036  if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
1037  continue;
1038 
1039  SDValue Move = N->getOperand(1);
1040  if (!Move.isMachineOpcode())
1041  continue;
1042 
 1043  // Make sure it's one of the move opcodes we recognize.
1044  switch (Move.getMachineOpcode()) {
1045  default:
1046  continue;
1047  case X86::VMOVAPDrr: case X86::VMOVUPDrr:
1048  case X86::VMOVAPSrr: case X86::VMOVUPSrr:
1049  case X86::VMOVDQArr: case X86::VMOVDQUrr:
1050  case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
1051  case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
1052  case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
1053  case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
1054  case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
1055  case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
1056  case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
1057  case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
1058  case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
1059  case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
1060  case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
1061  break;
1062  }
1063 
1064  SDValue In = Move.getOperand(0);
1065  if (!In.isMachineOpcode() ||
1066  In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1067  continue;
1068 
1069  // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers
1070  // the SHA instructions which use a legacy encoding.
1071  uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1072  if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
1073  (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
1074  (TSFlags & X86II::EncodingMask) != X86II::XOP)
1075  continue;
1076 
1077  // Producing instruction is another vector instruction. We can drop the
1078  // move.
1079  CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1080  MadeChange = true;
1081  }
1082 
1083  if (MadeChange)
1084  CurDAG->RemoveDeadNodes();
1085 }
1086 
1087 
1088 /// Emit any code that needs to be executed only in the main function.
1089 void X86DAGToDAGISel::emitSpecialCodeForMain() {
1090  if (Subtarget->isTargetCygMing()) {
 1091  TargetLowering::ArgListTy Args;
 1092  auto &DL = CurDAG->getDataLayout();
1093 
1094  TargetLowering::CallLoweringInfo CLI(*CurDAG);
1095  CLI.setChain(CurDAG->getRoot())
1096  .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1097  CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1098  std::move(Args));
1099  const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1100  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
1101  CurDAG->setRoot(Result.second);
1102  }
1103 }
1104 
1105 void X86DAGToDAGISel::EmitFunctionEntryCode() {
1106  // If this is main, emit special code for main.
1107  const Function &F = MF->getFunction();
1108  if (F.hasExternalLinkage() && F.getName() == "main")
1109  emitSpecialCodeForMain();
1110 }
1111 
1112 static bool isDispSafeForFrameIndex(int64_t Val) {
1113  // On 64-bit platforms, we can run into an issue where a frame index
1114  // includes a displacement that, when added to the explicit displacement,
1115  // will overflow the displacement field. Assuming that the frame index
1116  // displacement fits into a 31-bit integer (which is only slightly more
1117  // aggressive than the current fundamental assumption that it fits into
1118  // a 32-bit integer), a 31-bit disp should always be safe.
1119  return isInt<31>(Val);
1120 }
1121 
1122 bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
1123  X86ISelAddressMode &AM) {
1124  // If there's no offset to fold, we don't need to do any work.
1125  if (Offset == 0)
1126  return false;
1127 
1128  // Cannot combine ExternalSymbol displacements with integer offsets.
1129  if (AM.ES || AM.MCSym)
1130  return true;
1131 
1132  int64_t Val = AM.Disp + Offset;
1133  CodeModel::Model M = TM.getCodeModel();
1134  if (Subtarget->is64Bit()) {
 1135  if (!X86::isOffsetSuitableForCodeModel(Val, M,
 1136  AM.hasSymbolicDisplacement()))
1137  return true;
1138  // In addition to the checks required for a register base, check that
1139  // we do not try to use an unsafe Disp with a frame index.
1140  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
 1141  !isDispSafeForFrameIndex(Val))
 1142  return true;
1143  }
1144  AM.Disp = Val;
1145  return false;
1146 
1147 }
1148 
1149 bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
1150  SDValue Address = N->getOperand(1);
1151 
1152  // load gs:0 -> GS segment register.
1153  // load fs:0 -> FS segment register.
1154  //
1155  // This optimization is valid because the GNU TLS model defines that
1156  // gs:0 (or fs:0 on X86-64) contains its own address.
1157  // For more information see http://people.redhat.com/drepper/tls.pdf
1158  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
1159  if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
1160  !IndirectTlsSegRefs &&
1161  (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1162  Subtarget->isTargetFuchsia()))
1163  switch (N->getPointerInfo().getAddrSpace()) {
1164  case 256:
1165  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1166  return false;
1167  case 257:
1168  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1169  return false;
1170  // Address space 258 is not handled here, because it is not used to
1171  // address TLS areas.
1172  }
1173 
1174  return true;
1175 }
1176 
1177 /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1178 /// mode. These wrap things that will resolve down into a symbol reference.
1179 /// If no match is possible, this returns true, otherwise it returns false.
1180 bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1181  // If the addressing mode already has a symbol as the displacement, we can
1182  // never match another symbol.
1183  if (AM.hasSymbolicDisplacement())
1184  return true;
1185 
1186  bool IsRIPRelTLS = false;
1187  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1188  if (IsRIPRel) {
1189  SDValue Val = N.getOperand(0);
 1190  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
 1191  IsRIPRelTLS = true;
1192  }
1193 
1194  // We can't use an addressing mode in the 64-bit large code model.
1195  // Global TLS addressing is an exception. In the medium code model,
 1196 // we can use a mode when RIP wrappers are present.
1197  // That signifies access to globals that are known to be "near",
1198  // such as the GOT itself.
1199  CodeModel::Model M = TM.getCodeModel();
1200  if (Subtarget->is64Bit() &&
1201  ((M == CodeModel::Large && !IsRIPRelTLS) ||
1202  (M == CodeModel::Medium && !IsRIPRel)))
1203  return true;
1204 
1205  // Base and index reg must be 0 in order to use %rip as base.
1206  if (IsRIPRel && AM.hasBaseOrIndexReg())
1207  return true;
1208 
1209  // Make a local copy in case we can't do this fold.
1210  X86ISelAddressMode Backup = AM;
1211 
1212  int64_t Offset = 0;
1213  SDValue N0 = N.getOperand(0);
1214  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1215  AM.GV = G->getGlobal();
1216  AM.SymbolFlags = G->getTargetFlags();
1217  Offset = G->getOffset();
1218  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1219  AM.CP = CP->getConstVal();
1220  AM.Align = CP->getAlignment();
1221  AM.SymbolFlags = CP->getTargetFlags();
1222  Offset = CP->getOffset();
1223  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1224  AM.ES = S->getSymbol();
1225  AM.SymbolFlags = S->getTargetFlags();
1226  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1227  AM.MCSym = S->getMCSymbol();
1228  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
1229  AM.JT = J->getIndex();
1230  AM.SymbolFlags = J->getTargetFlags();
1231  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1232  AM.BlockAddr = BA->getBlockAddress();
1233  AM.SymbolFlags = BA->getTargetFlags();
1234  Offset = BA->getOffset();
1235  } else
1236  llvm_unreachable("Unhandled symbol reference node.");
1237 
1238  if (foldOffsetIntoAddress(Offset, AM)) {
1239  AM = Backup;
1240  return true;
1241  }
1242 
1243  if (IsRIPRel)
1244  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1245 
1246  // Commit the changes now that we know this fold is safe.
1247  return false;
1248 }
1249 
1250 /// Add the specified node to the specified addressing mode, returning true if
1251 /// it cannot be done. This just pattern matches for the addressing mode.
1252 bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1253  if (matchAddressRecursively(N, AM, 0))
1254  return true;
1255 
1256  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1257  // a smaller encoding and avoids a scaled-index.
1258  if (AM.Scale == 2 &&
1259  AM.BaseType == X86ISelAddressMode::RegBase &&
1260  AM.Base_Reg.getNode() == nullptr) {
1261  AM.Base_Reg = AM.IndexReg;
1262  AM.Scale = 1;
1263  }
1264 
1265  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1266  // because it has a smaller encoding.
1267  // TODO: Which other code models can use this?
1268  switch (TM.getCodeModel()) {
1269  default: break;
1270  case CodeModel::Small:
1271  case CodeModel::Kernel:
1272  if (Subtarget->is64Bit() &&
1273  AM.Scale == 1 &&
1274  AM.BaseType == X86ISelAddressMode::RegBase &&
1275  AM.Base_Reg.getNode() == nullptr &&
1276  AM.IndexReg.getNode() == nullptr &&
1277  AM.SymbolFlags == X86II::MO_NO_FLAG &&
1278  AM.hasSymbolicDisplacement())
1279  AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1280  break;
1281  }
1282 
1283  return false;
1284 }
1285 
1286 bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
1287  unsigned Depth) {
1288  // Add an artificial use to this node so that we can keep track of
1289  // it if it gets CSE'd with a different node.
1290  HandleSDNode Handle(N);
1291 
1292  X86ISelAddressMode Backup = AM;
1293  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1294  !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1295  return false;
1296  AM = Backup;
1297 
1298  // Try again after commuting the operands.
1299  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
1300  !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1301  return false;
1302  AM = Backup;
1303 
1304  // If we couldn't fold both operands into the address at the same time,
1305  // see if we can just put each operand into a register and fold at least
1306  // the add.
1307  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1308  !AM.Base_Reg.getNode() &&
1309  !AM.IndexReg.getNode()) {
1310  N = Handle.getValue();
1311  AM.Base_Reg = N.getOperand(0);
1312  AM.IndexReg = N.getOperand(1);
1313  AM.Scale = 1;
1314  return false;
1315  }
1316  N = Handle.getValue();
1317  return true;
1318 }
1319 
1320 // Insert a node into the DAG at least before the Pos node's position. This
1321 // will reposition the node as needed, and will assign it a node ID that is <=
1322 // the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1323 // IDs! The selection DAG must no longer depend on their uniqueness when this
1324 // is used.
1325 static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
1326  if (N->getNodeId() == -1 ||
 1327  (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
 1328  SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
 1329  DAG.RepositionNode(Pos->getIterator(), N.getNode());
1330  // Mark Node as invalid for pruning as after this it may be a successor to a
1331  // selected node but otherwise be in the same position of Pos.
1332  // Conservatively mark it with the same -abs(Id) to assure node id
1333  // invariant is preserved.
1334  N->setNodeId(Pos->getNodeId());
 1335  SelectionDAGISel::InvalidateNodeId(N.getNode());
 1336  }
1337 }
1338 
1339 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1340 // safe. This allows us to convert the shift and and into an h-register
1341 // extract and a scaled index. Returns false if the simplification is
1342 // performed.
 1343 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
 1344  uint64_t Mask,
1345  SDValue Shift, SDValue X,
1346  X86ISelAddressMode &AM) {
1347  if (Shift.getOpcode() != ISD::SRL ||
1348  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1349  !Shift.hasOneUse())
1350  return true;
1351 
1352  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1353  if (ScaleLog <= 0 || ScaleLog >= 4 ||
1354  Mask != (0xffu << ScaleLog))
1355  return true;
1356 
1357  MVT VT = N.getSimpleValueType();
1358  SDLoc DL(N);
1359  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1360  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1361  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1362  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1363  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1364  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1365 
1366  // Insert the new nodes into the topological ordering. We must do this in
1367  // a valid topological ordering as nothing is going to go back and re-sort
1368  // these nodes. We continually insert before 'N' in sequence as this is
1369  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1370  // hierarchy left to express.
1371  insertDAGNode(DAG, N, Eight);
1372  insertDAGNode(DAG, N, Srl);
1373  insertDAGNode(DAG, N, NewMask);
1374  insertDAGNode(DAG, N, And);
1375  insertDAGNode(DAG, N, ShlCount);
1376  insertDAGNode(DAG, N, Shl);
1377  DAG.ReplaceAllUsesWith(N, Shl);
1378  AM.IndexReg = And;
1379  AM.Scale = (1 << ScaleLog);
1380  return false;
1381 }
1382 
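The rewrite that foldMaskAndShiftToExtract performs above is a pure bit-twiddling identity; the sketch below (standalone, not LLVM code) checks it for the C1 values the addressing mode can absorb (scales 2, 4 and 8):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Samples[] = {0x0, 0x1234, 0xDEADBEEF, ~0ull};
  for (unsigned C1 = 1; C1 <= 3; ++C1)
    for (uint64_t X : Samples) {
      // Original form: shift by (8 - C1), then mask with 0xff << C1.
      uint64_t Orig = (X >> (8 - C1)) & (0xffull << C1);
      // Rewritten form: an 8-bit extract (an h-register on x86), then a
      // shift that the addressing mode turns into a scaled index.
      uint64_t Rewritten = ((X >> 8) & 0xffull) << C1;
      assert(Orig == Rewritten);
    }
  return 0;
}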
1383 // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1384 // allows us to fold the shift into this addressing mode. Returns false if the
1385 // transform succeeded.
 1386 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
 1387  X86ISelAddressMode &AM) {
1388  SDValue Shift = N.getOperand(0);
1389 
1390  // Use a signed mask so that shifting right will insert sign bits. These
1391  // bits will be removed when we shift the result left so it doesn't matter
1392  // what we use. This might allow a smaller immediate encoding.
1393  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
1394 
1395  // If we have an any_extend feeding the AND, look through it to see if there
1396  // is a shift behind it. But only if the AND doesn't use the extended bits.
1397  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
1398  bool FoundAnyExtend = false;
1399  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
1400  Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
1401  isUInt<32>(Mask)) {
1402  FoundAnyExtend = true;
1403  Shift = Shift.getOperand(0);
1404  }
1405 
1406  if (Shift.getOpcode() != ISD::SHL ||
1407  !isa<ConstantSDNode>(Shift.getOperand(1)))
1408  return true;
1409 
1410  SDValue X = Shift.getOperand(0);
1411 
1412  // Not likely to be profitable if either the AND or SHIFT node has more
1413  // than one use (unless all uses are for address computation). Besides,
1414  // isel mechanism requires their node ids to be reused.
1415  if (!N.hasOneUse() || !Shift.hasOneUse())
1416  return true;
1417 
1418  // Verify that the shift amount is something we can fold.
1419  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1420  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1421  return true;
1422 
1423  MVT VT = N.getSimpleValueType();
1424  SDLoc DL(N);
1425  if (FoundAnyExtend) {
1426  SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
1427  insertDAGNode(DAG, N, NewX);
1428  X = NewX;
1429  }
1430 
1431  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1432  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1433  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1434 
1435  // Insert the new nodes into the topological ordering. We must do this in
1436  // a valid topological ordering as nothing is going to go back and re-sort
1437  // these nodes. We continually insert before 'N' in sequence as this is
1438  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1439  // hierarchy left to express.
1440  insertDAGNode(DAG, N, NewMask);
1441  insertDAGNode(DAG, N, NewAnd);
1442  insertDAGNode(DAG, N, NewShift);
1443  DAG.ReplaceAllUsesWith(N, NewShift);
1444 
1445  AM.Scale = 1 << ShiftAmt;
1446  AM.IndexReg = NewAnd;
1447  return false;
1448 }
1449 
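foldMaskedShiftToScaledMask above likewise relies on a simple identity: masking after a left shift is the same as masking the pre-shift value with the right-shifted mask. A standalone check, not LLVM code:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t C2 = 0x3FC;  // arbitrary mask constant
  const uint64_t Samples[] = {0x0, 0x5A, 0x12345678, ~0ull};
  for (unsigned C1 = 1; C1 <= 3; ++C1)   // shifts foldable as scale 2, 4, 8
    for (uint64_t X : Samples) {
      uint64_t Orig = (X << C1) & C2;
      uint64_t Rewritten = (X & (C2 >> C1)) << C1;
      assert(Orig == Rewritten);
    }
  return 0;
}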
1450 // Implement some heroics to detect shifts of masked values where the mask can
1451 // be replaced by extending the shift and undoing that in the addressing mode
1452 // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1453 // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1454 // the addressing mode. This results in code such as:
1455 //
1456 // int f(short *y, int *lookup_table) {
1457 // ...
1458 // return *y + lookup_table[*y >> 11];
1459 // }
1460 //
1461 // Turning into:
1462 // movzwl (%rdi), %eax
1463 // movl %eax, %ecx
1464 // shrl $11, %ecx
1465 // addl (%rsi,%rcx,4), %eax
1466 //
1467 // Instead of:
1468 // movzwl (%rdi), %eax
1469 // movl %eax, %ecx
1470 // shrl $9, %ecx
1471 // andl $124, %rcx
1472 // addl (%rsi,%rcx), %eax
1473 //
1474 // Note that this function assumes the mask is provided as a mask *after* the
1475 // value is shifted. The input chain may or may not match that, but computing
1476 // such a mask is trivial.
 1477 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
 1478  uint64_t Mask,
1479  SDValue Shift, SDValue X,
1480  X86ISelAddressMode &AM) {
1481  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1482  !isa<ConstantSDNode>(Shift.getOperand(1)))
1483  return true;
1484 
1485  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1486  unsigned MaskLZ = countLeadingZeros(Mask);
1487  unsigned MaskTZ = countTrailingZeros(Mask);
1488 
1489  // The amount of shift we're trying to fit into the addressing mode is taken
1490  // from the trailing zeros of the mask.
1491  unsigned AMShiftAmt = MaskTZ;
1492 
1493  // There is nothing we can do here unless the mask is removing some bits.
1494  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1495  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1496 
1497  // We also need to ensure that mask is a continuous run of bits.
1498  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1499 
1500  // Scale the leading zero count down based on the actual size of the value.
1501  // Also scale it down based on the size of the shift.
1502  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1503  if (MaskLZ < ScaleDown)
1504  return true;
1505  MaskLZ -= ScaleDown;
1506 
1507  // The final check is to ensure that any masked out high bits of X are
1508  // already known to be zero. Otherwise, the mask has a semantic impact
1509  // other than masking out a couple of low bits. Unfortunately, because of
1510  // the mask, zero extensions will be removed from operands in some cases.
1511  // This code works extra hard to look through extensions because we can
1512  // replace them with zero extensions cheaply if necessary.
1513  bool ReplacingAnyExtend = false;
1514  if (X.getOpcode() == ISD::ANY_EXTEND) {
1515  unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
 1516  X.getOperand(0).getSimpleValueType().getSizeInBits();
 1517  // Assume that we'll replace the any-extend with a zero-extend, and
1518  // narrow the search to the extended value.
1519  X = X.getOperand(0);
1520  MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1521  ReplacingAnyExtend = true;
1522  }
1523  APInt MaskedHighBits =
 1524  APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
 1525  KnownBits Known = DAG.computeKnownBits(X);
1526  if (MaskedHighBits != Known.Zero) return true;
1527 
1528  // We've identified a pattern that can be transformed into a single shift
1529  // and an addressing mode. Make it so.
1530  MVT VT = N.getSimpleValueType();
1531  if (ReplacingAnyExtend) {
1532  assert(X.getValueType() != VT);
1533  // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1534  SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1535  insertDAGNode(DAG, N, NewX);
1536  X = NewX;
1537  }
1538  SDLoc DL(N);
1539  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1540  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1541  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1542  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1543 
1544  // Insert the new nodes into the topological ordering. We must do this in
1545  // a valid topological ordering as nothing is going to go back and re-sort
1546  // these nodes. We continually insert before 'N' in sequence as this is
1547  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1548  // hierarchy left to express.
1549  insertDAGNode(DAG, N, NewSRLAmt);
1550  insertDAGNode(DAG, N, NewSRL);
1551  insertDAGNode(DAG, N, NewSHLAmt);
1552  insertDAGNode(DAG, N, NewSHL);
1553  DAG.ReplaceAllUsesWith(N, NewSHL);
1554 
1555  AM.Scale = 1 << AMShiftAmt;
1556  AM.IndexReg = NewSRL;
1557  return false;
1558 }
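// Editor's note: illustrative sketch, not part of the original source. The
// net effect of a successful foldMaskAndShiftToScale is roughly
//
//   (and (srl x, 5), 0x78)        ; mask is a contiguous run with 3 trailing zeros
//     ==>  AM.IndexReg = (srl x, 8),  AM.Scale = 8
//
// assuming the masked-out high bits of x are already known to be zero: the
// residual right shift is widened by the mask's trailing-zero count and the
// low-bit clearing is re-expressed as a *8 scale in the addressing mode.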
1559 
1560 // Transform "(X >> SHIFT) & (MASK << C1)" to
1561 // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
1562 // matched to a BEXTR later. Returns false if the simplification is performed.
1563 static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
1564  uint64_t Mask,
1565  SDValue Shift, SDValue X,
1566  X86ISelAddressMode &AM,
1567  const X86Subtarget &Subtarget) {
1568  if (Shift.getOpcode() != ISD::SRL ||
1569  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1570  !Shift.hasOneUse() || !N.hasOneUse())
1571  return true;
1572 
1573  // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
1574  if (!Subtarget.hasTBM() &&
1575  !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
1576  return true;
1577 
1578  // We need to ensure that mask is a continuous run of bits.
1579  if (!isShiftedMask_64(Mask)) return true;
1580 
1581  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1582 
1583  // The amount of shift we're trying to fit into the addressing mode is taken
1584  // from the trailing zeros of the mask.
1585  unsigned AMShiftAmt = countTrailingZeros(Mask);
1586 
1587  // There is nothing we can do here unless the mask is removing some bits.
1588  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1589  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1590 
1591  MVT VT = N.getSimpleValueType();
1592  SDLoc DL(N);
1593  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1594  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1595  SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
1596  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
1597  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1598  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
1599 
1600  // Insert the new nodes into the topological ordering. We must do this in
1601  // a valid topological ordering as nothing is going to go back and re-sort
1602  // these nodes. We continually insert before 'N' in sequence as this is
1603  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1604  // hierarchy left to express.
1605  insertDAGNode(DAG, N, NewSRLAmt);
1606  insertDAGNode(DAG, N, NewSRL);
1607  insertDAGNode(DAG, N, NewMask);
1608  insertDAGNode(DAG, N, NewAnd);
1609  insertDAGNode(DAG, N, NewSHLAmt);
1610  insertDAGNode(DAG, N, NewSHL);
1611  DAG.ReplaceAllUsesWith(N, NewSHL);
1612 
1613  AM.Scale = 1 << AMShiftAmt;
1614  AM.IndexReg = NewAnd;
1615  return false;
1616 }
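// Editor's note: illustrative sketch, not part of the original source. On a
// TBM or fast-BEXTR target, a pattern such as
//
//   (and (srl x, 7), 0x1f8)  used as an address index
//     ==>  AM.IndexReg = (and (srl x, 10), 0x3f),  AM.Scale = 8
//
// keeps the shift+mask in a shape that matchBEXTRFromAndImm can later turn
// into a single BEXTR, while the <<3 implied by the mask folds into the scale.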
1617 
1618 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1619  unsigned Depth) {
1620  SDLoc dl(N);
1621  LLVM_DEBUG({
1622  dbgs() << "MatchAddress: ";
1623  AM.dump(CurDAG);
1624  });
1625  // Limit recursion.
1626  if (Depth > 5)
1627  return matchAddressBase(N, AM);
1628 
1629  // If this is already a %rip relative address, we can only merge immediates
1630  // into it. Instead of handling this in every case, we handle it here.
1631  // RIP relative addressing: %rip + 32-bit displacement!
1632  if (AM.isRIPRelative()) {
1633  // FIXME: JumpTable and ExternalSymbol address currently don't like
1634  // displacements. It isn't very important, but this should be fixed for
1635  // consistency.
1636  if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1637  return true;
1638 
1639  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1640  if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1641  return false;
1642  return true;
1643  }
1644 
1645  switch (N.getOpcode()) {
1646  default: break;
1647  case ISD::LOCAL_RECOVER: {
1648  if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1649  if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1650  // Use the symbol and don't prefix it.
1651  AM.MCSym = ESNode->getMCSymbol();
1652  return false;
1653  }
1654  break;
1655  }
1656  case ISD::Constant: {
1657  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1658  if (!foldOffsetIntoAddress(Val, AM))
1659  return false;
1660  break;
1661  }
1662 
1663  case X86ISD::Wrapper:
1664  case X86ISD::WrapperRIP:
1665  if (!matchWrapper(N, AM))
1666  return false;
1667  break;
1668 
1669  case ISD::LOAD:
1670  if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1671  return false;
1672  break;
1673 
1674  case ISD::FrameIndex:
1675  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1676  AM.Base_Reg.getNode() == nullptr &&
1677  (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1678  AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1679  AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1680  return false;
1681  }
1682  break;
1683 
1684  case ISD::SHL:
1685  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1686  break;
1687 
1688  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1689  unsigned Val = CN->getZExtValue();
1690  // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1691  // that the base operand remains free for further matching. If
1692  // the base doesn't end up getting used, a post-processing step
1693  // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1694  if (Val == 1 || Val == 2 || Val == 3) {
1695  AM.Scale = 1 << Val;
1696  SDValue ShVal = N.getOperand(0);
1697 
1698  // Okay, we know that we have a scale by now. However, if the scaled
1699  // value is an add of something and a constant, we can fold the
1700  // constant into the disp field here.
1701  if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1702  AM.IndexReg = ShVal.getOperand(0);
1703  ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1704  uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1705  if (!foldOffsetIntoAddress(Disp, AM))
1706  return false;
1707  }
1708 
1709  AM.IndexReg = ShVal;
1710  return false;
1711  }
1712  }
1713  break;
1714 
1715  case ISD::SRL: {
1716  // Scale must not be used already.
1717  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1718 
1719  // We only handle up to 64-bit values here as those are what matter for
1720  // addressing mode optimizations.
1721  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
1722  "Unexpected value size!");
1723 
1724  SDValue And = N.getOperand(0);
1725  if (And.getOpcode() != ISD::AND) break;
1726  SDValue X = And.getOperand(0);
1727 
1728  // The mask used for the transform is expected to be post-shift, but we
1729  // found the shift first so just apply the shift to the mask before passing
1730  // it down.
1731  if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1732  !isa<ConstantSDNode>(And.getOperand(1)))
1733  break;
1734  uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1735 
1736  // Try to fold the mask and shift into the scale, and return false if we
1737  // succeed.
1738  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1739  return false;
1740  break;
1741  }
1742 
1743  case ISD::SMUL_LOHI:
1744  case ISD::UMUL_LOHI:
1745  // A mul_lohi where we need the low part can be folded as a plain multiply.
1746  if (N.getResNo() != 0) break;
1747  LLVM_FALLTHROUGH;
1748  case ISD::MUL:
1749  case X86ISD::MUL_IMM:
1750  // X*[3,5,9] -> X+X*[2,4,8]
1751  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1752  AM.Base_Reg.getNode() == nullptr &&
1753  AM.IndexReg.getNode() == nullptr) {
1754  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1755  if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1756  CN->getZExtValue() == 9) {
1757  AM.Scale = unsigned(CN->getZExtValue())-1;
1758 
1759  SDValue MulVal = N.getOperand(0);
1760  SDValue Reg;
1761 
1762  // Okay, we know that we have a scale by now. However, if the scaled
1763  // value is an add of something and a constant, we can fold the
1764  // constant into the disp field here.
1765  if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1766  isa<ConstantSDNode>(MulVal.getOperand(1))) {
1767  Reg = MulVal.getOperand(0);
1768  ConstantSDNode *AddVal =
1769  cast<ConstantSDNode>(MulVal.getOperand(1));
1770  uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1771  if (foldOffsetIntoAddress(Disp, AM))
1772  Reg = N.getOperand(0);
1773  } else {
1774  Reg = N.getOperand(0);
1775  }
1776 
1777  AM.IndexReg = AM.Base_Reg = Reg;
1778  return false;
1779  }
1780  }
1781  break;
1782 
1783  case ISD::SUB: {
1784  // Given A-B, if A can be completely folded into the address with the
1785  // index field left unused, use -B as the index. This is a win if A has
1786  // multiple parts that can be folded into the address. It also saves a
1787  // mov if the base register has other uses, since it avoids a two-address
1788  // sub instruction; however, it costs an additional mov if the index
1789  // register has other uses.
1790 
1791  // Add an artificial use to this node so that we can keep track of
1792  // it if it gets CSE'd with a different node.
1793  HandleSDNode Handle(N);
1794 
1795  // Test if the LHS of the sub can be folded.
1796  X86ISelAddressMode Backup = AM;
1797  if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
1798  N = Handle.getValue();
1799  AM = Backup;
1800  break;
1801  }
1802  N = Handle.getValue();
1803  // Test if the index field is free for use.
1804  if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
1805  AM = Backup;
1806  break;
1807  }
1808 
1809  int Cost = 0;
1810  SDValue RHS = N.getOperand(1);
1811  // If the RHS involves a register with multiple uses, this
1812  // transformation incurs an extra mov, due to the neg instruction
1813  // clobbering its operand.
1814  if (!RHS.getNode()->hasOneUse() ||
1815  RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
1816  RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
1817  RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
1818  (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
1819  RHS.getOperand(0).getValueType() == MVT::i32))
1820  ++Cost;
1821  // If the base is a register with multiple uses, this
1822  // transformation may save a mov.
1823  if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
1824  !AM.Base_Reg.getNode()->hasOneUse()) ||
1825  AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1826  --Cost;
1827  // If the folded LHS was interesting, this transformation saves
1828  // address arithmetic.
1829  if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
1830  ((AM.Disp != 0) && (Backup.Disp == 0)) +
1831  (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
1832  --Cost;
1833  // If it doesn't look like it may be an overall win, don't do it.
1834  if (Cost >= 0) {
1835  AM = Backup;
1836  break;
1837  }
1838 
1839  // Ok, the transformation is legal and appears profitable. Go for it.
1840  SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
1841  SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
1842  AM.IndexReg = Neg;
1843  AM.Scale = 1;
1844 
1845  // Insert the new nodes into the topological ordering.
1846  insertDAGNode(*CurDAG, N, Zero);
1847  insertDAGNode(*CurDAG, N, Neg);
1848  return false;
1849  }
1850 
1851  case ISD::ADD:
1852  if (!matchAdd(N, AM, Depth))
1853  return false;
1854  break;
1855 
1856  case ISD::OR:
1857  // We want to look through a transform in InstCombine and DAGCombiner that
1858  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
1859  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
1860  // An 'lea' can then be used to match the shift (multiply) and add:
1861  // and $1, %esi
1862  // lea (%rsi, %rdi, 8), %rax
1863  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
1864  !matchAdd(N, AM, Depth))
1865  return false;
1866  break;
1867 
1868  case ISD::AND: {
1869  // Perform some heroic transforms on an and of a constant-count shift
1870  // with a constant to enable use of the scaled offset field.
1871 
1872  // Scale must not be used already.
1873  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1874 
1875  // We only handle up to 64-bit values here as those are what matter for
1876  // addressing mode optimizations.
1877  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
1878  "Unexpected value size!");
1879 
1880  if (!isa<ConstantSDNode>(N.getOperand(1)))
1881  break;
1882 
1883  if (N.getOperand(0).getOpcode() == ISD::SRL) {
1884  SDValue Shift = N.getOperand(0);
1885  SDValue X = Shift.getOperand(0);
1886 
1887  uint64_t Mask = N.getConstantOperandVal(1);
1888 
1889  // Try to fold the mask and shift into an extract and scale.
1890  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
1891  return false;
1892 
1893  // Try to fold the mask and shift directly into the scale.
1894  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
1895  return false;
1896 
1897  // Try to fold the mask and shift into BEXTR and scale.
1898  if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
1899  return false;
1900  }
1901 
1902  // Try to swap the mask and shift to place shifts which can be done as
1903  // a scale on the outside of the mask.
1904  if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
1905  return false;
1906 
1907  break;
1908  }
1909  case ISD::ZERO_EXTEND: {
1910  // Try to widen a zexted shift left to the same size as its use, so we can
1911  // match the shift as a scale factor.
1912  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1913  break;
1914  if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
1915  break;
1916 
1917  // Give up if the shift is not a valid scale factor [1,2,3].
1918  SDValue Shl = N.getOperand(0);
1919  auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
1920  if (!ShAmtC || ShAmtC->getZExtValue() > 3)
1921  break;
1922 
1923  // The narrow shift must only shift out zero bits (it must be 'nuw').
1924  // That makes it safe to widen to the destination type.
1925  APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
1926  ShAmtC->getZExtValue());
1927  if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
1928  break;
1929 
1930  // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
1931  MVT VT = N.getSimpleValueType();
1932  SDLoc DL(N);
1933  SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
1934  SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));
1935 
1936  // Convert the shift to scale factor.
1937  AM.Scale = 1 << ShAmtC->getZExtValue();
1938  AM.IndexReg = Zext;
1939 
1940  insertDAGNode(*CurDAG, N, Zext);
1941  insertDAGNode(*CurDAG, N, NewShl);
1942  CurDAG->ReplaceAllUsesWith(N, NewShl);
1943  return false;
1944  }
1945  }
1946 
1947  return matchAddressBase(N, AM);
1948 }
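// Editor's note: illustrative sketch, not part of the original source. A
// typical successful match of the recursion above is
//
//   (add (add %base, (shl %i, 3)), 40)
//     ==>  AM.Base_Reg = %base, AM.IndexReg = %i, AM.Scale = 8, AM.Disp = 40
//
// which the caller can then emit as the single memory operand 40(%base,%i,8).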
1949 
1950 /// Helper for MatchAddress. Add the specified node to the
1951 /// specified addressing mode without any further recursion.
1952 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1953  // Is the base register already occupied?
1954  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
1955  // If so, check to see if the scale index register is set.
1956  if (!AM.IndexReg.getNode()) {
1957  AM.IndexReg = N;
1958  AM.Scale = 1;
1959  return false;
1960  }
1961 
1962  // Otherwise, we cannot select it.
1963  return true;
1964  }
1965 
1966  // Default, generate it as a register.
1967  AM.BaseType = X86ISelAddressMode::RegBase;
1968  AM.Base_Reg = N;
1969  return false;
1970 }
1971 
1972 /// Helper for selectVectorAddr. Handles things that can be folded into a
1973 /// gather scatter address. The index register and scale should have already
1974 /// been handled.
1975 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
1976  // TODO: Support other operations.
1977  switch (N.getOpcode()) {
1978  case ISD::Constant: {
1979  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1980  if (!foldOffsetIntoAddress(Val, AM))
1981  return false;
1982  break;
1983  }
1984  case X86ISD::Wrapper:
1985  if (!matchWrapper(N, AM))
1986  return false;
1987  break;
1988  }
1989 
1990  return matchAddressBase(N, AM);
1991 }
1992 
1993 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
1994  SDValue &Scale, SDValue &Index,
1995  SDValue &Disp, SDValue &Segment) {
1996  X86ISelAddressMode AM;
1997  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
1998  AM.IndexReg = Mgs->getIndex();
1999  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
2000 
2001  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2002  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2003  if (AddrSpace == 256)
2004  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2005  if (AddrSpace == 257)
2006  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2007  if (AddrSpace == 258)
2008  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2009 
2010  // Try to match into the base and displacement fields.
2011  if (matchVectorAddress(N, AM))
2012  return false;
2013 
2014  MVT VT = N.getSimpleValueType();
2015  if (AM.BaseType == X86ISelAddressMode::RegBase) {
2016  if (!AM.Base_Reg.getNode())
2017  AM.Base_Reg = CurDAG->getRegister(0, VT);
2018  }
2019 
2020  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
2021  return true;
2022 }
2023 
2024 /// Returns true if it is able to pattern match an addressing mode.
2025 /// It returns the operands which make up the maximal addressing mode it can
2026 /// match by reference.
2027 ///
2028 /// Parent is the parent node of the addr operand that is being matched. It
2029 /// is always a load, store, atomic node, or null. It is only null when
2030 /// checking memory operands for inline asm nodes.
2031 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
2032  SDValue &Scale, SDValue &Index,
2033  SDValue &Disp, SDValue &Segment) {
2034  X86ISelAddressMode AM;
2035 
2036  if (Parent &&
2037  // This list of opcodes are all the nodes that have an "addr:$ptr" operand
2038  // that are not a MemSDNode, and thus don't have proper addrspace info.
2039  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
2040  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
2041  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
2042  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
2043  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
2044  unsigned AddrSpace =
2045  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2046  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2047  if (AddrSpace == 256)
2048  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2049  if (AddrSpace == 257)
2050  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2051  if (AddrSpace == 258)
2052  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2053  }
2054 
2055  if (matchAddress(N, AM))
2056  return false;
2057 
2058  MVT VT = N.getSimpleValueType();
2059  if (AM.BaseType == X86ISelAddressMode::RegBase) {
2060  if (!AM.Base_Reg.getNode())
2061  AM.Base_Reg = CurDAG->getRegister(0, VT);
2062  }
2063 
2064  if (!AM.IndexReg.getNode())
2065  AM.IndexReg = CurDAG->getRegister(0, VT);
2066 
2067  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
2068  return true;
2069 }
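// Editor's note: illustrative sketch, not part of the original source. The
// address-space handling above means an IR access such as (hypothetical)
//
//   %v = load i32, i32 addrspace(257)* %p
//
// selects with AM.Segment = FS and is emitted with a segment prefix, e.g.
//   movl %fs:(%rax), %ecx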
2070 
2071 // We can only fold a load if all nodes between it and the root node have a
2072 // single use. If there are additional uses, we could end up duplicating the
2073 // load.
2074 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
2075  while (User != Root) {
2076  if (!User->hasOneUse())
2077  return false;
2078  User = *User->use_begin();
2079  }
2080 
2081  return true;
2082 }
2083 
2084 /// Match a scalar SSE load. In particular, we want to match a load whose top
2085 /// elements are either undef or zeros. The load flavor is derived from the
2086 /// type of N, which is either v4f32 or v2f64.
2087 ///
2088 /// We also return:
2089 /// PatternChainNode: this is the matched node that has a chain input and
2090 /// output.
2091 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
2092  SDValue N, SDValue &Base,
2093  SDValue &Scale, SDValue &Index,
2094  SDValue &Disp, SDValue &Segment,
2095  SDValue &PatternNodeWithChain) {
2096  if (!hasSingleUsesFromRoot(Root, Parent))
2097  return false;
2098 
2099  // We can allow a full vector load here since narrowing a load is ok.
2100  if (ISD::isNON_EXTLoad(N.getNode())) {
2101  PatternNodeWithChain = N;
2102  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2103  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
2104  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2105  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2106  Segment);
2107  }
2108  }
2109 
2110  // We can also match the special zero extended load opcode.
2111  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
2112  PatternNodeWithChain = N;
2113  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2114  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
2115  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
2116  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
2117  Segment);
2118  }
2119  }
2120 
2121  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
2122  // once. Otherwise the load might get duplicated and the chain output of the
2123  // duplicate load will not be observed by all dependencies.
2124  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
2125  PatternNodeWithChain = N.getOperand(0);
2126  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2127  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2128  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2129  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2130  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2131  Segment);
2132  }
2133  }
2134 
2135  // Also handle the case where we explicitly require zeros in the top
2136  // elements. This is a vector shuffle from the zero vector.
2137  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
2138  // Check to see if the top elements are all zeros (or bitcast of zeros).
2139  N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
2140  N.getOperand(0).getNode()->hasOneUse()) {
2141  PatternNodeWithChain = N.getOperand(0).getOperand(0);
2142  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2143  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2144  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2145  // Okay, this is a zero extending load. Fold it.
2146  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2147  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2148  Segment);
2149  }
2150  }
2151 
2152  return false;
2153 }
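// Editor's note: illustrative sketch, not part of the original source. The
// cases above allow folds such as
//
//   (scalar_to_vector (load %p))  feeding a v4f32 operand
//     ==>  the operand becomes a memory form, e.g.  addss (%rdi), %xmm0
//
// provided the load and any SCALAR_TO_VECTOR/VZEXT_MOVL wrappers have a
// single use, so the load's chain output is not duplicated.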
2154 
2155 
2156 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
2157  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2158  uint64_t ImmVal = CN->getZExtValue();
2159  if (!isUInt<32>(ImmVal))
2160  return false;
2161 
2162  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
2163  return true;
2164  }
2165 
2166  // In static codegen with small code model, we can get the address of a label
2167  // into a register with 'movl'
2168  if (N->getOpcode() != X86ISD::Wrapper)
2169  return false;
2170 
2171  N = N.getOperand(0);
2172 
2173  // At least GNU as does not accept 'movl' for TPOFF relocations.
2174  // FIXME: We could use 'movl' when we know we are targeting MC.
2175  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
2176  return false;
2177 
2178  Imm = N;
2179  if (N->getOpcode() != ISD::TargetGlobalAddress)
2180  return TM.getCodeModel() == CodeModel::Small;
2181 
2182  Optional<ConstantRange> CR =
2183  cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
2184  if (!CR)
2185  return TM.getCodeModel() == CodeModel::Small;
2186 
2187  return CR->getUnsignedMax().ult(1ull << 32);
2188 }
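// Editor's note: illustrative sketch, not part of the original source. A
// 64-bit constant whose value fits in 32 unsigned bits can be materialized as
//
//   movl $0x12345678, %eax    ; 5 bytes, implicitly zeroes bits 63:32 of RAX
//
// instead of the 10-byte movabsq with a full 64-bit immediate.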
2189 
2190 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
2191  SDValue &Scale, SDValue &Index,
2192  SDValue &Disp, SDValue &Segment) {
2193  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
2194  SDLoc DL(N);
2195 
2196  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
2197  return false;
2198 
2199  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
2200  if (RN && RN->getReg() == 0)
2201  Base = CurDAG->getRegister(0, MVT::i64);
2202  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
2203  // Base could already be %rip, particularly in the x32 ABI.
2204  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2205  MVT::i64), 0);
2206  Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2207  Base);
2208  }
2209 
2210  RN = dyn_cast<RegisterSDNode>(Index);
2211  if (RN && RN->getReg() == 0)
2212  Index = CurDAG->getRegister(0, MVT::i64);
2213  else {
2214  assert(Index.getValueType() == MVT::i32 &&
2215  "Expect to be extending 32-bit registers for use in LEA");
2216  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2217  MVT::i64), 0);
2218  Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2219  Index);
2220  }
2221 
2222  return true;
2223 }
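// Editor's note: illustrative sketch, not part of the original source. The
// subregister insertions above let a 32-bit address computation be emitted as
// the 64-bit-operand, 32-bit-result LEA form, e.g.
//
//   leal 4(%rdi,%rsi,2), %eax
//
// where the 32-bit inputs are placed into the low halves of otherwise
// undefined 64-bit registers via IMPLICIT_DEF + insert_subreg.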
2224 
2225 /// Calls SelectAddr and determines if the maximal addressing
2226 /// mode it matches can be cost effectively emitted as an LEA instruction.
2227 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
2228  SDValue &Base, SDValue &Scale,
2229  SDValue &Index, SDValue &Disp,
2230  SDValue &Segment) {
2231  X86ISelAddressMode AM;
2232 
2233  // Save the DL and VT before calling matchAddress, it can invalidate N.
2234  SDLoc DL(N);
2235  MVT VT = N.getSimpleValueType();
2236 
2237  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
2238  // segments.
2239  SDValue Copy = AM.Segment;
2240  SDValue T = CurDAG->getRegister(0, MVT::i32);
2241  AM.Segment = T;
2242  if (matchAddress(N, AM))
2243  return false;
2244  assert (T == AM.Segment);
2245  AM.Segment = Copy;
2246 
2247  unsigned Complexity = 0;
2248  if (AM.BaseType == X86ISelAddressMode::RegBase)
2249  if (AM.Base_Reg.getNode())
2250  Complexity = 1;
2251  else
2252  AM.Base_Reg = CurDAG->getRegister(0, VT);
2253  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2254  Complexity = 4;
2255 
2256  if (AM.IndexReg.getNode())
2257  Complexity++;
2258  else
2259  AM.IndexReg = CurDAG->getRegister(0, VT);
2260 
2261  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
2262  // a simple shift.
2263  if (AM.Scale > 1)
2264  Complexity++;
2265 
2266  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
2267  // to a LEA. This is determined with some experimentation but is by no means
2268  // optimal (especially for code size consideration). LEA is nice because of
2269  // its three-address nature. Tweak the cost function again when we can run
2270  // convertToThreeAddress() at register allocation time.
2271  if (AM.hasSymbolicDisplacement()) {
2272  // For X86-64, always use LEA to materialize RIP-relative addresses.
2273  if (Subtarget->is64Bit())
2274  Complexity = 4;
2275  else
2276  Complexity += 2;
2277  }
2278 
2279  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
2280  Complexity++;
2281 
2282  // If it isn't worth using an LEA, reject it.
2283  if (Complexity <= 2)
2284  return false;
2285 
2286  getAddressOperands(AM, DL, Base, Scale, Index, Disp, Segment);
2287  return true;
2288 }
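// Editor's note: illustrative sketch, not part of the original source. The
// Complexity heuristic above rejects trivial forms: a match that is only
// (%reg + %reg) scores 2 and is left to ADD or a shift, whereas
//
//   lea 16(%rdi,%rsi,4), %rax
//
// (base + scaled index + displacement) scores higher and is emitted as an
// LEA, keeping its three-address, flag-preserving nature.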
2289 
2290 /// This is only run on TargetGlobalTLSAddress nodes.
2291 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
2292  SDValue &Scale, SDValue &Index,
2293  SDValue &Disp, SDValue &Segment) {
2294  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
2295  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
2296 
2297  X86ISelAddressMode AM;
2298  AM.GV = GA->getGlobal();
2299  AM.Disp += GA->getOffset();
2300  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
2301  AM.SymbolFlags = GA->getTargetFlags();
2302 
2303  if (N.getValueType() == MVT::i32) {
2304  AM.Scale = 1;
2305  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
2306  } else {
2307  AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
2308  }
2309 
2310  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
2311  return true;
2312 }
2313 
2314 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
2315  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2316  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
2317  N.getValueType());
2318  return true;
2319  }
2320 
2321  // Keep track of the original value type and whether this value was
2322  // truncated. If we see a truncation from pointer type to VT that truncates
2323  // bits that are known to be zero, we can use a narrow reference.
2324  EVT VT = N.getValueType();
2325  bool WasTruncated = false;
2326  if (N.getOpcode() == ISD::TRUNCATE) {
2327  WasTruncated = true;
2328  N = N.getOperand(0);
2329  }
2330 
2331  if (N.getOpcode() != X86ISD::Wrapper)
2332  return false;
2333 
2334  // We can only use non-GlobalValues as immediates if they were not truncated,
2335  // as we do not have any range information. If we have a GlobalValue and the
2336  // address was not truncated, we can select it as an operand directly.
2337  unsigned Opc = N.getOperand(0)->getOpcode();
2338  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2339  Op = N.getOperand(0);
2340  // We can only select the operand directly if we didn't have to look past a
2341  // truncate.
2342  return !WasTruncated;
2343  }
2344 
2345  // Check that the global's range fits into VT.
2346  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2347  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2348  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2349  return false;
2350 
2351  // Okay, we can use a narrow reference.
2352  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2353  GA->getOffset(), GA->getTargetFlags());
2354  return true;
2355 }
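// Editor's note: illustrative sketch, not part of the original source. A
// global declared with a bounded absolute symbol range, e.g. (hypothetical)
//
//   @sym = external global i8, !absolute_symbol !0
//   !0 = !{i64 0, i64 65536}
//
// has an unsigned maximum below 2^16, so its address can be narrowed and used
// directly as a small immediate operand here.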
2356 
2357 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2358  SDValue &Base, SDValue &Scale,
2359  SDValue &Index, SDValue &Disp,
2360  SDValue &Segment) {
2361  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2362  !IsProfitableToFold(N, P, Root) ||
2363  !IsLegalToFold(N, P, Root, OptLevel))
2364  return false;
2365 
2366  return selectAddr(N.getNode(),
2367  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2368 }
2369 
2370 /// Return an SDNode that returns the value of the global base register.
2371 /// Output instructions required to initialize the global base register,
2372 /// if necessary.
2373 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2374  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2375  auto &DL = MF->getDataLayout();
2376  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2377 }
2378 
2379 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2380  if (N->getOpcode() == ISD::TRUNCATE)
2381  N = N->getOperand(0).getNode();
2382  if (N->getOpcode() != X86ISD::Wrapper)
2383  return false;
2384 
2385  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2386  if (!GA)
2387  return false;
2388 
2389  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2390  return CR && CR->getSignedMin().sge(-1ull << Width) &&
2391  CR->getSignedMax().slt(1ull << Width);
2392 }
2393 
2394 static X86::CondCode getCondFromNode(SDNode *N) {
2395  assert(N->isMachineOpcode() && "Unexpected node");
2396  X86::CondCode CC = X86::COND_INVALID;
2397  unsigned Opc = N->getMachineOpcode();
2398  if (Opc == X86::JCC_1)
2399  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1));
2400  else if (Opc == X86::SETCCr)
2401  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0));
2402  else if (Opc == X86::SETCCm)
2403  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5));
2404  else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr ||
2405  Opc == X86::CMOV64rr)
2406  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2));
2407  else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm ||
2408  Opc == X86::CMOV64rm)
2409  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6));
2410 
2411  return CC;
2412 }
2413 
2414 /// Test whether the given X86ISD::CMP node only has users that use the
2415 /// zero flag (ZF).
2416 bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
2417  // Examine each user of the node.
2418  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2419  UI != UE; ++UI) {
2420  // Only check things that use the flags.
2421  if (UI.getUse().getResNo() != Flags.getResNo())
2422  continue;
2423  // Only examine CopyToReg uses that copy to EFLAGS.
2424  if (UI->getOpcode() != ISD::CopyToReg ||
2425  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2426  return false;
2427  // Examine each user of the CopyToReg use.
2428  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2429  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2430  // Only examine the Flag result.
2431  if (FlagUI.getUse().getResNo() != 1) continue;
2432  // Anything unusual: assume conservatively.
2433  if (!FlagUI->isMachineOpcode()) return false;
2434  // Examine the condition code of the user.
2435  X86::CondCode CC = getCondFromNode(*FlagUI);
2436 
2437  switch (CC) {
2438  // Comparisons which only use the zero flag.
2439  case X86::COND_E: case X86::COND_NE:
2440  continue;
2441  // Anything else: assume conservatively.
2442  default:
2443  return false;
2444  }
2445  }
2446  }
2447  return true;
2448 }
2449 
2450 /// Test whether the given X86ISD::CMP node has any uses which require the SF
2451 /// flag to be accurate.
2452 bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
2453  // Examine each user of the node.
2454  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2455  UI != UE; ++UI) {
2456  // Only check things that use the flags.
2457  if (UI.getUse().getResNo() != Flags.getResNo())
2458  continue;
2459  // Only examine CopyToReg uses that copy to EFLAGS.
2460  if (UI->getOpcode() != ISD::CopyToReg ||
2461  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2462  return false;
2463  // Examine each user of the CopyToReg use.
2464  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2465  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2466  // Only examine the Flag result.
2467  if (FlagUI.getUse().getResNo() != 1) continue;
2468  // Anything unusual: assume conservatively.
2469  if (!FlagUI->isMachineOpcode()) return false;
2470  // Examine the condition code of the user.
2471  X86::CondCode CC = getCondFromNode(*FlagUI);
2472 
2473  switch (CC) {
2474  // Comparisons which don't examine the SF flag.
2475  case X86::COND_A: case X86::COND_AE:
2476  case X86::COND_B: case X86::COND_BE:
2477  case X86::COND_E: case X86::COND_NE:
2478  case X86::COND_O: case X86::COND_NO:
2479  case X86::COND_P: case X86::COND_NP:
2480  continue;
2481  // Anything else: assume conservatively.
2482  default:
2483  return false;
2484  }
2485  }
2486  }
2487  return true;
2488 }
2489 
2490 static bool mayUseCarryFlag(X86::CondCode CC) {
2491  switch (CC) {
2492  // Comparisons which don't examine the CF flag.
2493  case X86::COND_O: case X86::COND_NO:
2494  case X86::COND_E: case X86::COND_NE:
2495  case X86::COND_S: case X86::COND_NS:
2496  case X86::COND_P: case X86::COND_NP:
2497  case X86::COND_L: case X86::COND_GE:
2498  case X86::COND_G: case X86::COND_LE:
2499  return false;
2500  // Anything else: assume conservatively.
2501  default:
2502  return true;
2503  }
2504 }
2505 
2506 /// Test whether the given node which sets flags has any uses which require the
2507 /// CF flag to be accurate.
2508 bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
2509  // Examine each user of the node.
2510  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2511  UI != UE; ++UI) {
2512  // Only check things that use the flags.
2513  if (UI.getUse().getResNo() != Flags.getResNo())
2514  continue;
2515 
2516  unsigned UIOpc = UI->getOpcode();
2517 
2518  if (UIOpc == ISD::CopyToReg) {
2519  // Only examine CopyToReg uses that copy to EFLAGS.
2520  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2521  return false;
2522  // Examine each user of the CopyToReg use.
2523  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2524  FlagUI != FlagUE; ++FlagUI) {
2525  // Only examine the Flag result.
2526  if (FlagUI.getUse().getResNo() != 1)
2527  continue;
2528  // Anything unusual: assume conservatively.
2529  if (!FlagUI->isMachineOpcode())
2530  return false;
2531  // Examine the condition code of the user.
2532  X86::CondCode CC = getCondFromNode(*FlagUI);
2533 
2534  if (mayUseCarryFlag(CC))
2535  return false;
2536  }
2537 
2538  // This CopyToReg is ok. Move on to the next user.
2539  continue;
2540  }
2541 
2542  // This might be an unselected node. So look for the pre-isel opcodes that
2543  // use flags.
2544  unsigned CCOpNo;
2545  switch (UIOpc) {
2546  default:
2547  // Something unusual. Be conservative.
2548  return false;
2549  case X86ISD::SETCC: CCOpNo = 0; break;
2550  case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
2551  case X86ISD::CMOV: CCOpNo = 2; break;
2552  case X86ISD::BRCOND: CCOpNo = 2; break;
2553  }
2554 
2555  X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
2556  if (mayUseCarryFlag(CC))
2557  return false;
2558  }
2559  return true;
2560 }
2561 
2562 /// Check whether or not the chain ending in StoreNode is suitable for doing
2563 /// the {load; op; store} to modify transformation.
2564 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2565  SDValue StoredVal, SelectionDAG *CurDAG,
2566  unsigned LoadOpNo,
2567  LoadSDNode *&LoadNode,
2568  SDValue &InputChain) {
2569  // Is the stored value result 0 of the operation?
2570  if (StoredVal.getResNo() != 0) return false;
2571 
2572  // Are there other uses of the operation other than the store?
2573  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2574 
2575  // Is the store non-extending and non-indexed?
2576  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2577  return false;
2578 
2579  SDValue Load = StoredVal->getOperand(LoadOpNo);
2580  // Is the stored value a non-extending and non-indexed load?
2581  if (!ISD::isNormalLoad(Load.getNode())) return false;
2582 
2583  // Return LoadNode by reference.
2584  LoadNode = cast<LoadSDNode>(Load);
2585 
2586  // Is store the only read of the loaded value?
2587  if (!Load.hasOneUse())
2588  return false;
2589 
2590  // Is the address of the store the same as the load?
2591  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2592  LoadNode->getOffset() != StoreNode->getOffset())
2593  return false;
2594 
2595  bool FoundLoad = false;
2596  SmallVector<SDValue, 4> ChainOps;
2597  SmallVector<const SDNode *, 4> LoopWorklist;
2598  SmallPtrSet<const SDNode *, 32> Visited;
2599  const unsigned int Max = 1024;
2600 
2601  // Visualization of Load-Op-Store fusion:
2602  // -------------------------
2603  // Legend:
2604  // *-lines = Chain operand dependencies.
2605  // |-lines = Normal operand dependencies.
2606  // Dependencies flow down and right. n-suffix references multiple nodes.
2607  //
2608  // C Xn C
2609  // * * *
2610  // * * *
2611  // Xn A-LD Yn TF Yn
2612  // * * \ | * |
2613  // * * \ | * |
2614  // * * \ | => A--LD_OP_ST
2615  // * * \| \
2616  // TF OP \
2617  // * | \ Zn
2618  // * | \
2619  // A-ST Zn
2620  //
2621 
2622  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
2623  // #2: Yn -> LD
2624  // #3: ST -> Zn
2625 
2626  // Ensure the transform is safe by checking for the dual
2627  // dependencies to make sure we do not induce a loop.
2628 
2629  // As LD is a predecessor to both OP and ST we can do this by checking:
2630  // a). if LD is a predecessor to a member of Xn or Yn.
2631  // b). if a Zn is a predecessor to ST.
2632 
2633  // However, (b) can only occur through being a chain predecessor to
2634  // ST, which is the same as Zn being a member or predecessor of Xn,
2635  // which is a subset of LD being a predecessor of Xn. So it's
2636  // subsumed by check (a).
2637 
2638  SDValue Chain = StoreNode->getChain();
2639 
2640  // Gather X elements in ChainOps.
2641  if (Chain == Load.getValue(1)) {
2642  FoundLoad = true;
2643  ChainOps.push_back(Load.getOperand(0));
2644  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2645  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2646  SDValue Op = Chain.getOperand(i);
2647  if (Op == Load.getValue(1)) {
2648  FoundLoad = true;
2649  // Drop Load, but keep its chain. No cycle check necessary.
2650  ChainOps.push_back(Load.getOperand(0));
2651  continue;
2652  }
2653  LoopWorklist.push_back(Op.getNode());
2654  ChainOps.push_back(Op);
2655  }
2656  }
2657 
2658  if (!FoundLoad)
2659  return false;
2660 
2661  // Worklist is currently Xn. Add Yn to worklist.
2662  for (SDValue Op : StoredVal->ops())
2663  if (Op.getNode() != LoadNode)
2664  LoopWorklist.push_back(Op.getNode());
2665 
2666  // Check (a) if Load is a predecessor to Xn + Yn
2667  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2668  true))
2669  return false;
2670 
2671  InputChain =
2672  CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2673  return true;
2674 }
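// Editor's note: illustrative sketch, not part of the original source. The
// chain analysis above is what allows a sequence like
//
//   %v  = load i32, i32* %p
//   %v2 = add i32 %v, 1
//   store i32 %v2, i32* %p
//
// to be considered for fusion into a single read-modify-write instruction,
// while rejecting cases where the rebuilt TokenFactor would make the load a
// predecessor of its own chain, i.e. a cycle.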
2675 
2676 // Change a chain of {load; op; store} of the same value into a simple op
2677 // through memory of that value, if the uses of the modified value and its
2678 // address are suitable.
2679 //
2680 // The tablegen pattern memory operand pattern is currently not able to match
2681 // the case where the EFLAGS on the original operation are used.
2682 //
2683 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2684 // be transferred from a node in the pattern to the result node, probably with
2685 // a new keyword. For example, we have this
2686 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2687 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2688 // (implicit EFLAGS)]>;
2689 // but maybe need something like this
2690 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2691 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2692 // (transferrable EFLAGS)]>;
2693 //
2694 // Until then, we manually fold these and instruction select the operation
2695 // here.
2696 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2697  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2698  SDValue StoredVal = StoreNode->getOperand(1);
2699  unsigned Opc = StoredVal->getOpcode();
2700 
2701  // Before we try to select anything, make sure this is memory operand size
2702  // and opcode we can handle. Note that this must match the code below that
2703  // actually lowers the opcodes.
2704  EVT MemVT = StoreNode->getMemoryVT();
2705  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2706  MemVT != MVT::i8)
2707  return false;
2708 
2709  bool IsCommutable = false;
2710  bool IsNegate = false;
2711  switch (Opc) {
2712  default:
2713  return false;
2714  case X86ISD::SUB:
2715  IsNegate = isNullConstant(StoredVal.getOperand(0));
2716  break;
2717  case X86ISD::SBB:
2718  break;
2719  case X86ISD::ADD:
2720  case X86ISD::ADC:
2721  case X86ISD::AND:
2722  case X86ISD::OR:
2723  case X86ISD::XOR:
2724  IsCommutable = true;
2725  break;
2726  }
2727 
2728  unsigned LoadOpNo = IsNegate ? 1 : 0;
2729  LoadSDNode *LoadNode = nullptr;
2730  SDValue InputChain;
2731  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2732  LoadNode, InputChain)) {
2733  if (!IsCommutable)
2734  return false;
2735 
2736  // This operation is commutable, try the other operand.
2737  LoadOpNo = 1;
2738  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2739  LoadNode, InputChain))
2740  return false;
2741  }
2742 
2743  SDValue Base, Scale, Index, Disp, Segment;
2744  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2745  Segment))
2746  return false;
2747 
2748  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2749  unsigned Opc8) {
2750  switch (MemVT.getSimpleVT().SimpleTy) {
2751  case MVT::i64:
2752  return Opc64;
2753  case MVT::i32:
2754  return Opc32;
2755  case MVT::i16:
2756  return Opc16;
2757  case MVT::i8:
2758  return Opc8;
2759  default:
2760  llvm_unreachable("Invalid size!");
2761  }
2762  };
2763 
2764  MachineSDNode *Result;
2765  switch (Opc) {
2766  case X86ISD::SUB:
2767  // Handle negate.
2768  if (IsNegate) {
2769  unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
2770  X86::NEG8m);
2771  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2772  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
2773  MVT::Other, Ops);
2774  break;
2775  }
2776  LLVM_FALLTHROUGH;
2777  case X86ISD::ADD:
2778  // Try to match inc/dec.
2779  if (!Subtarget->slowIncDec() || OptForSize) {
2780  bool IsOne = isOneConstant(StoredVal.getOperand(1));
2781  bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
2782  // ADD/SUB by 1/-1 can use inc/dec if the carry flag isn't used.
2783  if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
2784  unsigned NewOpc =
2785  ((Opc == X86ISD::ADD) == IsOne)
2786  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2787  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2788  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2789  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
2790  MVT::Other, Ops);
2791  break;
2792  }
2793  }
2794  LLVM_FALLTHROUGH;
2795  case X86ISD::ADC:
2796  case X86ISD::SBB:
2797  case X86ISD::AND:
2798  case X86ISD::OR:
2799  case X86ISD::XOR: {
2800  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
2801  switch (Opc) {
2802  case X86ISD::ADD:
2803  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
2804  X86::ADD8mr);
2805  case X86ISD::ADC:
2806  return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
2807  X86::ADC8mr);
2808  case X86ISD::SUB:
2809  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
2810  X86::SUB8mr);
2811  case X86ISD::SBB:
2812  return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
2813  X86::SBB8mr);
2814  case X86ISD::AND:
2815  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
2816  X86::AND8mr);
2817  case X86ISD::OR:
2818  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
2819  case X86ISD::XOR:
2820  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
2821  X86::XOR8mr);
2822  default:
2823  llvm_unreachable("Invalid opcode!");
2824  }
2825  };
2826  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
2827  switch (Opc) {
2828  case X86ISD::ADD:
2829  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
2830  case X86ISD::ADC:
2831  return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
2832  case X86ISD::SUB:
2833  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
2834  case X86ISD::SBB:
2835  return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
2836  case X86ISD::AND:
2837  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
2838  case X86ISD::OR:
2839  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
2840  case X86ISD::XOR:
2841  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
2842  default:
2843  llvm_unreachable("Invalid opcode!");
2844  }
2845  };
2846  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
2847  switch (Opc) {
2848  case X86ISD::ADD:
2849  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
2850  X86::ADD8mi);
2851  case X86ISD::ADC:
2852  return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
2853  X86::ADC8mi);
2854  case X86ISD::SUB:
2855  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
2856  X86::SUB8mi);
2857  case X86ISD::SBB:
2858  return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
2859  X86::SBB8mi);
2860  case X86ISD::AND:
2861  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
2862  X86::AND8mi);
2863  case X86ISD::OR:
2864  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
2865  X86::OR8mi);
2866  case X86ISD::XOR:
2867  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
2868  X86::XOR8mi);
2869  default:
2870  llvm_unreachable("Invalid opcode!");
2871  }
2872  };
2873 
2874  unsigned NewOpc = SelectRegOpcode(Opc);
2875  SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
2876 
2877  // See if the operand is a constant that we can fold into an immediate
2878  // operand.
2879  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
2880  int64_t OperandV = OperandC->getSExtValue();
2881 
2882  // Check if we can shrink the operand enough to fit in an immediate (or
2883  // fit into a smaller immediate) by negating it and switching the
2884  // operation.
2885  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
2886  ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
2887  (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
2888  isInt<32>(-OperandV))) &&
2889  hasNoCarryFlagUses(StoredVal.getValue(1))) {
2890  OperandV = -OperandV;
2891  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
2892  }
2893 
2894  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
2895  // the larger immediate operand.
2896  if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
2897  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2898  NewOpc = SelectImm8Opcode(Opc);
2899  } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
2900  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2901  NewOpc = SelectImmOpcode(Opc);
2902  }
2903  }
2904 
2905  if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
2906  SDValue CopyTo =
2907  CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
2908  StoredVal.getOperand(2), SDValue());
2909 
2910  const SDValue Ops[] = {Base, Scale, Index, Disp,
2911  Segment, Operand, CopyTo, CopyTo.getValue(1)};
2912  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2913  Ops);
2914  } else {
2915  const SDValue Ops[] = {Base, Scale, Index, Disp,
2916  Segment, Operand, InputChain};
2917  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2918  Ops);
2919  }
2920  break;
2921  }
2922  default:
2923  llvm_unreachable("Invalid opcode!");
2924  }
2925 
2926  MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
2927  LoadNode->getMemOperand()};
2928  CurDAG->setNodeMemRefs(Result, MemOps);
2929 
2930  // Update Load Chain uses as well.
2931  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
2932  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2933  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2934  CurDAG->RemoveDeadNode(Node);
2935  return true;
2936 }
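// Editor's note: illustrative sketch, not part of the original source. For a
// fused {load; op; store} of the same address, the selected result is a
// single memory-operand instruction such as
//
//   incl (%rdi)           ; add of +1 when the carry flag is unused
//   addl $5, (%rdi)       ; general immediate form
//
// with the load's and store's memory operands attached to the new node and
// the old chains rerouted through it.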
2937 
2938 // See if this is an X & Mask that we can match to BEXTR/BZHI.
2939 // Where Mask is one of the following patterns:
2940 // a) x & (1 << nbits) - 1
2941 // b) x & ~(-1 << nbits)
2942 // c) x & (-1 >> (32 - y))
2943 // d) x << (32 - y) >> (32 - y)
2944 bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
2945  assert(
2946  (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
2947  "Should be either an and-mask, or right-shift after clearing high bits.");
2948 
2949  // BEXTR is a BMI instruction and BZHI is a BMI2 instruction. We need at least one.
2950  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
2951  return false;
2952 
2953  MVT NVT = Node->getSimpleValueType(0);
2954 
2955  // Only supported for 32 and 64 bits.
2956  if (NVT != MVT::i32 && NVT != MVT::i64)
2957  return false;
2958 
2959  unsigned Size = NVT.getSizeInBits();
2960 
2961  SDValue NBits;
2962 
2963  // If we have BMI2's BZHI, we are ok with multi-use patterns.
2964  // Else, if we only have BMI1's BEXTR, we require one-use.
2965  const bool CanHaveExtraUses = Subtarget->hasBMI2();
2966  auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
2967  return CanHaveExtraUses ||
2968  Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
2969  };
2970  auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
2971  auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
2972 
2973  // a) x & ((1 << nbits) + (-1))
2974  auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool {
2975  // Match `add`. Must only have one use!
2976  if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
2977  return false;
2978  // We should be adding all-ones constant (i.e. subtracting one.)
2979  if (!isAllOnesConstant(Mask->getOperand(1)))
2980  return false;
2981  // Match `1 << nbits`. Must only have one use!
2982  SDValue M0 = Mask->getOperand(0);
2983  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
2984  return false;
2985  if (!isOneConstant(M0->getOperand(0)))
2986  return false;
2987  NBits = M0->getOperand(1);
2988  return true;
2989  };
2990 
2991  // b) x & ~(-1 << nbits)
2992  auto matchPatternB = [&checkOneUse, &NBits](SDValue Mask) -> bool {
2993  // Match `~()`. Must only have one use!
2994  if (!isBitwiseNot(Mask) || !checkOneUse(Mask))
2995  return false;
2996  // Match `-1 << nbits`. Must only have one use!
2997  SDValue M0 = Mask->getOperand(0);
2998  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
2999  return false;
3000  if (!isAllOnesConstant(M0->getOperand(0)))
3001  return false;
3002  NBits = M0->getOperand(1);
3003  return true;
3004  };
3005 
3006  // Match potentially-truncated (bitwidth - y)
3007  auto matchShiftAmt = [checkOneUse, Size, &NBits](SDValue ShiftAmt) {
3008  // Skip over a truncate of the shift amount.
3009  if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
3010  ShiftAmt = ShiftAmt.getOperand(0);
3011  // The trunc should have been the only user of the real shift amount.
3012  if (!checkOneUse(ShiftAmt))
3013  return false;
3014  }
3015  // Match the shift amount as: (bitwidth - y). It should go away, too.
3016  if (ShiftAmt.getOpcode() != ISD::SUB)
3017  return false;
3018  auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
3019  if (!V0 || V0->getZExtValue() != Size)
3020  return false;
3021  NBits = ShiftAmt.getOperand(1);
3022  return true;
3023  };
3024 
3025  // c) x & (-1 >> (32 - y))
3026  auto matchPatternC = [&checkOneUse, matchShiftAmt](SDValue Mask) -> bool {
3027  // Match `l>>`. Must only have one use!
3028  if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
3029  return false;
3030  // We should be shifting all-ones constant.
3031  if (!isAllOnesConstant(Mask.getOperand(0)))
3032  return false;
3033  SDValue M1 = Mask.getOperand(1);
3034  // The shift amount should not be used externally.
3035  if (!checkOneUse(M1))
3036  return false;
3037  return matchShiftAmt(M1);
3038  };
3039 
3040  SDValue X;
3041 
3042  // d) x << (32 - y) >> (32 - y)
3043  auto matchPatternD = [&checkOneUse, &checkTwoUse, matchShiftAmt,
3044  &X](SDNode *Node) -> bool {
3045  if (Node->getOpcode() != ISD::SRL)
3046  return false;
3047  SDValue N0 = Node->getOperand(0);
3048  if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
3049  return false;
3050  SDValue N1 = Node->getOperand(1);
3051  SDValue N01 = N0->getOperand(1);
3052  // Both of the shifts must be by the exact same value.
3053  // There should not be any uses of the shift amount outside of the pattern.
3054  if (N1 != N01 || !checkTwoUse(N1))
3055  return false;
3056  if (!matchShiftAmt(N1))
3057  return false;
3058  X = N0->getOperand(0);
3059  return true;
3060  };
3061 
3062  auto matchLowBitMask = [&matchPatternA, &matchPatternB,
3063  &matchPatternC](SDValue Mask) -> bool {
3064  // FIXME: pattern c.
3065  return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
3066  };
3067 
3068  if (Node->getOpcode() == ISD::AND) {
3069  X = Node->getOperand(0);
3070  SDValue Mask = Node->getOperand(1);
3071 
3072  if (matchLowBitMask(Mask)) {
3073  // Great.
3074  } else {
3075  std::swap(X, Mask);
3076  if (!matchLowBitMask(Mask))
3077  return false;
3078  }
3079  } else if (!matchPatternD(Node))
3080  return false;
3081 
3082  SDLoc DL(Node);
3083 
3084  // If we do *NOT* have BMI2, let's find out if 'X' is *logically*
3085  // shifted (potentially with a one-use trunc in between),
3086  // and if so look past the one-use truncation.
3087  MVT XVT = NVT;
3088  if (!Subtarget->hasBMI2() && X.getOpcode() == ISD::TRUNCATE &&
3089  X.hasOneUse() && X.getOperand(0).getOpcode() == ISD::SRL) {
3090  assert(NVT == MVT::i32 && "Expected target valuetype to be i32");
3091  X = X.getOperand(0);
3092  XVT = X.getSimpleValueType();
3093  assert(XVT == MVT::i64 && "Expected truncation from i64");
3094  }
3095 
3096  // Truncate the shift amount.
3097  NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
3098  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3099 
3100  // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
3101  // All the other bits are undefined; we do not care about them.
3102  SDValue ImplDef = SDValue(
3103  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
3104  insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
3105  NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef,
3106  NBits);
3107  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3108 
3109  if (Subtarget->hasBMI2()) {
3110  // Great, just emit the BZHI.
3111  if (XVT != MVT::i32) {
3112  // But have to place the bit count into the wide-enough register first.
3113  NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, NBits);
3114  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3115  }
3116 
3117  SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, XVT, X, NBits);
3118  ReplaceNode(Node, Extract.getNode());
3119  SelectCode(Extract.getNode());
3120  return true;
3121  }
3122 
3123  // Else, emitting BEXTR requires one more step.
3124  // The 'control' of BEXTR has the pattern of:
3125  // [15...8 bit][ 7...0 bit] location
3126  // [ bit count][ shift] name
3127  // I.e. 0b00000011'00000001 means (x >> 0b1) & 0b111
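// Editorial illustration (not part of the original source): with this layout,
// extracting, say, 4 bits starting at bit 5 uses
//   Control = (4 << 8) | 5 == 0x0405
// i.e. BEXTR(x, 0x0405) == (x >> 5) & 0b1111. The code below builds the bit
// count half first and ORs in the shift only if one can be folded.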
3128 
3129  // Shift NBits left by 8 bits, thus producing 'control'.
3130  // This makes the low 8 bits zero.
3131  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
3132  SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
3133  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3134 
3135  // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
3136  if (X.getOpcode() == ISD::SRL) {
3137  SDValue ShiftAmt = X.getOperand(1);
3138  X = X.getOperand(0);
3139 
3140  assert(ShiftAmt.getValueType() == MVT::i8 &&
3141  "Expected shift amount to be i8");
3142 
3143  // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
3144  // We could zext to i16 in some form, but we intentionally don't do that.
3145  SDValue OrigShiftAmt = ShiftAmt;
3146  ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
3147  insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
3148 
3149  // And now 'or' these low 8 bits of shift amount into the 'control'.
3150  Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
3151  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3152  }
3153 
3154  // But have to place the 'control' into the wide-enough register first.
3155  if (XVT != MVT::i32) {
3156  Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
3157  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3158  }
3159 
3160  // And finally, form the BEXTR itself.
3161  SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
3162 
3163  // The 'X' was originally truncated. Do that now.
3164  if (XVT != NVT) {
3165  insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
3166  Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
3167  }
3168 
3169  ReplaceNode(Node, Extract.getNode());
3170  SelectCode(Extract.getNode());
3171 
3172  return true;
3173 }
3174 
3175 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
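// Editorial illustration (not part of the original source): with the control
// encoding used below (Shift | (MaskSize << 8)), a DAG such as
//   (and (srl X, 4), 0xFFF)
// becomes BEXTR(X, 0x0C04), i.e. start bit 4, length 12 bits.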
3176 MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
3177  MVT NVT = Node->getSimpleValueType(0);
3178  SDLoc dl(Node);
3179 
3180  SDValue N0 = Node->getOperand(0);
3181  SDValue N1 = Node->getOperand(1);
3182 
3183  // If we have TBM we can use an immediate for the control. If we have BMI
3184  // we should only do this if the BEXTR instruction is implemented well.
3185  // Otherwise moving the control into a register makes this more costly.
3186  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
3187  // hoisting the move immediate would make it worthwhile with a less optimal
3188  // BEXTR?
3189  if (!Subtarget->hasTBM() &&
3190  !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
3191  return nullptr;
3192 
3193  // Must have a shift right.
3194  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
3195  return nullptr;
3196 
3197  // Shift can't have additional users.
3198  if (!N0->hasOneUse())
3199  return nullptr;
3200 
3201  // Only supported for 32 and 64 bits.
3202  if (NVT != MVT::i32 && NVT != MVT::i64)
3203  return nullptr;
3204 
3205  // Shift amount and RHS of and must be constant.
3206  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
3207  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3208  if (!MaskCst || !ShiftCst)
3209  return nullptr;
3210 
3211  // And RHS must be a mask.
3212  uint64_t Mask = MaskCst->getZExtValue();
3213  if (!isMask_64(Mask))
3214  return nullptr;
3215 
3216  uint64_t Shift = ShiftCst->getZExtValue();
3217  uint64_t MaskSize = countPopulation(Mask);
3218 
3219  // Don't interfere with something that can be handled by extracting AH.
3220  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
3221  if (Shift == 8 && MaskSize == 8)
3222  return nullptr;
3223 
3224  // Make sure we are only using bits that were in the original value, not
3225  // shifted in.
3226  if (Shift + MaskSize > NVT.getSizeInBits())
3227  return nullptr;
3228 
3229  SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
3230  unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
3231  unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
3232 
3233  // BMI requires the immediate to be placed in a register.
3234  if (!Subtarget->hasTBM()) {
3235  ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
3236  MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
3237  unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3238  New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
3239  }
3240 
3241  MachineSDNode *NewNode;
3242  SDValue Input = N0->getOperand(0);
3243  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3244  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3245  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
3246  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
3247  NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3248  // Update the chain.
3249  ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
3250  // Record the mem-refs
3251  CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
3252  } else {
3253  NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New);
3254  }
3255 
3256  return NewNode;
3257 }
3258 
3259 // Emit a PCMISTR(I/M) instruction.
3260 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
3261  bool MayFoldLoad, const SDLoc &dl,
3262  MVT VT, SDNode *Node) {
3263  SDValue N0 = Node->getOperand(0);
3264  SDValue N1 = Node->getOperand(1);
3265  SDValue Imm = Node->getOperand(2);
3266  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3267  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3268 
3269  // Try to fold a load. No need to check alignment.
3270  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3271  if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3272  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3273  N1.getOperand(0) };
3274  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
3275  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3276  // Update the chain.
3277  ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
3278  // Record the mem-refs
3279  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3280  return CNode;
3281  }
3282 
3283  SDValue Ops[] = { N0, N1, Imm };
3284  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
3285  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3286  return CNode;
3287 }
3288 
3289 // Emit a PCMESTR(I/M) instruction. Also return the Glue result in case we need
3290 // to emit a second instruction after this one. This is needed since we have two
3291 // copyToReg nodes glued before this and we need to continue that glue through.
3292 MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
3293  bool MayFoldLoad, const SDLoc &dl,
3294  MVT VT, SDNode *Node,
3295  SDValue &InFlag) {
3296  SDValue N0 = Node->getOperand(0);
3297  SDValue N2 = Node->getOperand(2);
3298  SDValue Imm = Node->getOperand(4);
3299  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3300  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3301 
3302  // Try to fold a load. No need to check alignment.
3303  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3304  if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3305  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3306  N2.getOperand(0), InFlag };
3307  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
3308  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3309  InFlag = SDValue(CNode, 3);
3310  // Update the chain.
3311  ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
3312  // Record the mem-refs
3313  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
3314  return CNode;
3315  }
3316 
3317  SDValue Ops[] = { N0, N2, Imm, InFlag };
3318  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
3319  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3320  InFlag = SDValue(CNode, 2);
3321  return CNode;
3322 }
3323 
3324 bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3325  EVT VT = N->getValueType(0);
3326 
3327  // Only handle scalar shifts.
3328  if (VT.isVector())
3329  return false;
3330 
3331  // Narrower shifts only mask to 5 bits in hardware.
3332  unsigned Size = VT == MVT::i64 ? 64 : 32;
3333 
3334  SDValue OrigShiftAmt = N->getOperand(1);
3335  SDValue ShiftAmt = OrigShiftAmt;
3336  SDLoc DL(N);
3337 
3338  // Skip over a truncate of the shift amount.
3339  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
3340  ShiftAmt = ShiftAmt->getOperand(0);
3341 
3342  // This function is called after X86DAGToDAGISel::matchBitExtract(),
3343  // so we are not afraid that we might mess up BZHI/BEXTR pattern.
3344 
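// Editorial illustration (not part of the original source): the rewrites
// below rely on hardware shifts masking their amount to Size bits. For a
// 32-bit shift, (x + 32) & 31 == x & 31 and (32 - x) & 31 == (-x) & 31, so
// the ADD/SUB of a multiple of Size can be dropped, or the SUB from a
// multiple of Size replaced by a NEG.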
3345  SDValue NewShiftAmt;
3346  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3347  SDValue Add0 = ShiftAmt->getOperand(0);
3348  SDValue Add1 = ShiftAmt->getOperand(1);
3349  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3350  // to avoid the ADD/SUB.
3351  if (isa<ConstantSDNode>(Add1) &&
3352  cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
3353  NewShiftAmt = Add0;
3354  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3355  // generate a NEG instead of a SUB of a constant.
3356  } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3357  isa<ConstantSDNode>(Add0) &&
3358  cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
3359  cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
3360  // Insert a negate op.
3361  // TODO: This isn't guaranteed to replace the sub if there is a logic cone
3362  // that uses it that's not a shift.
3363  EVT SubVT = ShiftAmt.getValueType();
3364  SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
3365  SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
3366  NewShiftAmt = Neg;
3367 
3368  // Insert these operands into a valid topological order so they can
3369  // get selected independently.
3370  insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
3371  insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
3372  } else
3373  return false;
3374  } else
3375  return false;
3376 
3377  if (NewShiftAmt.getValueType() != MVT::i8) {
3378  // Need to truncate the shift amount.
3379  NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
3380  // Add to a correct topological ordering.
3381  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3382  }
3383 
3384  // Insert a new mask to keep the shift amount legal. This should be removed
3385  // by isel patterns.
3386  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
3387  CurDAG->getConstant(Size - 1, DL, MVT::i8));
3388  // Place in a correct topological ordering.
3389  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3390 
3391  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
3392  NewShiftAmt);
3393  if (UpdatedNode != N) {
3394  // If we found an existing node, we should replace ourselves with that node
3395  // and wait for it to be selected after its other users.
3396  ReplaceNode(N, UpdatedNode);
3397  return true;
3398  }
3399 
3400  // If the original shift amount is now dead, delete it so that we don't run
3401  // it through isel.
3402  if (OrigShiftAmt.getNode()->use_empty())
3403  CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
3404 
3405  // Now that we've optimized the shift amount, defer to normal isel to get
3406  // load folding and legacy vs BMI2 selection without repeating it here.
3407  SelectCode(N);
3408  return true;
3409 }
3410 
3411 /// If the high bits of an 'and' operand are known zero, try setting the
3412 /// high bits of an 'and' constant operand to produce a smaller encoding by
3413 /// creating a small, sign-extended negative immediate rather than a large
3414 /// positive one. This reverses a transform in SimplifyDemandedBits that
3415 /// shrinks mask constants by clearing bits. There is also a possibility that
3416 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
3417 /// case, just replace the 'and'. Return 'true' if the node is replaced.
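// Editorial illustration (not part of the original source): for a 32-bit AND
// with mask 0x0000FFF0, if the top 16 bits of the other operand are known
// zero, the mask can be widened to 0xFFFFFFF0 (i.e. -16), which encodes as a
// sign-extended 8-bit immediate instead of a 32-bit one.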
3418 bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
3419  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
3420  // have immediate operands.
3421  MVT VT = And->getSimpleValueType(0);
3422  if (VT != MVT::i32 && VT != MVT::i64)
3423  return false;
3424 
3425  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
3426  if (!And1C)
3427  return false;
3428 
3429  // Bail out if the mask constant is already negative; it can't shrink any further.
3430  // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
3431  // patterns to use a 32-bit and instead of a 64-bit and by relying on the
3432  // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
3433  // are negative too.
3434  APInt MaskVal = And1C->getAPIntValue();
3435  unsigned MaskLZ = MaskVal.countLeadingZeros();
3436  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
3437  return false;
3438 
3439  // Don't extend into the upper 32 bits of a 64 bit mask.
3440  if (VT == MVT::i64 && MaskLZ >= 32) {
3441  MaskLZ -= 32;
3442  MaskVal = MaskVal.trunc(32);
3443  }
3444 
3445  SDValue And0 = And->getOperand(0);
3446  APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
3447  APInt NegMaskVal = MaskVal | HighZeros;
3448 
3449  // If a negative constant would not allow a smaller encoding, there's no need
3450  // to continue. Only change the constant when we know it's a win.
3451  unsigned MinWidth = NegMaskVal.getMinSignedBits();
3452  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
3453  return false;
3454 
3455  // Extend masks if we truncated above.
3456  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
3457  NegMaskVal = NegMaskVal.zext(64);
3458  HighZeros = HighZeros.zext(64);
3459  }
3460 
3461  // The variable operand must be all zeros in the top bits to allow using the
3462  // new, negative constant as the mask.
3463  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
3464  return false;
3465 
3466  // Check if the mask is -1. In that case, this is an unnecessary instruction
3467  // that escaped earlier analysis.
3468  if (NegMaskVal.isAllOnesValue()) {
3469  ReplaceNode(And, And0.getNode());
3470  return true;
3471  }
3472 
3473  // A negative mask allows a smaller encoding. Create a new 'and' node.
3474  SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
3475  SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
3476  ReplaceNode(And, NewAnd.getNode());
3477  SelectCode(NewAnd.getNode());
3478  return true;
3479 }
3480 
3481 static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
3482  bool FoldedBCast, bool Masked) {
3483  if (Masked) {
3484  if (FoldedLoad) {
3485  switch (TestVT.SimpleTy) {
3486  default: llvm_unreachable("Unexpected VT!");
3487  case MVT::v16i8:
3488  return IsTestN ? X86::VPTESTNMBZ128rmk : X86::VPTESTMBZ128rmk;
3489  case MVT::v8i16:
3490  return IsTestN ? X86::VPTESTNMWZ128rmk : X86::VPTESTMWZ128rmk;
3491  case MVT::v4i32:
3492  return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk;
3493  case MVT::v2i64:
3494  return IsTestN ? X86::VPTESTNMQZ128rmk : X86::VPTESTMQZ128rmk;
3495  case MVT::v32i8:
3496  return IsTestN ? X86::VPTESTNMBZ256rmk : X86::VPTESTMBZ256rmk;
3497  case MVT::v16i16:
3498  return IsTestN ? X86::VPTESTNMWZ256rmk : X86::VPTESTMWZ256rmk;
3499  case MVT::v8i32:
3500  return IsTestN ? X86::VPTESTNMDZ256rmk : X86::VPTESTMDZ256rmk;
3501  case MVT::v4i64:
3502  return IsTestN ? X86::VPTESTNMQZ256rmk : X86::VPTESTMQZ256rmk;
3503  case MVT::v64i8:
3504  return IsTestN ? X86::VPTESTNMBZrmk : X86::VPTESTMBZrmk;
3505  case MVT::v32i16:
3506  return IsTestN ? X86::VPTESTNMWZrmk : X86::VPTESTMWZrmk;
3507  case MVT::v16i32:
3508  return IsTestN ? X86::VPTESTNMDZrmk : X86::VPTESTMDZrmk;
3509  case MVT::v8i64:
3510  return IsTestN ? X86::VPTESTNMQZrmk : X86::VPTESTMQZrmk;
3511  }
3512  }
3513 
3514  if (FoldedBCast) {
3515  switch (TestVT.SimpleTy) {
3516  default: llvm_unreachable("Unexpected VT!");
3517  case MVT::v4i32:
3518  return IsTestN ? X86::VPTESTNMDZ128rmbk : X86::VPTESTMDZ128rmbk;
3519  case MVT::v2i64:
3520  return IsTestN ? X86::VPTESTNMQZ128rmbk : X86::VPTESTMQZ128rmbk;
3521  case MVT::v8i32:
3522  return IsTestN ? X86::VPTESTNMDZ256rmbk : X86::VPTESTMDZ256rmbk;
3523  case MVT::v4i64:
3524  return IsTestN ? X86::VPTESTNMQZ256rmbk : X86::VPTESTMQZ256rmbk;
3525  case MVT::v16i32:
3526  return IsTestN ? X86::VPTESTNMDZrmbk : X86::VPTESTMDZrmbk;
3527  case MVT::v8i64:
3528  return IsTestN ? X86::VPTESTNMQZrmbk : X86::VPTESTMQZrmbk;
3529  }
3530  }
3531 
3532  switch (TestVT.SimpleTy) {
3533  default: llvm_unreachable("Unexpected VT!");
3534  case MVT::v16i8:
3535  return IsTestN ? X86::VPTESTNMBZ128rrk : X86::VPTESTMBZ128rrk;
3536  case MVT::v8i16:
3537  return IsTestN ? X86::VPTESTNMWZ128rrk : X86::VPTESTMWZ128rrk;
3538  case MVT::v4i32:
3539  return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk;
3540  case MVT::v2i64:
3541  return IsTestN ? X86::VPTESTNMQZ128rrk : X86::VPTESTMQZ128rrk;
3542  case MVT::v32i8:
3543  return IsTestN ? X86::VPTESTNMBZ256rrk : X86::VPTESTMBZ256rrk;
3544  case MVT::v16i16:
3545  return IsTestN ? X86::VPTESTNMWZ256rrk : X86::VPTESTMWZ256rrk;
3546  case MVT::v8i32:
3547  return IsTestN ? X86::VPTESTNMDZ256rrk : X86::VPTESTMDZ256rrk;
3548  case MVT::v4i64:
3549  return IsTestN ? X86::VPTESTNMQZ256rrk : X86::VPTESTMQZ256rrk;
3550  case MVT::v64i8:
3551  return IsTestN ? X86::VPTESTNMBZrrk : X86::VPTESTMBZrrk;
3552  case MVT::v32i16:
3553  return IsTestN ? X86::VPTESTNMWZrrk : X86::VPTESTMWZrrk;
3554  case MVT::v16i32:
3555  return IsTestN ? X86::VPTESTNMDZrrk : X86::VPTESTMDZrrk;
3556  case MVT::v8i64:
3557  return IsTestN ? X86::VPTESTNMQZrrk : X86::VPTESTMQZrrk;
3558  }
3559  }
3560 
3561  if (FoldedLoad) {
3562  switch (TestVT.SimpleTy) {
3563  default: llvm_unreachable("Unexpected VT!");
3564  case MVT::v16i8:
3565  return IsTestN ? X86::VPTESTNMBZ128rm : X86::VPTESTMBZ128rm;
3566  case MVT::v8i16:
3567  return IsTestN ? X86::VPTESTNMWZ128rm : X86::VPTESTMWZ128rm;
3568  case MVT::v4i32:
3569  return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm;
3570  case MVT::v2i64:
3571  return IsTestN ? X86::VPTESTNMQZ128rm : X86::VPTESTMQZ128rm;
3572  case MVT::v32i8:
3573  return IsTestN ? X86::VPTESTNMBZ256rm : X86::VPTESTMBZ256rm;
3574  case MVT::v16i16:
3575  return IsTestN ? X86::VPTESTNMWZ256rm : X86::VPTESTMWZ256rm;
3576  case MVT::v8i32:
3577  return IsTestN ? X86::VPTESTNMDZ256rm : X86::VPTESTMDZ256rm;
3578  case MVT::v4i64:
3579  return IsTestN ? X86::VPTESTNMQZ256rm : X86::VPTESTMQZ256rm;
3580  case MVT::v64i8:
3581  return IsTestN ? X86::VPTESTNMBZrm : X86::VPTESTMBZrm;
3582  case MVT::v32i16:
3583  return IsTestN ? X86::VPTESTNMWZrm : X86::VPTESTMWZrm;
3584  case MVT::v16i32:
3585  return IsTestN ? X86::VPTESTNMDZrm : X86::VPTESTMDZrm;
3586  case MVT::v8i64:
3587  return IsTestN ? X86::VPTESTNMQZrm : X86::VPTESTMQZrm;
3588  }
3589  }
3590 
3591  if (FoldedBCast) {
3592  switch (TestVT.SimpleTy) {
3593  default: llvm_unreachable("Unexpected VT!");
3594  case MVT::v4i32:
3595  return IsTestN ? X86::VPTESTNMDZ128rmb : X86::VPTESTMDZ128rmb;
3596  case MVT::v2i64:
3597  return IsTestN ? X86::VPTESTNMQZ128rmb : X86::VPTESTMQZ128rmb;
3598  case MVT::v8i32:
3599  return IsTestN ? X86::VPTESTNMDZ256rmb : X86::VPTESTMDZ256rmb;
3600  case MVT::v4i64:
3601  return IsTestN ? X86::VPTESTNMQZ256rmb : X86::VPTESTMQZ256rmb;
3602  case MVT::v16i32:
3603  return IsTestN ? X86::VPTESTNMDZrmb : X86::VPTESTMDZrmb;
3604  case MVT::v8i64:
3605  return IsTestN ? X86::VPTESTNMQZrmb : X86::VPTESTMQZrmb;
3606  }
3607  }
3608 
3609  switch (TestVT.SimpleTy) {
3610  default: llvm_unreachable("Unexpected VT!");
3611  case MVT::v16i8:
3612  return IsTestN ? X86::VPTESTNMBZ128rr : X86::VPTESTMBZ128rr;
3613  case MVT::v8i16:
3614  return IsTestN ? X86::VPTESTNMWZ128rr : X86::VPTESTMWZ128rr;
3615  case MVT::v4i32:
3616  return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr;
3617  case MVT::v2i64:
3618  return IsTestN ? X86::VPTESTNMQZ128rr : X86::VPTESTMQZ128rr;
3619  case MVT::v32i8:
3620  return IsTestN ? X86::VPTESTNMBZ256rr : X86::VPTESTMBZ256rr;
3621  case MVT::v16i16:
3622  return IsTestN ? X86::VPTESTNMWZ256rr : X86::VPTESTMWZ256rr;
3623  case MVT::v8i32:
3624  return IsTestN ? X86::VPTESTNMDZ256rr : X86::VPTESTMDZ256rr;
3625  case MVT::v4i64:
3626  return IsTestN ? X86::VPTESTNMQZ256rr : X86::VPTESTMQZ256rr;
3627  case MVT::v64i8:
3628  return IsTestN ? X86::VPTESTNMBZrr : X86::VPTESTMBZrr;
3629  case MVT::v32i16:
3630  return IsTestN ? X86::VPTESTNMWZrr : X86::VPTESTMWZrr;
3631  case MVT::v16i32:
3632  return IsTestN ? X86::VPTESTNMDZrr : X86::VPTESTMDZrr;
3633  case MVT::v8i64:
3634  return IsTestN ? X86::VPTESTNMQZrr : X86::VPTESTMQZrr;
3635  }
3636 }
3637 
3638 // Try to create VPTESTM instruction. If InMask is not null, it will be used
3639 // to form a masked operation.
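// Editorial illustration (not part of the original source): the shape being
// matched is roughly
//   (setcc (and X, Y), all-zeros, ne) -> VPTESTM  (mask bit set where X & Y != 0)
//   (setcc (and X, Y), all-zeros, eq) -> VPTESTNM (mask bit set where X & Y == 0)
// producing a k-register mask directly instead of a vector compare.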
3640 bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
3641  SDValue InMask) {
3642  assert(Subtarget->hasAVX512() && "Expected AVX512!");
3643  assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
3644  "Unexpected VT!");
3645 
3646  // Look for equal and not equal compares.
3647  ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
3648  if (CC != ISD::SETEQ && CC != ISD::SETNE)
3649  return false;
3650 
3651  // See if we're comparing against zero. This should have been canonicalized
3652  // to RHS during lowering.
3653  if (!ISD::isBuildVectorAllZeros(Setcc.getOperand(1).getNode()))
3654  return false;
3655 
3656  SDValue N0 = Setcc.getOperand(0);
3657 
3658  MVT CmpVT = N0.getSimpleValueType();
3659  MVT CmpSVT = CmpVT.getVectorElementType();
3660 
3661  // Start with both operands the same. We'll try to refine this.
3662  SDValue Src0 = N0;
3663  SDValue Src1 = N0;
3664 
3665  {
3666  // Look through single use bitcasts.
3667  SDValue N0Temp = N0;
3668  if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
3669  N0Temp = N0.getOperand(0);
3670 
3671  // Look for single use AND.
3672  if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
3673  Src0 = N0Temp.getOperand(0);
3674  Src1 = N0Temp.getOperand(1);
3675  }
3676  }
3677 
3678  // Without VLX we need to widen the load.
3679  bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
3680 
3681  // We can only fold loads if the sources are unique.
3682  bool CanFoldLoads = Src0 != Src1;
3683 
3684  // Try to fold loads unless we need to widen.
3685  bool FoldedLoad = false;
3686  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load;
3687  if (!Widen && CanFoldLoads) {
3688  Load = Src1;
3689  FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3,
3690  Tmp4);
3691  if (!FoldedLoad) {
3692  // And is commutative.
3693  Load = Src0;
3694  FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2,
3695  Tmp3, Tmp4);
3696  if (FoldedLoad)
3697  std::swap(Src0, Src1);
3698  }
3699  }
3700 
3701  auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
3702  // Look through single use bitcasts.
3703  if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse())
3704  Src = Src.getOperand(0);
3705 
3706  if (Src.getOpcode() == X86ISD::VBROADCAST && Src.hasOneUse()) {
3707  Parent = Src.getNode();
3708  Src = Src.getOperand(0);
3709  if (Src.getSimpleValueType() == CmpSVT)
3710  return Src;
3711  }
3712 
3713  return SDValue();
3714  };
3715 
3716  // If we didn't fold a load, try to match a broadcast. There is no widening
3717  // limitation for this, but only 32 and 64 bit element types are supported.
3718  bool FoldedBCast = false;
3719  if (!FoldedLoad && CanFoldLoads &&
3720  (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
3721  SDNode *ParentNode;
3722  if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
3723  FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
3724  Tmp1, Tmp2, Tmp3, Tmp4);
3725  }
3726 
3727  // Try the other operand.
3728  if (!FoldedBCast) {
3729  if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
3730  FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
3731  Tmp1, Tmp2, Tmp3, Tmp4);
3732  if (FoldedBCast)
3733  std::swap(Src0, Src1);
3734  }
3735  }
3736  }
3737 
3738  auto getMaskRC = [](MVT MaskVT) {
3739  switch (MaskVT.SimpleTy) {
3740  default: llvm_unreachable("Unexpected VT!");
3741  case MVT::v2i1: return X86::VK2RegClassID;
3742  case MVT::v4i1: return X86::VK4RegClassID;
3743  case MVT::v8i1: return X86::VK8RegClassID;
3744  case MVT::v16i1: return X86::VK16RegClassID;
3745  case MVT::v32i1: return X86::VK32RegClassID;
3746  case MVT::v64i1: return X86::VK64RegClassID;
3747  }
3748  };
3749 
3750  bool IsMasked = InMask.getNode() != nullptr;
3751 
3752  SDLoc dl(Root);
3753 
3754  MVT ResVT = Setcc.getSimpleValueType();
3755  MVT MaskVT = ResVT;
3756  if (Widen) {
3757  // Widen the inputs using insert_subreg or copy_to_regclass.
3758  unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
3759  unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
3760  unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
3761  CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
3762  MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3763  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
3764  CmpVT), 0);
3765  Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
3766 
3767  assert(!FoldedLoad && "Shouldn't have folded the load");
3768  if (!FoldedBCast)
3769  Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
3770 
3771  if (IsMasked) {
3772  // Widen the mask.
3773  unsigned RegClass = getMaskRC(MaskVT);
3774  SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
3775  InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
3776  dl, MaskVT, InMask, RC), 0);
3777  }
3778  }
3779 
3780  bool IsTestN = CC == ISD::SETEQ;
3781  unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
3782  IsMasked);
3783 
3784  MachineSDNode *CNode;
3785  if (FoldedLoad || FoldedBCast) {
3786  SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
3787 
3788  if (IsMasked) {
3789  SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
3790  Load.getOperand(0) };
3791  CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3792  } else {
3793  SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
3794  Load.getOperand(0) };
3795  CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3796  }
3797 
3798  // Update the chain.
3799  ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
3800  // Record the mem-refs
3801  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
3802  } else {
3803  if (IsMasked)
3804  CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
3805  else
3806  CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
3807  }
3808 
3809  // If we widened, we need to shrink the mask VT.
3810  if (Widen) {
3811  unsigned RegClass = getMaskRC(ResVT);
3812  SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
3813  CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
3814  dl, ResVT, SDValue(CNode, 0), RC);
3815  }
3816 
3817  ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
3818  CurDAG->RemoveDeadNode(Root);
3819  return true;
3820 }
3821 
3822 void X86DAGToDAGISel::Select(SDNode *Node) {
3823  MVT NVT = Node->getSimpleValueType(0);
3824  unsigned Opcode = Node->getOpcode();
3825  SDLoc dl(Node);
3826 
3827  if (Node->isMachineOpcode()) {
3828  LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
3829  Node->setNodeId(-1);
3830  return; // Already selected.
3831  }
3832 
3833  switch (Opcode) {
3834  default: break;
3835  case ISD::INTRINSIC_VOID: {
3836  unsigned IntNo = Node->getConstantOperandVal(1);
3837  switch (IntNo) {
3838  default: break;
3839  case Intrinsic::x86_sse3_monitor:
3840  case Intrinsic::x86_monitorx:
3841  case Intrinsic::x86_clzero: {
3842  bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
3843 
3844  unsigned Opc = 0;
3845  switch (IntNo) {
3846  case Intrinsic::x86_sse3_monitor:
3847  if (!Subtarget->hasSSE3())
3848  break;
3849  Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
3850  break;
3851  case Intrinsic::x86_monitorx:
3852  if (!Subtarget->hasMWAITX())
3853  break;
3854  Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
3855  break;
3856  case Intrinsic::x86_clzero:
3857  if (!Subtarget->hasCLZERO())
3858  break;
3859  Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
3860  break;
3861  }
3862 
3863  if (Opc) {
3864  unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
3865  SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
3866  Node->getOperand(2), SDValue());
3867  SDValue InFlag = Chain.getValue(1);
3868 
3869  if (IntNo == Intrinsic::x86_sse3_monitor ||
3870  IntNo == Intrinsic::x86_monitorx) {
3871  // Copy the other two operands to ECX and EDX.
3872  Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
3873  InFlag);
3874  InFlag = Chain.getValue(1);
3875  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
3876  InFlag);
3877  InFlag = Chain.getValue(1);
3878  }
3879 
3880  MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3881  { Chain, InFlag});
3882  ReplaceNode(Node, CNode);
3883  return;
3884  }
3885  }
3886  }
3887 
3888  break;
3889  }
3890  case ISD::BRIND: {
3891  if (Subtarget->isTargetNaCl())
3892  // NaCl has its own pass where jmp %r32 is converted to jmp %r64. We
3893  // leave the instruction alone.
3894  break;
3895  if (Subtarget->isTarget64BitILP32()) {
3896  // Converts a 32-bit register to a 64-bit, zero-extended version of
3897  // it. This is needed because x86-64 can do many things, but jmp %r32
3898  // ain't one of them.
3899  const SDValue &Target = Node->getOperand(1);
3900  assert(Target.getValueType() == MVT::i32 && "Unexpected VT!");
3901  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
3902  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
3903  Node->getOperand(0), ZextTarget);
3904  ReplaceNode(Node, Brind.getNode());
3905  SelectCode(ZextTarget.getNode());
3906  SelectCode(Brind.getNode());
3907  return;
3908  }
3909  break;
3910  }
3911  case X86ISD::GlobalBaseReg:
3912  ReplaceNode(Node, getGlobalBaseReg());
3913  return;
3914 
3915  case ISD::BITCAST:
3916  // Just drop all 128/256/512-bit bitcasts.
3917  if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
3918  NVT == MVT::f128) {
3919  ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
3920  CurDAG->RemoveDeadNode(Node);
3921  return;
3922  }
3923  break;
3924 
3925  case ISD::VSELECT: {
3926  // Replace VSELECT with non-mask conditions with BLENDV.
3927  if (Node->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3928  break;
3929 
3930  assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
3931  SDValue Blendv = CurDAG->getNode(
3932  X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
3933  Node->getOperand(1), Node->getOperand(2));
3934  ReplaceNode(Node, Blendv.getNode());
3935  SelectCode(Blendv.getNode());
3936  // We already called ReplaceUses.
3937  return;
3938  }
3939 
3940  case ISD::SRL:
3941  if (matchBitExtract(Node))
3942  return;
3943  LLVM_FALLTHROUGH;
3944  case ISD::SRA:
3945  case ISD::SHL:
3946  if (tryShiftAmountMod(Node))
3947  return;
3948  break;
3949 
3950  case ISD::AND:
3951  if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
3952  // Try to form a masked VPTESTM. Operands can be in either order.
3953  SDValue N0 = Node->getOperand(0);
3954  SDValue N1 = Node->getOperand(1);
3955  if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
3956  tryVPTESTM(Node, N0, N1))
3957  return;
3958  if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
3959  tryVPTESTM(Node, N1, N0))
3960  return;
3961  }
3962 
3963  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
3964  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
3965  CurDAG->RemoveDeadNode(Node);
3966  return;
3967  }
3968  if (matchBitExtract(Node))
3969  return;
3970  if (AndImmShrink && shrinkAndImmediate(Node))
3971  return;
3972 
3973  LLVM_FALLTHROUGH;
3974  case ISD::OR:
3975  case ISD::XOR: {
3976 
3977  // For operations of the form (x << C1) op C2, check if we can use a smaller
3978  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
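// Editorial illustration (not part of the original source): e.g.
// (xor (shl X, 8), 0x4500) can be rewritten as (shl (xor X, 0x45), 8); the
// shifted-down constant 0x45 fits in an 8-bit immediate while 0x4500 does
// not, and no set bits are lost because the low 8 bits of 0x4500 are zero.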
3979  SDValue Shift = Node->getOperand(0);
3980  SDValue N1 = Node->getOperand(1);
3981 
3982  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
3983  if (!Cst)
3984  break;
3985 
3986  int64_t Val = Cst->getSExtValue();
3987 
3988  // If we have an any_extend feeding the AND, look through it to see if there
3989  // is a shift behind it. But only if the AND doesn't use the extended bits.
3990  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
3991  bool FoundAnyExtend = false;
3992  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
3993  Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
3994  isUInt<32>(Val)) {
3995  FoundAnyExtend = true;
3996  Shift = Shift.getOperand(0);
3997  }
3998 
3999  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
4000  break;
4001 
4002  // i8 is unshrinkable, i16 should be promoted to i32.
4003  if (NVT != MVT::i32 && NVT != MVT::i64)
4004  break;
4005 
4006  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
4007  if (!ShlCst)
4008  break;
4009 
4010  uint64_t ShAmt = ShlCst->getZExtValue();
4011 
4012  // Make sure that we don't change the operation by removing bits.
4013  // This only matters for OR and XOR, AND is unaffected.
4014  uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
4015  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
4016  break;
4017 
4018  // Check the minimum bitwidth for the new constant.
4019  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
4020  auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
4021  if (Opcode == ISD::AND) {
4022  // AND32ri is the same as AND64ri32 with zext imm.
4023  // Try this before sign extended immediates below.
4024  ShiftedVal = (uint64_t)Val >> ShAmt;
4025  if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
4026  return true;
4027  // Also swap order when the AND can become MOVZX.
4028  if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
4029  return true;
4030  }
4031  ShiftedVal = Val >> ShAmt;
4032  if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
4033  (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
4034  return true;
4035  if (Opcode != ISD::AND) {
4036  // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
4037  ShiftedVal = (uint64_t)Val >> ShAmt;
4038  if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
4039  return true;
4040  }
4041  return false;
4042  };
4043 
4044  int64_t ShiftedVal;
4045  if (!CanShrinkImmediate(ShiftedVal))
4046  break;
4047 
4048  // Ok, we can reorder to get a smaller immediate.
4049 
4050  // But it's possible the original immediate allowed an AND to become MOVZX.
4051  // Do this check late to delay the MaskedValueIsZero call as long as
4052  // possible.
4053  if (Opcode == ISD::AND) {
4054  // Find the smallest zext this could possibly be.
4055  unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
4056  ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));
4057 
4058  // Figure out which bits need to be zero to achieve that mask.
4059  APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
4060  ZExtWidth);
4061  NeededMask &= ~Cst->getAPIntValue();
4062 
4063  if (CurDAG->MaskedValueIsZero(Node->getOperand(0), NeededMask))
4064  break;
4065  }
4066 
4067  SDValue X = Shift.getOperand(0);
4068  if (FoundAnyExtend) {
4069  SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
4070  insertDAGNode(*CurDAG, SDValue(Node, 0), NewX);
4071  X = NewX;
4072  }
4073 
4074  SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
4075  insertDAGNode(*CurDAG, SDValue(Node, 0), NewCst);
4076  SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
4077  insertDAGNode(*CurDAG, SDValue(Node, 0), NewBinOp);
4078  SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
4079  Shift.getOperand(1));
4080  ReplaceNode(Node, NewSHL.getNode());
4081  SelectCode(NewSHL.getNode());
4082  return;
4083  }
4084  case X86ISD::SMUL:
4085  // i16/i32/i64 are handled with isel patterns.
4086  if (NVT != MVT::i8)
4087  break;
4088  LLVM_FALLTHROUGH;
4089  case X86ISD::UMUL: {
4090  SDValue N0 = Node->getOperand(0);
4091  SDValue N1 = Node->getOperand(1);
4092 
4093  unsigned LoReg, ROpc, MOpc;
4094  switch (NVT.SimpleTy) {
4095  default: llvm_unreachable("Unsupported VT!");
4096  case MVT::i8:
4097  LoReg = X86::AL;
4098  ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
4099  MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
4100  break;
4101  case MVT::i16:
4102  LoReg = X86::AX;
4103  ROpc = X86::MUL16r;
4104  MOpc = X86::MUL16m;
4105  break;
4106  case MVT::i32:
4107  LoReg = X86::EAX;
4108  ROpc = X86::MUL32r;
4109  MOpc = X86::MUL32m;
4110  break;
4111  case MVT::i64:
4112  LoReg = X86::RAX;
4113  ROpc = X86::MUL64r;
4114  MOpc = X86::MUL64m;
4115  break;
4116  }
4117 
4118  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4119  bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4120  // Multiply is commutative.
4121  if (!FoldedLoad) {
4122  FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4123  if (FoldedLoad)
4124  std::swap(N0, N1);
4125  }
4126 
4127  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
4128  N0, SDValue()).getValue(1);
4129 
4130  MachineSDNode *CNode;
4131  if (FoldedLoad) {
4132  // i16/i32/i64 use an instruction that produces a low and high result even
4133  // though only the low result is used.
4134  SDVTList VTs;
4135  if (NVT == MVT::i8)
4136  VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4137  else
4138  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
4139 
4140  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4141  InFlag };
4142  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4143 
4144  // Update the chain.
4145  ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
4146  // Record the mem-refs
4147  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4148  } else {
4149  // i16/i32/i64 use an instruction that produces a low and high result even
4150  // though only the low result is used.
4151  SDVTList VTs;
4152  if (NVT == MVT::i8)
4153  VTs = CurDAG->getVTList(NVT, MVT::i32);
4154  else
4155  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
4156 
4157  CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
4158  }
4159 
4160  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4161  ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
4162  CurDAG->RemoveDeadNode(Node);
4163  return;
4164  }
4165 
4166  case ISD::SMUL_LOHI:
4167  case ISD::UMUL_LOHI: {
4168  SDValue N0 = Node->getOperand(0);
4169  SDValue N1 = Node->getOperand(1);
4170 
4171  unsigned Opc, MOpc;
4172  bool isSigned = Opcode == ISD::SMUL_LOHI;
4173  if (!isSigned) {
4174  switch (NVT.SimpleTy) {
4175  default: llvm_unreachable("Unsupported VT!");
4176  case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
4177  case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
4178  }
4179  } else {
4180  switch (NVT.SimpleTy) {
4181  default: llvm_unreachable("Unsupported VT!");
4182  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
4183  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
4184  }
4185  }
4186 
4187  unsigned SrcReg, LoReg, HiReg;
4188  switch (Opc) {
4189  default: llvm_unreachable("Unknown MUL opcode!");
4190  case X86::IMUL32r:
4191  case X86::MUL32r:
4192  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
4193  break;
4194  case X86::IMUL64r:
4195  case X86::MUL64r:
4196  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
4197  break;
4198  }
4199 
4200  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4201  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4202  // Multiply is commutative.
4203  if (!foldedLoad) {
4204  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4205  if (foldedLoad)
4206  std::swap(N0, N1);
4207  }
4208 
4209  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
4210  N0, SDValue()).getValue(1);
4211  if (foldedLoad) {
4212  SDValue Chain;
4213  MachineSDNode *CNode = nullptr;
4214  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4215  InFlag };
4216  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
4217  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4218  Chain = SDValue(CNode, 0);
4219  InFlag = SDValue(CNode, 1);
4220 
4221  // Update the chain.
4222  ReplaceUses(N1.getValue(1), Chain);
4223  // Record the mem-refs
4224  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4225  } else {
4226  SDValue Ops[] = { N1, InFlag };
4227  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
4228  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4229  InFlag = SDValue(CNode, 0);
4230  }
4231 
4232  // Copy the low half of the result, if it is needed.
4233  if (!SDValue(Node, 0).use_empty()) {
4234  assert(LoReg && "Register for low half is not defined!");
4235  SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
4236  NVT, InFlag);
4237  InFlag = ResLo.getValue(2);
4238  ReplaceUses(SDValue(Node, 0), ResLo);
4239  LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
4240  dbgs() << '\n');
4241  }
4242  // Copy the high half of the result, if it is needed.
4243  if (!SDValue(Node, 1).use_empty()) {
4244  assert(HiReg && "Register for high half is not defined!");
4245  SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
4246  NVT, InFlag);
4247  InFlag = ResHi.getValue(2);
4248  ReplaceUses(SDValue(Node, 1), ResHi);
4249  LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
4250  dbgs() << '\n');
4251  }
4252 
4253  CurDAG->RemoveDeadNode(Node);
4254  return;
4255  }
4256 
4257  case ISD::SDIVREM:
4258  case ISD::UDIVREM: {
4259  SDValue N0 = Node->getOperand(0);
4260  SDValue N1 = Node->getOperand(1);
4261 
4262  unsigned Opc, MOpc;
4263  bool isSigned = Opcode == ISD::SDIVREM;
4264  if (!isSigned) {
4265  switch (NVT.SimpleTy) {
4266  default: llvm_unreachable("Unsupported VT!");
4267  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
4268  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
4269  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
4270  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
4271  }
4272  } else {
4273  switch (NVT.SimpleTy) {
4274  default: llvm_unreachable("Unsupported VT!");
4275  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
4276  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
4277  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
4278  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
4279  }
4280  }
4281 
4282  unsigned LoReg, HiReg, ClrReg;
4283  unsigned SExtOpcode;
4284  switch (NVT.SimpleTy) {
4285  default: llvm_unreachable("Unsupported VT!");
4286  case MVT::i8:
4287  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
4288  SExtOpcode = X86::CBW;
4289  break;
4290  case MVT::i16:
4291  LoReg = X86::AX; HiReg = X86::DX;
4292  ClrReg = X86::DX;
4293  SExtOpcode = X86::CWD;
4294  break;
4295  case MVT::i32:
4296  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
4297  SExtOpcode = X86::CDQ;
4298  break;
4299  case MVT::i64:
4300  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
4301  SExtOpcode = X86::CQO;
4302  break;
4303  }
4304 
4305  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4306  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4307  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
4308 
4309  SDValue InFlag;
4310  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
4311  // Special case for div8, just use a move with zero extension to AX to
4312  // clear the upper 8 bits (AH).
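// Editorial illustration (not part of the original source): an 8-bit DIV
// divides AX by the operand, leaving the quotient in AL and the remainder in
// AH, so a MOVZX of the dividend into EAX is enough here; no separate
// clearing of AH is required.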
4313  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
4314  MachineSDNode *Move;
4315  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4316  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4317  Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
4318  MVT::Other, Ops);
4319  Chain = SDValue(Move, 1);
4320  ReplaceUses(N0.getValue(1), Chain);
4321  // Record the mem-refs
4322  CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
4323  } else {
4324  Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0);
4325  Chain = CurDAG->getEntryNode();
4326  }
4327  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0),
4328  SDValue());
4329  InFlag = Chain.getValue(1);
4330  } else {
4331  InFlag =
4332  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
4333  LoReg, N0, SDValue()).getValue(1);
4334  if (isSigned && !signBitIsZero) {
4335  // Sign extend the low part into the high part.
4336  InFlag =
4337  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
4338  } else {
4339  // Zero out the high part, effectively zero extending the input.
4340  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
4341  switch (NVT.SimpleTy) {
4342  case MVT::i16:
4343  ClrNode =
4344  SDValue(CurDAG->getMachineNode(
4345  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
4346  CurDAG->getTargetConstant(X86::sub_16bit, dl,
4347  MVT::i32)),
4348  0);
4349  break;
4350  case MVT::i32:
4351  break;
4352  case MVT::i64:
4353  ClrNode =
4354  SDValue(CurDAG->getMachineNode(
4355  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
4356  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
4357  CurDAG->getTargetConstant(X86::sub_32bit, dl,
4358  MVT::i32)),
4359  0);
4360  break;
4361  default:
4362  llvm_unreachable("Unexpected division source");
4363  }
4364 
4365  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
4366  ClrNode, InFlag).getValue(1);
4367  }
4368  }
4369 
4370  if (foldedLoad) {
4371  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4372  InFlag };
4373  MachineSDNode *CNode =
4374  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
4375  InFlag = SDValue(CNode, 1);
4376  // Update the chain.
4377  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
4378  // Record the mem-refs
4379  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4380  } else {
4381  InFlag =
4382  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
4383  }
4384 
4385  // Prevent use of AH in a REX instruction by explicitly copying it to
4386  // an ABCD_L register.
4387  //
4388  // The current assumption of the register allocator is that isel
4389  // won't generate explicit references to the GR8_ABCD_H registers. If
4390  // the allocator and/or the backend get enhanced to be more robust in
4391  // that regard, this can be, and should be, removed.
4392  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
4393  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
4394  unsigned AHExtOpcode =
4395  isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
4396 
4397  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
4398  MVT::Glue, AHCopy, InFlag);
4399  SDValue Result(RNode, 0);
4400  InFlag = SDValue(RNode, 1);
4401 
4402  Result =
4403  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
4404 
4405  ReplaceUses(SDValue(Node, 1), Result);
4406  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4407  dbgs() << '\n');
4408  }
4409  // Copy the division (low) result, if it is needed.
4410  if (!SDValue(Node, 0).use_empty()) {
4411  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4412  LoReg, NVT, InFlag);
4413  InFlag = Result.getValue(2);
4414  ReplaceUses(SDValue(Node, 0), Result);
4415  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4416  dbgs() << '\n');
4417  }
4418  // Copy the remainder (high) result, if it is needed.
4419  if (!SDValue(Node, 1).use_empty()) {
4420  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4421  HiReg, NVT, InFlag);
4422  InFlag = Result.getValue(2);
4423  ReplaceUses(SDValue(Node, 1), Result);
4424  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4425  dbgs() << '\n');
4426  }
4427  CurDAG->RemoveDeadNode(Node);
4428  return;
4429  }
4430 
4431  case X86ISD::CMP: {
4432  SDValue N0 = Node->getOperand(0);
4433  SDValue N1 = Node->getOperand(1);
4434 
4435  // Optimizations for TEST compares.
4436  if (!isNullConstant(N1))
4437  break;
4438 
4439  // Save the original VT of the compare.
4440  MVT CmpVT = N0.getSimpleValueType();
4441 
4442  // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
4443  // by a test instruction. The test should be removed later by
4444  // analyzeCompare if we are using only the zero flag.
4445  // TODO: Should we check the users and use the BEXTR flags directly?
4446  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
4447  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
4448  unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
4449  : X86::TEST32rr;
4450  SDValue BEXTR = SDValue(NewNode, 0);
4451  NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
4452  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4453  CurDAG->RemoveDeadNode(Node);
4454  return;
4455  }
4456  }
4457 
4458  // We can peek through truncates, but we need to be careful below.
4459  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
4460  N0 = N0.getOperand(0);
4461 
4462  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
4463  // use a smaller encoding.
4464  // Look past the truncate if CMP is the only use of it.
4465  if (N0.getOpcode() == ISD::AND &&
4466  N0.getNode()->hasOneUse() &&
4467  N0.getValueType() != MVT::i8) {
4468  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
4469  if (!C) break;
4470  uint64_t Mask = C->getZExtValue();
4471 
4472  // Check if we can replace AND+IMM64 with a shift. This is possible for
4473  // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
4474  // flag.
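// Editorial illustration (not part of the original source): for
// Mask == 0xFFFFFFFF00000000 (~Mask is a low-bit mask), (X & Mask) == 0
// becomes "shrq $32, X; testq X, X"; for the low-bit mask 0x00000000FFFFFFFF
// it becomes "shlq $32, X; testq X, X". Both avoid materializing a 64-bit
// immediate, and only the zero flag remains meaningful.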
4475  if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
4476  onlyUsesZeroFlag(SDValue(Node, 0))) {
4477  if (isMask_64(~Mask)) {
4478  unsigned TrailingZeros = countTrailingZeros(Mask);
4479  SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
4480  SDValue Shift =
4481  SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
4482  N0.getOperand(0), Imm), 0);
4483  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
4484  MVT::i32, Shift, Shift);
4485  ReplaceNode(Node, Test);
4486  return;
4487  }
4488  if (isMask_64(Mask)) {
4489  unsigned LeadingZeros = countLeadingZeros(Mask);
4490  SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
4491  SDValue Shift =
4492  SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
4493  N0.getOperand(0), Imm), 0);
4494  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
4495  MVT::i32, Shift, Shift);
4496  ReplaceNode(Node, Test);
4497  return;
4498  }
4499  }
4500 
4501  MVT VT;
4502  int SubRegOp;
4503  unsigned ROpc, MOpc;
4504 
4505  // For each of these checks we need to be careful if the sign flag is
4506  // being used. It is only safe to use the sign flag in two conditions,
4507  // either the sign bit in the shrunken mask is zero or the final test
4508  // size is equal to the original compare size.
4509 
4510  if (isUInt<8>(Mask) &&
4511  (!(Mask & 0x80) || CmpVT == MVT::i8 ||
4512  hasNoSignFlagUses(SDValue(Node, 0)))) {
4513  // For example, convert "testl %eax, $8" to "testb %al, $8"
4514  VT = MVT::i8;
4515  SubRegOp = X86::sub_8bit;
4516  ROpc = X86::TEST8ri;
4517  MOpc = X86::TEST8mi;
4518  } else if (OptForMinSize && isUInt<16>(Mask) &&
4519  (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
4520  hasNoSignFlagUses(SDValue(Node, 0)))) {
4521  // For example, "testl %eax, $32776" to "testw %ax, $32776".
4522  // NOTE: We only want to form TESTW instructions if optimizing for
4523  // min size. Otherwise we only save one byte and possibly get a length
4524  // changing prefix penalty in the decoders.
4525  VT = MVT::i16;
4526  SubRegOp = X86::sub_16bit;
4527  ROpc = X86::TEST16ri;
4528  MOpc = X86::TEST16mi;
4529  } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
4530  ((!(Mask & 0x80000000) &&
4531  // Without minsize 16-bit Cmps can get here so we need to
4532  // be sure we calculate the correct sign flag if needed.
4533  (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
4534  CmpVT == MVT::i32 ||
4535  hasNoSignFlagUses(SDValue(Node, 0)))) {
4536  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
4537  // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
4538  // Otherwise, we find ourselves in a position where we have to do
4539  // promotion. If previous passes did not promote the and, we assume
4540  // they had a good reason not to and do not promote here.
4541  VT = MVT::i32;
4542  SubRegOp = X86::sub_32bit;
4543  ROpc = X86::TEST32ri;
4544  MOpc = X86::TEST32mi;
4545  } else {
4546  // No eligible transformation was found.
4547  break;
4548  }
4549 
4550  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
4551  SDValue Reg = N0.getOperand(0);
4552 
4553  // Emit a testl or testw.
4554  MachineSDNode *NewNode;
4555  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4556  if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4557  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
4558  Reg.getOperand(0) };
4559  NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
4560  // Update the chain.
4561  ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
4562  // Record the mem-refs
4563  CurDAG->setNodeMemRefs(NewNode,
4564  {cast<LoadSDNode>(Reg)->getMemOperand()});
4565  } else {
4566  // Extract the subregister if necessary.
4567  if (N0.getValueType() != VT)
4568  Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
4569 
4570  NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
4571  }
4572  // Replace CMP with TEST.
4573  ReplaceNode(Node, NewNode);
4574  return;
4575  }
4576  break;
4577  }
4578  case X86ISD::PCMPISTR: {
4579  if (!Subtarget->hasSSE42())
4580  break;
4581 
4582  bool NeedIndex = !SDValue(Node, 0).use_empty();
4583  bool NeedMask = !SDValue(Node, 1).use_empty();
4584  // We can't fold a load if we are going to make two instructions.
4585  bool MayFoldLoad = !NeedIndex || !NeedMask;
4586 
4587  MachineSDNode *CNode;
4588  if (NeedMask) {
4589  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
4590  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
4591  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
4592  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4593  }
4594  if (NeedIndex || !NeedMask) {
4595  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
4596  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
4597  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
4598  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4599  }
4600 
4601  // Connect the flag usage to the last instruction created.
4602  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4603  CurDAG->RemoveDeadNode(Node);
4604  return;
4605  }
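  // For reference: PCMPISTRI leaves its index result in ECX while PCMPISTRM
  // leaves its mask in XMM0, so when both results of the node are live two
  // separate instructions are emitted; as the comment above notes, the load
  // is then not folded, which would otherwise duplicate the memory access.
  // The EFLAGS result is wired to whichever instruction was created last.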
4606  case X86ISD::PCMPESTR: {
4607  if (!Subtarget->hasSSE42())
4608  break;
4609 
4610  // Copy the two implicit register inputs.
4611  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
4612  Node->getOperand(1),
4613  SDValue()).getValue(1);
4614  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
4615  Node->getOperand(3), InFlag).getValue(1);
4616 
4617  bool NeedIndex = !SDValue(Node, 0).use_empty();
4618  bool NeedMask = !SDValue(Node, 1).use_empty();
4619  // We can't fold a load if we are going to make two instructions.
4620  bool MayFoldLoad = !NeedIndex || !NeedMask;
4621 
4622  MachineSDNode *CNode;
4623  if (NeedMask) {
4624  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
4625  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
4626  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
4627  InFlag);
4628  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4629  }
4630  if (NeedIndex || !NeedMask) {
4631  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
4632  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
4633  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
4634  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4635  }
4636  // Connect the flag usage to the last instruction created.
4637  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4638  CurDAG->RemoveDeadNode(Node);
4639  return;
4640  }
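  // For reference: the "explicit length" forms (PCMPESTRI/PCMPESTRM) read the
  // operand lengths from EAX and EDX, which is why the two CopyToReg nodes at
  // the top of this case thread those values into the emitted instruction(s)
  // through the glue value (InFlag).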
4641 
4642  case ISD::SETCC: {
4643  if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
4644  return;
4645 
4646  break;
4647  }
4648 
4649  case ISD::STORE:
4650  if (foldLoadStoreIntoMemOperand(Node))
4651  return;
4652  break;
4653  }
4654 
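  // Anything not handled by the cases above falls through to the
  // TableGen-generated matcher (SelectCode), which applies the declarative
  // X86 instruction-selection patterns.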
4655  SelectCode(Node);
4656 }
4657 
4658 bool X86DAGToDAGISel::
4659 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4660  std::vector<SDValue> &OutOps) {
4661  SDValue Op0, Op1, Op2, Op3, Op4;
4662  switch (ConstraintID) {
4663  default:
4664  llvm_unreachable("Unexpected asm memory constraint");
4665  case InlineAsm::Constraint_i:
4666  // FIXME: It seems strange that 'i' is needed here since it's supposed to
4667  // be an immediate and not a memory constraint.
4668  LLVM_FALLTHROUGH;
4669  case InlineAsm::Constraint_o: // offsetable ??
4670  case InlineAsm::Constraint_v: // not offsetable ??
4671  case InlineAsm::Constraint_m: // memory
4672  case InlineAsm::Constraint_X:
4673  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
4674  return true;
4675  break;
4676  }
4677 
4678  OutOps.push_back(Op0);
4679  OutOps.push_back(Op1);
4680  OutOps.push_back(Op2);
4681  OutOps.push_back(Op3);
4682  OutOps.push_back(Op4);
4683  return false;
4684 }
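// The five operands pushed above form the usual X86 memory-operand tuple
// produced by selectAddr: base, scale, index, displacement and segment. As a
// rough illustration (with a made-up variable name), an inline-asm memory
// constraint such as
//   int counter;
//   asm volatile("incl %0" : "+m"(counter));
// reaches this hook so that the address of 'counter' can be rewritten into
// that five-operand form.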
4685 
4686 /// This pass converts a legalized DAG into a X86-specific DAG,
4687 /// ready for instruction scheduling.
4688 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
4689  CodeGenOpt::Level OptLevel) {
4690  return new X86DAGToDAGISel(TM, OptLevel);
4691 }
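// A rough sketch of how this factory is used: the X86 pass configuration
// installs the selector from its instruction-selection hook, approximately
//   bool X86PassConfig::addInstSelector() {
//     addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
//     ...
//     return false;
//   }
// (see X86TargetMachine.cpp for the exact wiring).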