X86ISelDAGToDAG.cpp
1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a DAG pattern matching instruction selector for X86,
10 // converting from a legalized dag to a X86 dag.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86.h"
15 #include "X86MachineFunctionInfo.h"
16 #include "X86RegisterInfo.h"
17 #include "X86Subtarget.h"
18 #include "X86TargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/IR/ConstantRange.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/Type.h"
29 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/KnownBits.h"
36 #include <stdint.h>
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "x86-isel"
40 
41 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
42 
43 static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
44  cl::desc("Enable setting constant bits to reduce size of mask immediates"),
45  cl::Hidden);
46 
47 //===----------------------------------------------------------------------===//
48 // Pattern Matcher Implementation
49 //===----------------------------------------------------------------------===//
50 
51 namespace {
52  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
53  /// numbers for the leaves of the matched tree.
54  struct X86ISelAddressMode {
55  enum {
56  RegBase,
57  FrameIndexBase
58  } BaseType;
59 
60  // This is really a union, discriminated by BaseType!
61  SDValue Base_Reg;
62  int Base_FrameIndex;
63 
64  unsigned Scale;
65  SDValue IndexReg;
66  int32_t Disp;
67  SDValue Segment;
68  const GlobalValue *GV;
69  const Constant *CP;
70  const BlockAddress *BlockAddr;
71  const char *ES;
72  MCSymbol *MCSym;
73  int JT;
74  unsigned Align; // CP alignment.
75  unsigned char SymbolFlags; // X86II::MO_*
76  bool NegateIndex = false;
77 
78  X86ISelAddressMode()
79  : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
80  Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
81  MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
82 
83  bool hasSymbolicDisplacement() const {
84  return GV != nullptr || CP != nullptr || ES != nullptr ||
85  MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
86  }
87 
88  bool hasBaseOrIndexReg() const {
89  return BaseType == FrameIndexBase ||
90  IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
91  }
92 
93  /// Return true if this addressing mode is already RIP-relative.
94  bool isRIPRelative() const {
95  if (BaseType != RegBase) return false;
96  if (RegisterSDNode *RegNode =
97  dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
98  return RegNode->getReg() == X86::RIP;
99  return false;
100  }
101 
102  void setBaseReg(SDValue Reg) {
103  BaseType = RegBase;
104  Base_Reg = Reg;
105  }
106 
107 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
108  void dump(SelectionDAG *DAG = nullptr) {
109  dbgs() << "X86ISelAddressMode " << this << '\n';
110  dbgs() << "Base_Reg ";
111  if (Base_Reg.getNode())
112  Base_Reg.getNode()->dump(DAG);
113  else
114  dbgs() << "nul\n";
115  if (BaseType == FrameIndexBase)
116  dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
117  dbgs() << " Scale " << Scale << '\n'
118  << "IndexReg ";
119  if (NegateIndex)
120  dbgs() << "negate ";
121  if (IndexReg.getNode())
122  IndexReg.getNode()->dump(DAG);
123  else
124  dbgs() << "nul\n";
125  dbgs() << " Disp " << Disp << '\n'
126  << "GV ";
127  if (GV)
128  GV->dump();
129  else
130  dbgs() << "nul";
131  dbgs() << " CP ";
132  if (CP)
133  CP->dump();
134  else
135  dbgs() << "nul";
136  dbgs() << '\n'
137  << "ES ";
138  if (ES)
139  dbgs() << ES;
140  else
141  dbgs() << "nul";
142  dbgs() << " MCSym ";
143  if (MCSym)
144  dbgs() << MCSym;
145  else
146  dbgs() << "nul";
147  dbgs() << " JT" << JT << " Align" << Align << '\n';
148  }
149 #endif
150  };
151 }
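// As a hedged illustration (example values, not part of the original file):
// a SIB-form operand such as 16(%rbx,%rcx,4) would populate this struct
// roughly as BaseType = RegBase, Base_Reg = %rbx, IndexReg = %rcx, Scale = 4,
// Disp = 16, with the symbolic fields (GV, CP, ES, MCSym, BlockAddr) null and
// JT = -1. A stack-slot access would instead use BaseType = FrameIndexBase
// with Base_FrameIndex identifying the slot.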
152 
153 namespace {
154  //===--------------------------------------------------------------------===//
155  /// ISel - X86-specific code to select X86 machine instructions for
156  /// SelectionDAG operations.
157  ///
158  class X86DAGToDAGISel final : public SelectionDAGISel {
159  /// Keep a pointer to the X86Subtarget around so that we can
160  /// make the right decision when generating code for different targets.
161  const X86Subtarget *Subtarget;
162 
163  /// If true, selector should try to optimize for code size instead of
164  /// performance.
165  bool OptForSize;
166 
167  /// If true, selector should try to optimize for minimum code size.
168  bool OptForMinSize;
169 
170  /// Disable direct TLS access through segment registers.
171  bool IndirectTlsSegRefs;
172 
173  public:
174  explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
175  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false),
176  OptForMinSize(false), IndirectTlsSegRefs(false) {}
177 
178  StringRef getPassName() const override {
179  return "X86 DAG->DAG Instruction Selection";
180  }
181 
182  bool runOnMachineFunction(MachineFunction &MF) override {
183  // Reset the subtarget each time through.
184  Subtarget = &MF.getSubtarget<X86Subtarget>();
185  IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
186  "indirect-tls-seg-refs");
187 
188  // OptFor[Min]Size are used in pattern predicates that isel is matching.
189  OptForSize = MF.getFunction().hasOptSize();
190  OptForMinSize = MF.getFunction().hasMinSize();
191  assert((!OptForMinSize || OptForSize) &&
192  "OptForMinSize implies OptForSize");
193 
194  SelectionDAGISel::runOnMachineFunction(MF);
195  return true;
196  }
197 
198  void EmitFunctionEntryCode() override;
199 
200  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
201 
202  void PreprocessISelDAG() override;
203  void PostprocessISelDAG() override;
204 
205 // Include the pieces autogenerated from the target description.
206 #include "X86GenDAGISel.inc"
207 
208  private:
209  void Select(SDNode *N) override;
210 
211  bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
212  bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
213  bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
214  bool matchAddress(SDValue N, X86ISelAddressMode &AM);
215  bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
216  bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
217  bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
218  unsigned Depth);
219  bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
220  bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
221  SDValue &Scale, SDValue &Index, SDValue &Disp,
222  SDValue &Segment);
223  bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
224  SDValue &Scale, SDValue &Index, SDValue &Disp,
225  SDValue &Segment);
226  bool selectMOV64Imm32(SDValue N, SDValue &Imm);
227  bool selectLEAAddr(SDValue N, SDValue &Base,
228  SDValue &Scale, SDValue &Index, SDValue &Disp,
229  SDValue &Segment);
230  bool selectLEA64_32Addr(SDValue N, SDValue &Base,
231  SDValue &Scale, SDValue &Index, SDValue &Disp,
232  SDValue &Segment);
233  bool selectTLSADDRAddr(SDValue N, SDValue &Base,
234  SDValue &Scale, SDValue &Index, SDValue &Disp,
235  SDValue &Segment);
236  bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
237  SDValue &Base, SDValue &Scale,
238  SDValue &Index, SDValue &Disp,
239  SDValue &Segment,
240  SDValue &NodeWithChain);
241  bool selectRelocImm(SDValue N, SDValue &Op);
242 
243  bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
244  SDValue &Base, SDValue &Scale,
245  SDValue &Index, SDValue &Disp,
246  SDValue &Segment);
247 
248  // Convenience method where P is also root.
249  bool tryFoldLoad(SDNode *P, SDValue N,
250  SDValue &Base, SDValue &Scale,
251  SDValue &Index, SDValue &Disp,
252  SDValue &Segment) {
253  return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
254  }
255 
256  /// Implement addressing mode selection for inline asm expressions.
257  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
258  unsigned ConstraintID,
259  std::vector<SDValue> &OutOps) override;
260 
261  void emitSpecialCodeForMain();
262 
263  inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
264  MVT VT, SDValue &Base, SDValue &Scale,
265  SDValue &Index, SDValue &Disp,
266  SDValue &Segment) {
267  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
268  Base = CurDAG->getTargetFrameIndex(
269  AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
270  else if (AM.Base_Reg.getNode())
271  Base = AM.Base_Reg;
272  else
273  Base = CurDAG->getRegister(0, VT);
274 
275  Scale = getI8Imm(AM.Scale, DL);
276 
277  // Negate the index if needed.
278  if (AM.NegateIndex) {
279  unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
280  SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
281  AM.IndexReg), 0);
282  AM.IndexReg = Neg;
283  }
284 
285  if (AM.IndexReg.getNode())
286  Index = AM.IndexReg;
287  else
288  Index = CurDAG->getRegister(0, VT);
289 
290  // These are 32-bit even in 64-bit mode since RIP-relative offset
291  // is 32-bit.
292  if (AM.GV)
293  Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
294  MVT::i32, AM.Disp,
295  AM.SymbolFlags);
296  else if (AM.CP)
297  Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
298  AM.Align, AM.Disp, AM.SymbolFlags);
299  else if (AM.ES) {
300  assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
301  Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
302  } else if (AM.MCSym) {
303  assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
304  assert(AM.SymbolFlags == 0 && "oo");
305  Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
306  } else if (AM.JT != -1) {
307  assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
308  Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
309  } else if (AM.BlockAddr)
310  Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
311  AM.SymbolFlags);
312  else
313  Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
314 
315  if (AM.Segment.getNode())
316  Segment = AM.Segment;
317  else
318  Segment = CurDAG->getRegister(0, MVT::i16);
319  }
320 
321  // Utility function to determine whether we should avoid selecting
322  // immediate forms of instructions for better code size or not.
323  // At a high level, we'd like to avoid such instructions when
324  // we have similar constants used within the same basic block
325  // that can be kept in a register.
326  //
327  bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
328  uint32_t UseCount = 0;
329 
330  // Do not want to hoist if we're not optimizing for size.
331  // TODO: We'd like to remove this restriction.
332  // See the comment in X86InstrInfo.td for more info.
333  if (!OptForSize)
334  return false;
335 
336  // Walk all the users of the immediate.
337  for (SDNode::use_iterator UI = N->use_begin(),
338  UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
339 
340  SDNode *User = *UI;
341 
342  // This user is already selected. Count it as a legitimate use and
343  // move on.
344  if (User->isMachineOpcode()) {
345  UseCount++;
346  continue;
347  }
348 
349  // We want to count stores of immediates as real uses.
350  if (User->getOpcode() == ISD::STORE &&
351  User->getOperand(1).getNode() == N) {
352  UseCount++;
353  continue;
354  }
355 
356  // We don't currently match users that have > 2 operands (except
357  // for stores, which are handled above)
358  // Those instructions won't match in ISel, for now, and would
359  // be counted incorrectly.
360  // This may change in the future as we add additional instruction
361  // types.
362  if (User->getNumOperands() != 2)
363  continue;
364 
365  // Immediates that are used for offsets as part of stack
366  // manipulation should be left alone. These are typically
367  // used to indicate SP offsets for argument passing and
368  // will get pulled into stores/pushes (implicitly).
369  if (User->getOpcode() == X86ISD::ADD ||
370  User->getOpcode() == ISD::ADD ||
371  User->getOpcode() == X86ISD::SUB ||
372  User->getOpcode() == ISD::SUB) {
373 
374  // Find the other operand of the add/sub.
375  SDValue OtherOp = User->getOperand(0);
376  if (OtherOp.getNode() == N)
377  OtherOp = User->getOperand(1);
378 
379  // Don't count if the other operand is SP.
380  RegisterSDNode *RegNode;
381  if (OtherOp->getOpcode() == ISD::CopyFromReg &&
382  (RegNode = dyn_cast_or_null<RegisterSDNode>(
383  OtherOp->getOperand(1).getNode())))
384  if ((RegNode->getReg() == X86::ESP) ||
385  (RegNode->getReg() == X86::RSP))
386  continue;
387  }
388 
389  // ... otherwise, count this and move on.
390  UseCount++;
391  }
392 
393  // If we have more than 1 use, then recommend for hoisting.
394  return (UseCount > 1);
395  }
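 // Illustrative sketch of the case above (assumed constants): with -Os and a
 // 32-bit constant such as 0x12345678 feeding two adds in one block,
 //   movl $0x12345678, %ecx
 //   addl %ecx, %eax
 //   addl %ecx, %edx
 // avoids encoding the 4-byte immediate twice, which is why two or more
 // qualifying uses cause this hook to recommend keeping the constant in a
 // register.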
396 
397  /// Return a target constant with the specified value of type i8.
398  inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
399  return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
400  }
401 
402  /// Return a target constant with the specified value, of type i32.
403  inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
404  return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
405  }
406 
407  /// Return a target constant with the specified value, of type i64.
408  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
409  return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
410  }
411 
412  SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
413  const SDLoc &DL) {
414  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
415  uint64_t Index = N->getConstantOperandVal(1);
416  MVT VecVT = N->getOperand(0).getSimpleValueType();
417  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
418  }
419 
420  SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
421  const SDLoc &DL) {
422  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
423  uint64_t Index = N->getConstantOperandVal(2);
424  MVT VecVT = N->getSimpleValueType(0);
425  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
426  }
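 // Worked example for the two helpers above (illustrative values): for a
 // 256-bit v8f32 source, extracting a 128-bit subvector starting at element
 // index 4 yields (4 * 32) / 128 = 1, i.e. immediate 1 selects the upper
 // half; inserting at element index 0 yields (0 * 32) / 128 = 0.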
427 
428  // Helper to detect unneeded and instructions on shift amounts. Called
429  // from PatFrags in tablegen.
430  bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
431  assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
432  const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
433 
434  if (Val.countTrailingOnes() >= Width)
435  return true;
436 
437  APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
438  return Mask.countTrailingOnes() >= Width;
439  }
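 // Worked example (illustrative): a 32-bit shift only uses the low 5 bits of
 // its amount, so Width == 5. A mask of 0x1f has five trailing ones and is
 // always unneeded; a mask of 0x1e only qualifies if bit 0 of the shift
 // amount is already known to be zero, making the combined mask 0x1f.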
440 
441  /// Return an SDNode that returns the value of the global base register.
442  /// Output instructions required to initialize the global base register,
443  /// if necessary.
444  SDNode *getGlobalBaseReg();
445 
446  /// Return a reference to the TargetMachine, casted to the target-specific
447  /// type.
448  const X86TargetMachine &getTargetMachine() const {
449  return static_cast<const X86TargetMachine &>(TM);
450  }
451 
452  /// Return a reference to the TargetInstrInfo, casted to the target-specific
453  /// type.
454  const X86InstrInfo *getInstrInfo() const {
455  return Subtarget->getInstrInfo();
456  }
457 
458  /// Address-mode matching performs shift-of-and to and-of-shift
459  /// reassociation in order to expose more scaled addressing
460  /// opportunities.
461  bool ComplexPatternFuncMutatesDAG() const override {
462  return true;
463  }
464 
465  bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
466 
467  /// Returns whether this is a relocatable immediate in the range
468  /// [-2^Width .. 2^Width-1].
469  template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
470  if (auto *CN = dyn_cast<ConstantSDNode>(N))
471  return isInt<Width>(CN->getSExtValue());
472  return isSExtAbsoluteSymbolRef(Width, N);
473  }
474 
475  // Indicates we should prefer to use a non-temporal load for this load.
476  bool useNonTemporalLoad(LoadSDNode *N) const {
477  if (!N->isNonTemporal())
478  return false;
479 
480  unsigned StoreSize = N->getMemoryVT().getStoreSize();
481 
482  if (N->getAlignment() < StoreSize)
483  return false;
484 
485  switch (StoreSize) {
486  default: llvm_unreachable("Unsupported store size");
487  case 4:
488  case 8:
489  return false;
490  case 16:
491  return Subtarget->hasSSE41();
492  case 32:
493  return Subtarget->hasAVX2();
494  case 64:
495  return Subtarget->hasAVX512();
496  }
497  }
498 
499  bool foldLoadStoreIntoMemOperand(SDNode *Node);
500  MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
501  bool matchBitExtract(SDNode *Node);
502  bool shrinkAndImmediate(SDNode *N);
503  bool isMaskZeroExtended(SDNode *N) const;
504  bool tryShiftAmountMod(SDNode *N);
505  bool tryShrinkShlLogicImm(SDNode *N);
506  bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
507 
508  MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
509  const SDLoc &dl, MVT VT, SDNode *Node);
510  MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
511  const SDLoc &dl, MVT VT, SDNode *Node,
512  SDValue &InFlag);
513 
514  bool tryOptimizeRem8Extend(SDNode *N);
515 
516  bool onlyUsesZeroFlag(SDValue Flags) const;
517  bool hasNoSignFlagUses(SDValue Flags) const;
518  bool hasNoCarryFlagUses(SDValue Flags) const;
519  };
520 }
521 
522 
523 // Returns true if this masked compare can be implemented legally with this
524 // type.
525 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
526  unsigned Opcode = N->getOpcode();
527  if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
528  Opcode == X86ISD::CMPM_SAE || Opcode == X86ISD::VFPCLASS) {
529  // We can get 256-bit 8 element types here without VLX being enabled. When
530  // this happens we will use 512-bit operations and the mask will not be
531  // zero extended.
532  EVT OpVT = N->getOperand(0).getValueType();
533  if (OpVT.is256BitVector() || OpVT.is128BitVector())
534  return Subtarget->hasVLX();
535 
536  return true;
537  }
538  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
539  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
540  Opcode == X86ISD::FSETCCM_SAE)
541  return true;
542 
543  return false;
544 }
545 
546 // Returns true if we can assume the writer of the mask has zero extended it
547 // for us.
548 bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
549  // If this is an AND, check if we have a compare on either side. As long as
550  // one side guarantees the mask is zero extended, the AND will preserve those
551  // zeros.
552  if (N->getOpcode() == ISD::AND)
553  return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
554  isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
555 
556  return isLegalMaskCompare(N, Subtarget);
557 }
558 
559 bool
560 X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
561  if (OptLevel == CodeGenOpt::None) return false;
562 
563  if (!N.hasOneUse())
564  return false;
565 
566  if (N.getOpcode() != ISD::LOAD)
567  return true;
568 
569  // Don't fold non-temporal loads if we have an instruction for them.
570  if (useNonTemporalLoad(cast<LoadSDNode>(N)))
571  return false;
572 
573  // If N is a load, do additional profitability checks.
574  if (U == Root) {
575  switch (U->getOpcode()) {
576  default: break;
577  case X86ISD::ADD:
578  case X86ISD::ADC:
579  case X86ISD::SUB:
580  case X86ISD::SBB:
581  case X86ISD::AND:
582  case X86ISD::XOR:
583  case X86ISD::OR:
584  case ISD::ADD:
585  case ISD::ADDCARRY:
586  case ISD::AND:
587  case ISD::OR:
588  case ISD::XOR: {
589  SDValue Op1 = U->getOperand(1);
590 
591  // If the other operand is a 8-bit immediate we should fold the immediate
592  // instead. This reduces code size.
593  // e.g.
594  // movl 4(%esp), %eax
595  // addl $4, %eax
596  // vs.
597  // movl $4, %eax
598  // addl 4(%esp), %eax
599  // The former is 2 bytes shorter. In the case where the increment is 1,
600  // the saving can be 4 bytes (by using incl %eax).
601  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
602  if (Imm->getAPIntValue().isSignedIntN(8))
603  return false;
604 
605  // If this is a 64-bit AND with an immediate that fits in 32-bits,
606  // prefer using the smaller and over folding the load. This is needed to
607  // make sure immediates created by shrinkAndImmediate are always folded.
608  // Ideally we would narrow the load during DAG combine and get the
609  // best of both worlds.
610  if (U->getOpcode() == ISD::AND &&
611  Imm->getAPIntValue().getBitWidth() == 64 &&
612  Imm->getAPIntValue().isIntN(32))
613  return false;
614 
615  // If this is really a zext_inreg that can be represented with a movzx
616  // instruction, prefer that.
617  // TODO: We could shrink the load and fold if it is non-volatile.
618  if (U->getOpcode() == ISD::AND &&
619  (Imm->getAPIntValue() == UINT8_MAX ||
620  Imm->getAPIntValue() == UINT16_MAX ||
621  Imm->getAPIntValue() == UINT32_MAX))
622  return false;
623 
624  // ADD/SUB can negate the immediate and use the opposite operation
625  // to fit 128 into a sign extended 8 bit immediate.
626  if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
627  (-Imm->getAPIntValue()).isSignedIntN(8))
628  return false;
629  }
630 
631  // If the other operand is a TLS address, we should fold it instead.
632  // This produces
633  // movl %gs:0, %eax
634  // leal i@NTPOFF(%eax), %eax
635  // instead of
636  // movl $i@NTPOFF, %eax
637  // addl %gs:0, %eax
638  // if the block also has an access to a second TLS address this will save
639  // a load.
640  // FIXME: This is probably also true for non-TLS addresses.
641  if (Op1.getOpcode() == X86ISD::Wrapper) {
642  SDValue Val = Op1.getOperand(0);
643  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
644  return false;
645  }
646 
647  // Don't fold load if this matches the BTS/BTR/BTC patterns.
648  // BTS: (or X, (shl 1, n))
649  // BTR: (and X, (rotl -2, n))
650  // BTC: (xor X, (shl 1, n))
651  if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
652  if (U->getOperand(0).getOpcode() == ISD::SHL &&
653  isOneConstant(U->getOperand(0).getOperand(0)))
654  return false;
655 
656  if (U->getOperand(1).getOpcode() == ISD::SHL &&
657  isOneConstant(U->getOperand(1).getOperand(0)))
658  return false;
659  }
660  if (U->getOpcode() == ISD::AND) {
661  SDValue U0 = U->getOperand(0);
662  SDValue U1 = U->getOperand(1);
663  if (U0.getOpcode() == ISD::ROTL) {
664  auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
665  if (C && C->getSExtValue() == -2)
666  return false;
667  }
668 
669  if (U1.getOpcode() == ISD::ROTL) {
670  auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
671  if (C && C->getSExtValue() == -2)
672  return false;
673  }
674  }
675 
676  break;
677  }
678  case ISD::SHL:
679  case ISD::SRA:
680  case ISD::SRL:
681  // Don't fold a load into a shift by immediate. The BMI2 instructions
682  // support folding a load, but not an immediate. The legacy instructions
683  // support folding an immediate, but can't fold a load. Folding an
684  // immediate is preferable to folding a load.
685  if (isa<ConstantSDNode>(U->getOperand(1)))
686  return false;
687 
688  break;
689  }
690  }
691 
692  // Prevent folding a load if this can be implemented with an insert_subreg or
693  // a move that implicitly zeroes.
694  if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
695  isNullConstant(Root->getOperand(2)) &&
696  (Root->getOperand(0).isUndef() ||
697  ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
698  return false;
699 
700  return true;
701 }
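// Illustrative example for the ADD/SUB negation case above (assumed operands):
//   movl 4(%esp), %eax
//   addl $128, %eax        // 128 needs a 4-byte immediate
// can instead be selected as
//   subl $-128, %eax       // -128 fits in a sign-extended 8-bit immediate
// so the immediate, rather than the load, is the operand worth folding there.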
702 
703 /// Replace the original chain operand of the call with
704 /// load's chain operand and move load below the call's chain operand.
705 static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
706  SDValue Call, SDValue OrigChain) {
707  SmallVector<SDValue, 8> Ops;
708  SDValue Chain = OrigChain.getOperand(0);
709  if (Chain.getNode() == Load.getNode())
710  Ops.push_back(Load.getOperand(0));
711  else {
712  assert(Chain.getOpcode() == ISD::TokenFactor &&
713  "Unexpected chain operand");
714  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
715  if (Chain.getOperand(i).getNode() == Load.getNode())
716  Ops.push_back(Load.getOperand(0));
717  else
718  Ops.push_back(Chain.getOperand(i));
719  SDValue NewChain =
720  CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
721  Ops.clear();
722  Ops.push_back(NewChain);
723  }
724  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
725  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
726  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
727  Load.getOperand(1), Load.getOperand(2));
728 
729  Ops.clear();
730  Ops.push_back(SDValue(Load.getNode(), 1));
731  Ops.append(Call->op_begin() + 1, Call->op_end());
732  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
733 }
734 
735 /// Return true if call address is a load and it can be
736 /// moved below CALLSEQ_START and the chains leading up to the call.
737 /// Return the CALLSEQ_START by reference as a second output.
738 /// In the case of a tail call, there isn't a callseq node between the call
739 /// chain and the load.
740 static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
741  // The transformation is somewhat dangerous if the call's chain was glued to
742  // the call. After MoveBelowOrigChain the load is moved between the call and
743  // the chain, this can create a cycle if the load is not folded. So it is
744  // *really* important that we are sure the load will be folded.
745  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
746  return false;
747  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
748  if (!LD ||
749  LD->isVolatile() ||
750  LD->getAddressingMode() != ISD::UNINDEXED ||
751  LD->getExtensionType() != ISD::NON_EXTLOAD)
752  return false;
753 
754  // Now let's find the callseq_start.
755  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
756  if (!Chain.hasOneUse())
757  return false;
758  Chain = Chain.getOperand(0);
759  }
760 
761  if (!Chain.getNumOperands())
762  return false;
763  // Since we are not checking for AA here, conservatively abort if the chain
764  // writes to memory. It's not safe to move the callee (a load) across a store.
765  if (isa<MemSDNode>(Chain.getNode()) &&
766  cast<MemSDNode>(Chain.getNode())->writeMem())
767  return false;
768  if (Chain.getOperand(0).getNode() == Callee.getNode())
769  return true;
770  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
771  Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
772  Callee.getValue(1).hasOneUse())
773  return true;
774  return false;
775 }
776 
777 void X86DAGToDAGISel::PreprocessISelDAG() {
778  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
779  E = CurDAG->allnodes_end(); I != E; ) {
780  SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
781 
782  // If this is a target specific AND node with no flag usages, turn it back
783  // into ISD::AND to enable test instruction matching.
784  if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
785  SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
786  N->getOperand(0), N->getOperand(1));
787  --I;
788  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
789  ++I;
790  CurDAG->DeleteNode(N);
791  continue;
792  }
793 
794  switch (N->getOpcode()) {
795  case ISD::FP_TO_SINT:
796  case ISD::FP_TO_UINT: {
797  // Replace vector fp_to_s/uint with their X86 specific equivalent so we
798  // don't need 2 sets of patterns.
799  if (!N->getSimpleValueType(0).isVector())
800  break;
801 
802  unsigned NewOpc;
803  switch (N->getOpcode()) {
804  default: llvm_unreachable("Unexpected opcode!");
805  case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
806  case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
807  }
808  SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
809  N->getOperand(0));
810  --I;
811  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
812  ++I;
813  CurDAG->DeleteNode(N);
814  continue;
815  }
816  case ISD::SHL:
817  case ISD::SRA:
818  case ISD::SRL: {
819  // Replace vector shifts with their X86 specific equivalent so we don't
820  // need 2 sets of patterns.
821  if (!N->getValueType(0).isVector())
822  break;
823 
824  unsigned NewOpc;
825  switch (N->getOpcode()) {
826  default: llvm_unreachable("Unexpected opcode!");
827  case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
828  case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
829  case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
830  }
831  SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
832  N->getOperand(0), N->getOperand(1));
833  --I;
834  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
835  ++I;
836  CurDAG->DeleteNode(N);
837  continue;
838  }
839  case ISD::ANY_EXTEND:
840  case ISD::ANY_EXTEND_VECTOR_INREG: {
841  // Replace vector any extend with the zero extend equivalents so we don't
842  // need 2 sets of patterns. Ignore vXi1 extensions.
843  if (!N->getValueType(0).isVector() ||
844  N->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
845  break;
846 
847  unsigned NewOpc = N->getOpcode() == ISD::ANY_EXTEND
848  ? ISD::ZERO_EXTEND
849  : ISD::ZERO_EXTEND_VECTOR_INREG;
850 
851  SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
852  N->getOperand(0));
853  --I;
854  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
855  ++I;
856  CurDAG->DeleteNode(N);
857  continue;
858  }
859  case ISD::FCEIL:
860  case ISD::FFLOOR:
861  case ISD::FTRUNC:
862  case ISD::FNEARBYINT:
863  case ISD::FRINT: {
864  // Replace fp rounding with their X86 specific equivalent so we don't
865  // need 2 sets of patterns.
866  unsigned Imm;
867  switch (N->getOpcode()) {
868  default: llvm_unreachable("Unexpected opcode!");
869  case ISD::FCEIL: Imm = 0xA; break;
870  case ISD::FFLOOR: Imm = 0x9; break;
871  case ISD::FTRUNC: Imm = 0xB; break;
872  case ISD::FNEARBYINT: Imm = 0xC; break;
873  case ISD::FRINT: Imm = 0x4; break;
874  }
875  SDLoc dl(N);
876  SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
877  N->getValueType(0),
878  N->getOperand(0),
879  CurDAG->getConstant(Imm, dl, MVT::i8));
880  --I;
881  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
882  ++I;
883  CurDAG->DeleteNode(N);
884  continue;
885  }
886  case X86ISD::FANDN:
887  case X86ISD::FAND:
888  case X86ISD::FOR:
889  case X86ISD::FXOR: {
890  // Widen scalar fp logic ops to vector to reduce isel patterns.
891  // FIXME: Can we do this during lowering/combine.
892  MVT VT = N->getSimpleValueType(0);
893  if (VT.isVector() || VT == MVT::f128)
894  break;
895 
896  MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32;
897  SDLoc dl(N);
898  SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
899  N->getOperand(0));
900  SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
901  N->getOperand(1));
902 
903  SDValue Res;
904  if (Subtarget->hasSSE2()) {
905  EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
906  Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
907  Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
908  unsigned Opc;
909  switch (N->getOpcode()) {
910  default: llvm_unreachable("Unexpected opcode!");
911  case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
912  case X86ISD::FAND: Opc = ISD::AND; break;
913  case X86ISD::FOR: Opc = ISD::OR; break;
914  case X86ISD::FXOR: Opc = ISD::XOR; break;
915  }
916  Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
917  Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
918  } else {
919  Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
920  }
921  Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
922  CurDAG->getIntPtrConstant(0, dl));
923  --I;
924  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
925  ++I;
926  CurDAG->DeleteNode(N);
927  continue;
928  }
929  }
930 
931  if (OptLevel != CodeGenOpt::None &&
932  // Only do this when the target can fold the load into the call or
933  // jmp.
934  !Subtarget->useRetpolineIndirectCalls() &&
935  ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
936  (N->getOpcode() == X86ISD::TC_RETURN &&
937  (Subtarget->is64Bit() ||
938  !getTargetMachine().isPositionIndependent())))) {
939  /// Also try moving call address load from outside callseq_start to just
940  /// before the call to allow it to be folded.
941  ///
942  /// [Load chain]
943  /// ^
944  /// |
945  /// [Load]
946  /// ^ ^
947  /// | |
948  /// / \--
949  /// / |
950  ///[CALLSEQ_START] |
951  /// ^ |
952  /// | |
953  /// [LOAD/C2Reg] |
954  /// | |
955  /// \ /
956  /// \ /
957  /// [CALL]
958  bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
959  SDValue Chain = N->getOperand(0);
960  SDValue Load = N->getOperand(1);
961  if (!isCalleeLoad(Load, Chain, HasCallSeq))
962  continue;
963  moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
964  ++NumLoadMoved;
965  continue;
966  }
967 
968  // Lower fpround and fpextend nodes that target the FP stack to be a store
969  // and load to the stack. This is a gross hack. We would like to simply mark
970  // these as being illegal, but when we do that, legalize produces these when
971  // it expands calls, then expands these in the same legalize pass. We would
972  // like dag combine to be able to hack on these between the call expansion
973  // and the node legalization. As such this pass basically does "really
974  // late" legalization of these inline with the X86 isel pass.
975  // FIXME: This should only happen when not compiled with -O0.
976  switch (N->getOpcode()) {
977  default: continue;
978  case ISD::FP_ROUND:
979  case ISD::FP_EXTEND:
980  {
981  MVT SrcVT = N->getOperand(0).getSimpleValueType();
982  MVT DstVT = N->getSimpleValueType(0);
983 
984  // If any of the sources are vectors, no fp stack involved.
985  if (SrcVT.isVector() || DstVT.isVector())
986  continue;
987 
988  // If the source and destination are SSE registers, then this is a legal
989  // conversion that should not be lowered.
990  const X86TargetLowering *X86Lowering =
991  static_cast<const X86TargetLowering *>(TLI);
992  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
993  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
994  if (SrcIsSSE && DstIsSSE)
995  continue;
996 
997  if (!SrcIsSSE && !DstIsSSE) {
998  // If this is an FPStack extension, it is a noop.
999  if (N->getOpcode() == ISD::FP_EXTEND)
1000  continue;
1001  // If this is a value-preserving FPStack truncation, it is a noop.
1002  if (N->getConstantOperandVal(1))
1003  continue;
1004  }
1005 
1006  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1007  // FPStack has extload and truncstore. SSE can fold direct loads into other
1008  // operations. Based on this, decide what we want to do.
1009  MVT MemVT;
1010  if (N->getOpcode() == ISD::FP_ROUND)
1011  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
1012  else
1013  MemVT = SrcIsSSE ? SrcVT : DstVT;
1014 
1015  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1016  SDLoc dl(N);
1017 
1018  // FIXME: optimize the case where the src/dest is a load or store?
1019 
1020  SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
1021  MemTmp, MachinePointerInfo(), MemVT);
1022  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
1023  MachinePointerInfo(), MemVT);
1024 
1025  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1026  // extload we created. This will cause general havoc on the dag because
1027  // anything below the conversion could be folded into other existing nodes.
1028  // To avoid invalidating 'I', back it up to the convert node.
1029  --I;
1030  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1031  break;
1032  }
1033 
1034  // The sequence of events for lowering STRICT_FP versions of these nodes requires
1035  // dealing with the chain differently, as there is already a preexisting chain.
1036  case ISD::STRICT_FP_ROUND:
1037  case ISD::STRICT_FP_EXTEND:
1038  {
1039  MVT SrcVT = N->getOperand(1).getSimpleValueType();
1040  MVT DstVT = N->getSimpleValueType(0);
1041 
1042  // If any of the sources are vectors, no fp stack involved.
1043  if (SrcVT.isVector() || DstVT.isVector())
1044  continue;
1045 
1046  // If the source and destination are SSE registers, then this is a legal
1047  // conversion that should not be lowered.
1048  const X86TargetLowering *X86Lowering =
1049  static_cast<const X86TargetLowering *>(TLI);
1050  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1051  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1052  if (SrcIsSSE && DstIsSSE)
1053  continue;
1054 
1055  if (!SrcIsSSE && !DstIsSSE) {
1056  // If this is an FPStack extension, it is a noop.
1057  if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
1058  continue;
1059  // If this is a value-preserving FPStack truncation, it is a noop.
1060  if (N->getConstantOperandVal(2))
1061  continue;
1062  }
1063 
1064  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1065  // FPStack has extload and truncstore. SSE can fold direct loads into other
1066  // operations. Based on this, decide what we want to do.
1067  MVT MemVT;
1068  if (N->getOpcode() == ISD::STRICT_FP_ROUND)
1069  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
1070  else
1071  MemVT = SrcIsSSE ? SrcVT : DstVT;
1072 
1073  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1074  SDLoc dl(N);
1075 
1076  // FIXME: optimize the case where the src/dest is a load or store?
1077 
1078  // Since the operation is StrictFP, use the preexisting chain.
1079  SDValue Store = CurDAG->getTruncStore(N->getOperand(0), dl, N->getOperand(1),
1080  MemTmp, MachinePointerInfo(), MemVT);
1081  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
1082  MachinePointerInfo(), MemVT);
1083 
1084  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1085  // extload we created. This will cause general havoc on the dag because
1086  // anything below the conversion could be folded into other existing nodes.
1087  // To avoid invalidating 'I', back it up to the convert node.
1088  --I;
1089  CurDAG->ReplaceAllUsesWith(N, Result.getNode());
1090  break;
1091  }
1092  }
1093 
1094 
1095  // Now that we did that, the node is dead. Increment the iterator to the
1096  // next node to process, then delete N.
1097  ++I;
1098  CurDAG->DeleteNode(N);
1099  }
1100 
1101  // The load+call transform above can leave some dead nodes in the graph. Make
1102  // sure we remove them. It's possible some of the other transforms do too, so
1103  // just remove dead nodes unconditionally.
1104  CurDAG->RemoveDeadNodes();
1105 }
1106 
1107 // Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
1108 bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
1109  unsigned Opc = N->getMachineOpcode();
1110  if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
1111  Opc != X86::MOVSX64rr8)
1112  return false;
1113 
1114  SDValue N0 = N->getOperand(0);
1115 
1116  // We need to be extracting the lower bit of an extend.
1117  if (!N0.isMachineOpcode() ||
1118  N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
1119  N0.getConstantOperandVal(1) != X86::sub_8bit)
1120  return false;
1121 
1122  // We're looking for either a movsx or movzx to match the original opcode.
1123  unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
1124  : X86::MOVSX32rr8_NOREX;
1125  SDValue N00 = N0.getOperand(0);
1126  if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
1127  return false;
1128 
1129  if (Opc == X86::MOVSX64rr8) {
1130  // If we had a sign extend from 8 to 64 bits, we still need to go from 32
1131  // to 64.
1132  MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
1133  MVT::i64, N00);
1134  ReplaceUses(N, Extend);
1135  } else {
1136  // Ok we can drop this extend and just use the original extend.
1137  ReplaceUses(N, N00.getNode());
1138  }
1139 
1140  return true;
1141 }
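// Illustrative shape of the pattern handled above (assumed temporaries):
//   t0 = MOVZX32rr8_NOREX <8-bit divrem result>
//   t1 = EXTRACT_SUBREG t0, sub_8bit
//   t2 = MOVZX32rr8 t1            // redundant re-extension
// Here t2 can simply reuse t0, and for MOVSX64rr8 a single MOVSX64rr32 of t0
// finishes the widening to 64 bits.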
1142 
1143 void X86DAGToDAGISel::PostprocessISelDAG() {
1144  // Skip peepholes at -O0.
1145  if (TM.getOptLevel() == CodeGenOpt::None)
1146  return;
1147 
1148  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
1149 
1150  bool MadeChange = false;
1151  while (Position != CurDAG->allnodes_begin()) {
1152  SDNode *N = &*--Position;
1153  // Skip dead nodes and any non-machine opcodes.
1154  if (N->use_empty() || !N->isMachineOpcode())
1155  continue;
1156 
1157  if (tryOptimizeRem8Extend(N)) {
1158  MadeChange = true;
1159  continue;
1160  }
1161 
1162  // Look for a TESTrr+ANDrr pattern where both operands of the test are
1163  // the same. Rewrite to remove the AND.
1164  unsigned Opc = N->getMachineOpcode();
1165  if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
1166  Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
1167  N->getOperand(0) == N->getOperand(1) &&
1168  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
1169  N->getOperand(0).isMachineOpcode()) {
1170  SDValue And = N->getOperand(0);
1171  unsigned N0Opc = And.getMachineOpcode();
1172  if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
1173  N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
1174  MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
1175  MVT::i32,
1176  And.getOperand(0),
1177  And.getOperand(1));
1178  ReplaceUses(N, Test);
1179  MadeChange = true;
1180  continue;
1181  }
1182  if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
1183  N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
1184  unsigned NewOpc;
1185  switch (N0Opc) {
1186  case X86::AND8rm: NewOpc = X86::TEST8mr; break;
1187  case X86::AND16rm: NewOpc = X86::TEST16mr; break;
1188  case X86::AND32rm: NewOpc = X86::TEST32mr; break;
1189  case X86::AND64rm: NewOpc = X86::TEST64mr; break;
1190  }
1191 
1192  // Need to swap the memory and register operand.
1193  SDValue Ops[] = { And.getOperand(1),
1194  And.getOperand(2),
1195  And.getOperand(3),
1196  And.getOperand(4),
1197  And.getOperand(5),
1198  And.getOperand(0),
1199  And.getOperand(6) /* Chain */ };
1200  MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1201  MVT::i32, MVT::Other, Ops);
1202  ReplaceUses(N, Test);
1203  MadeChange = true;
1204  continue;
1205  }
1206  }
1207 
1208  // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
1209  // used. We're doing this late so we can prefer to fold the AND into masked
1210  // comparisons. Doing that can be better for the live range of the mask
1211  // register.
1212  if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
1213  Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
1214  N->getOperand(0) == N->getOperand(1) &&
1215  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
1216  N->getOperand(0).isMachineOpcode() &&
1217  onlyUsesZeroFlag(SDValue(N, 0))) {
1218  SDValue And = N->getOperand(0);
1219  unsigned N0Opc = And.getMachineOpcode();
1220  // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
1221  // KAND instructions and KTEST use the same ISA feature.
1222  if (N0Opc == X86::KANDBrr ||
1223  (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
1224  N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
1225  unsigned NewOpc;
1226  switch (Opc) {
1227  default: llvm_unreachable("Unexpected opcode!");
1228  case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
1229  case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
1230  case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
1231  case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
1232  }
1233  MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1234  MVT::i32,
1235  And.getOperand(0),
1236  And.getOperand(1));
1237  ReplaceUses(N, KTest);
1238  MadeChange = true;
1239  continue;
1240  }
1241  }
1242 
1243  // Attempt to remove vectors moves that were inserted to zero upper bits.
1244  if (Opc != TargetOpcode::SUBREG_TO_REG)
1245  continue;
1246 
1247  unsigned SubRegIdx = N->getConstantOperandVal(2);
1248  if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
1249  continue;
1250 
1251  SDValue Move = N->getOperand(1);
1252  if (!Move.isMachineOpcode())
1253  continue;
1254 
1255  // Make sure it's one of the move opcodes we recognize.
1256  switch (Move.getMachineOpcode()) {
1257  default:
1258  continue;
1259  case X86::VMOVAPDrr: case X86::VMOVUPDrr:
1260  case X86::VMOVAPSrr: case X86::VMOVUPSrr:
1261  case X86::VMOVDQArr: case X86::VMOVDQUrr:
1262  case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
1263  case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
1264  case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
1265  case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
1266  case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
1267  case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
1268  case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
1269  case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
1270  case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
1271  case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
1272  case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
1273  break;
1274  }
1275 
1276  SDValue In = Move.getOperand(0);
1277  if (!In.isMachineOpcode() ||
1278  In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1279  continue;
1280 
1281  // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers
1282  // the SHA instructions which use a legacy encoding.
1283  uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1284  if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
1285  (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
1286  (TSFlags & X86II::EncodingMask) != X86II::XOP)
1287  continue;
1288 
1289  // The producing instruction is another vector instruction, so we can drop
1290  // the move.
1291  CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1292  MadeChange = true;
1293  }
1294 
1295  if (MadeChange)
1296  CurDAG->RemoveDeadNodes();
1297 }
1298 
1299 
1300 /// Emit any code that needs to be executed only in the main function.
1301 void X86DAGToDAGISel::emitSpecialCodeForMain() {
1302  if (Subtarget->isTargetCygMing()) {
1303  TargetLowering::ArgListTy Args;
1304  auto &DL = CurDAG->getDataLayout();
1305 
1306  TargetLowering::CallLoweringInfo CLI(*CurDAG);
1307  CLI.setChain(CurDAG->getRoot())
1308  .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1309  CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1310  std::move(Args));
1311  const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1312  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
1313  CurDAG->setRoot(Result.second);
1314  }
1315 }
1316 
1317 void X86DAGToDAGISel::EmitFunctionEntryCode() {
1318  // If this is main, emit special code for main.
1319  const Function &F = MF->getFunction();
1320  if (F.hasExternalLinkage() && F.getName() == "main")
1321  emitSpecialCodeForMain();
1322 }
1323 
1324 static bool isDispSafeForFrameIndex(int64_t Val) {
1325  // On 64-bit platforms, we can run into an issue where a frame index
1326  // includes a displacement that, when added to the explicit displacement,
1327  // will overflow the displacement field. Assuming that the frame index
1328  // displacement fits into a 31-bit integer (which is only slightly more
1329  // aggressive than the current fundamental assumption that it fits into
1330  // a 32-bit integer), a 31-bit disp should always be safe.
1331  return isInt<31>(Val);
1332 }
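// Worked example (illustrative): isInt<31> accepts [-2^30, 2^30 - 1], so a
// combined displacement of 0x3fffffff is treated as safe while 0x40000000 is
// rejected, leaving headroom for whatever offset the frame index itself
// contributes once frame layout is final.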
1333 
1334 bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
1335  X86ISelAddressMode &AM) {
1336  // If there's no offset to fold, we don't need to do any work.
1337  if (Offset == 0)
1338  return false;
1339 
1340  // Cannot combine ExternalSymbol displacements with integer offsets.
1341  if (AM.ES || AM.MCSym)
1342  return true;
1343 
1344  int64_t Val = AM.Disp + Offset;
1345  CodeModel::Model M = TM.getCodeModel();
1346  if (Subtarget->is64Bit()) {
1347  if (!X86::isOffsetSuitableForCodeModel(Val, M,
1348  AM.hasSymbolicDisplacement()))
1349  return true;
1350  // In addition to the checks required for a register base, check that
1351  // we do not try to use an unsafe Disp with a frame index.
1352  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1353  !isDispSafeForFrameIndex(Val))
1354  return true;
1355  }
1356  AM.Disp = Val;
1357  return false;
1358 
1359 }
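// Illustrative example (assumed values): with AM.Disp == 16 and Offset == 8,
// Val == 24 is committed only if it remains acceptable for the code model on
// 64-bit targets (and passes isDispSafeForFrameIndex for a frame-index base);
// otherwise the function returns true and leaves AM.Disp untouched so the
// caller can back out of the fold.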
1360 
1361 bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
1362  SDValue Address = N->getOperand(1);
1363 
1364  // load gs:0 -> GS segment register.
1365  // load fs:0 -> FS segment register.
1366  //
1367  // This optimization is valid because the GNU TLS model defines that
1368  // gs:0 (or fs:0 on X86-64) contains its own address.
1369  // For more information see http://people.redhat.com/drepper/tls.pdf
1370  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
1371  if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
1372  !IndirectTlsSegRefs &&
1373  (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1374  Subtarget->isTargetFuchsia()))
1375  switch (N->getPointerInfo().getAddrSpace()) {
1376  case 256:
1377  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1378  return false;
1379  case 257:
1380  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1381  return false;
1382  // Address space 258 is not handled here, because it is not used to
1383  // address TLS areas.
1384  }
1385 
1386  return true;
1387 }
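// Illustrative example: on a glibc, Android or Fuchsia target, a load of
// constant address 0 in address space 256 (%gs) or 257 (%fs), e.g.
//   movq %fs:0, %rax
// is the thread pointer itself, so the load can be absorbed into the
// addressing mode as just a segment override instead of a real memory access.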
1388 
1389 /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1390 /// mode. These wrap things that will resolve down into a symbol reference.
1391 /// If no match is possible, this returns true, otherwise it returns false.
1392 bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1393  // If the addressing mode already has a symbol as the displacement, we can
1394  // never match another symbol.
1395  if (AM.hasSymbolicDisplacement())
1396  return true;
1397 
1398  bool IsRIPRelTLS = false;
1399  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1400  if (IsRIPRel) {
1401  SDValue Val = N.getOperand(0);
1402  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
1403  IsRIPRelTLS = true;
1404  }
1405 
1406  // We can't use an addressing mode in the 64-bit large code model.
1407  // Global TLS addressing is an exception. In the medium code model,
1408  // we can use a mode when RIP wrappers are present.
1409  // That signifies access to globals that are known to be "near",
1410  // such as the GOT itself.
1411  CodeModel::Model M = TM.getCodeModel();
1412  if (Subtarget->is64Bit() &&
1413  ((M == CodeModel::Large && !IsRIPRelTLS) ||
1414  (M == CodeModel::Medium && !IsRIPRel)))
1415  return true;
1416 
1417  // Base and index reg must be 0 in order to use %rip as base.
1418  if (IsRIPRel && AM.hasBaseOrIndexReg())
1419  return true;
1420 
1421  // Make a local copy in case we can't do this fold.
1422  X86ISelAddressMode Backup = AM;
1423 
1424  int64_t Offset = 0;
1425  SDValue N0 = N.getOperand(0);
1426  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1427  AM.GV = G->getGlobal();
1428  AM.SymbolFlags = G->getTargetFlags();
1429  Offset = G->getOffset();
1430  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1431  AM.CP = CP->getConstVal();
1432  AM.Align = CP->getAlignment();
1433  AM.SymbolFlags = CP->getTargetFlags();
1434  Offset = CP->getOffset();
1435  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1436  AM.ES = S->getSymbol();
1437  AM.SymbolFlags = S->getTargetFlags();
1438  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1439  AM.MCSym = S->getMCSymbol();
1440  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
1441  AM.JT = J->getIndex();
1442  AM.SymbolFlags = J->getTargetFlags();
1443  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1444  AM.BlockAddr = BA->getBlockAddress();
1445  AM.SymbolFlags = BA->getTargetFlags();
1446  Offset = BA->getOffset();
1447  } else
1448  llvm_unreachable("Unhandled symbol reference node.");
1449 
1450  if (foldOffsetIntoAddress(Offset, AM)) {
1451  AM = Backup;
1452  return true;
1453  }
1454 
1455  if (IsRIPRel)
1456  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1457 
1458  // Commit the changes now that we know this fold is safe.
1459  return false;
1460 }
1461 
1462 /// Add the specified node to the specified addressing mode, returning true if
1463 /// it cannot be done. This just pattern matches for the addressing mode.
1464 bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1465  if (matchAddressRecursively(N, AM, 0))
1466  return true;
1467 
1468  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1469  // a smaller encoding and avoids a scaled-index.
1470  if (AM.Scale == 2 &&
1471  AM.BaseType == X86ISelAddressMode::RegBase &&
1472  AM.Base_Reg.getNode() == nullptr) {
1473  AM.Base_Reg = AM.IndexReg;
1474  AM.Scale = 1;
1475  }
1476 
1477  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1478  // because it has a smaller encoding.
1479  // TODO: Which other code models can use this?
1480  switch (TM.getCodeModel()) {
1481  default: break;
1482  case CodeModel::Small:
1483  case CodeModel::Kernel:
1484  if (Subtarget->is64Bit() &&
1485  AM.Scale == 1 &&
1486  AM.BaseType == X86ISelAddressMode::RegBase &&
1487  AM.Base_Reg.getNode() == nullptr &&
1488  AM.IndexReg.getNode() == nullptr &&
1489  AM.SymbolFlags == X86II::MO_NO_FLAG &&
1490  AM.hasSymbolicDisplacement())
1491  AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1492  break;
1493  }
1494 
1495  return false;
1496 }
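// Illustrative example of the post-processing above: an address formed as
// x + x first matches with Scale == 2 and no base, i.e. leal (,%eax,2), %ecx;
// moving the index into the base gives leal (%eax,%eax), %ecx, which has a
// smaller encoding. Likewise, a small/kernel-model symbolic address foo with
// no registers becomes foo(%rip).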
1497 
1498 bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
1499  unsigned Depth) {
1500  // Add an artificial use to this node so that we can keep track of
1501  // it if it gets CSE'd with a different node.
1502  HandleSDNode Handle(N);
1503 
1504  X86ISelAddressMode Backup = AM;
1505  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1506  !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1507  return false;
1508  AM = Backup;
1509 
1510  // Try again after commuting the operands.
1511  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
1512  !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1513  return false;
1514  AM = Backup;
1515 
1516  // If we couldn't fold both operands into the address at the same time,
1517  // see if we can just put each operand into a register and fold at least
1518  // the add.
1519  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1520  !AM.Base_Reg.getNode() &&
1521  !AM.IndexReg.getNode()) {
1522  N = Handle.getValue();
1523  AM.Base_Reg = N.getOperand(0);
1524  AM.IndexReg = N.getOperand(1);
1525  AM.Scale = 1;
1526  return false;
1527  }
1528  N = Handle.getValue();
1529  return true;
1530 }
1531 
1532 // Insert a node into the DAG at least before the Pos node's position. This
1533 // will reposition the node as needed, and will assign it a node ID that is <=
1534 // the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1535 // IDs! The selection DAG must no longer depend on their uniqueness when this
1536 // is used.
1537 static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
1538  if (N->getNodeId() == -1 ||
1539  (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
1540  SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
1541  DAG.RepositionNode(Pos->getIterator(), N.getNode());
1542  // Mark Node as invalid for pruning as after this it may be a successor to a
1543  // selected node but otherwise be in the same position of Pos.
1544  // Conservatively mark it with the same -abs(Id) to assure node id
1545  // invariant is preserved.
1546  N->setNodeId(Pos->getNodeId());
1547  SelectionDAGISel::InvalidateNodeId(N.getNode());
1548  }
1549 }
1550 
1551 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1552 // safe. This allows us to convert the shift and and into an h-register
1553 // extract and a scaled index. Returns false if the simplification is
1554 // performed.
1555 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
1556  uint64_t Mask,
1557  SDValue Shift, SDValue X,
1558  X86ISelAddressMode &AM) {
1559  if (Shift.getOpcode() != ISD::SRL ||
1560  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1561  !Shift.hasOneUse())
1562  return true;
1563 
1564  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1565  if (ScaleLog <= 0 || ScaleLog >= 4 ||
1566  Mask != (0xffu << ScaleLog))
1567  return true;
1568 
1569  MVT VT = N.getSimpleValueType();
1570  SDLoc DL(N);
1571  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1572  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1573  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1574  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1575  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1576  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1577 
1578  // Insert the new nodes into the topological ordering. We must do this in
1579  // a valid topological ordering as nothing is going to go back and re-sort
1580  // these nodes. We continually insert before 'N' in sequence as this is
1581  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1582  // hierarchy left to express.
1583  insertDAGNode(DAG, N, Eight);
1584  insertDAGNode(DAG, N, Srl);
1585  insertDAGNode(DAG, N, NewMask);
1586  insertDAGNode(DAG, N, And);
1587  insertDAGNode(DAG, N, ShlCount);
1588  insertDAGNode(DAG, N, Shl);
1589  DAG.ReplaceAllUsesWith(N, Shl);
1590  DAG.RemoveDeadNode(N.getNode());
1591  AM.IndexReg = And;
1592  AM.Scale = (1 << ScaleLog);
1593  return false;
1594 }
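// Worked example for the transform above (illustrative values): for
//   N   = (and (srl X, 6), 0x3fc)      // Mask = 0xff << 2, so ScaleLog = 2
// the rewritten DAG is
//   And = (and (srl X, 8), 0xff)       // extractable as an h-register
//   Shl = (shl And, 2)                 // replaces all uses of N
// and the addressing mode takes AM.IndexReg = And with AM.Scale = 4, so the
// left shift is expressed by the scaled-index field instead of an ALU op.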
1595 
1596 // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1597 // allows us to fold the shift into this addressing mode. Returns false if the
1598 // transform succeeded.
1599 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
1600                                         X86ISelAddressMode &AM) {
1601  SDValue Shift = N.getOperand(0);
1602 
1603  // Use a signed mask so that shifting right will insert sign bits. These
1604  // bits will be removed when we shift the result left so it doesn't matter
1605  // what we use. This might allow a smaller immediate encoding.
1606  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
1607 
1608  // If we have an any_extend feeding the AND, look through it to see if there
1609  // is a shift behind it. But only if the AND doesn't use the extended bits.
1610  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
1611  bool FoundAnyExtend = false;
1612  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
1613  Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
1614  isUInt<32>(Mask)) {
1615  FoundAnyExtend = true;
1616  Shift = Shift.getOperand(0);
1617  }
1618 
1619  if (Shift.getOpcode() != ISD::SHL ||
1620  !isa<ConstantSDNode>(Shift.getOperand(1)))
1621  return true;
1622 
1623  SDValue X = Shift.getOperand(0);
1624 
1625  // Not likely to be profitable if either the AND or SHIFT node has more
1626  // than one use (unless all uses are for address computation). Besides,
1627  // the isel mechanism requires their node IDs to be reused.
1628  if (!N.hasOneUse() || !Shift.hasOneUse())
1629  return true;
1630 
1631  // Verify that the shift amount is something we can fold.
1632  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1633  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1634  return true;
1635 
1636  MVT VT = N.getSimpleValueType();
1637  SDLoc DL(N);
1638  if (FoundAnyExtend) {
1639  SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
1640  insertDAGNode(DAG, N, NewX);
1641  X = NewX;
1642  }
1643 
1644  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1645  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1646  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1647 
1648  // Insert the new nodes into the topological ordering. We must do this in
1649  // a valid topological ordering as nothing is going to go back and re-sort
1650  // these nodes. We continually insert before 'N' in sequence as this is
1651  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1652  // hierarchy left to express.
1653  insertDAGNode(DAG, N, NewMask);
1654  insertDAGNode(DAG, N, NewAnd);
1655  insertDAGNode(DAG, N, NewShift);
1656  DAG.ReplaceAllUsesWith(N, NewShift);
1657  DAG.RemoveDeadNode(N.getNode());
1658 
1659  AM.Scale = 1 << ShiftAmt;
1660  AM.IndexReg = NewAnd;
1661  return false;
1662 }
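// Worked example for the transform above (illustrative values): for
//   N        = (and (shl X, 2), 0x3fc)
// the mask is shifted right by the shift amount, giving
//   NewAnd   = (and X, 0xff)
//   NewShift = (shl NewAnd, 2)         // replaces all uses of N
// with AM.IndexReg = NewAnd and AM.Scale = 4, so the shl becomes a free scale
// and the AND may get a smaller immediate encoding.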
1663 
1664 // Implement some heroics to detect shifts of masked values where the mask can
1665 // be replaced by extending the shift and undoing that in the addressing mode
1666 // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1667 // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1668 // the addressing mode. This results in code such as:
1669 //
1670 // int f(short *y, int *lookup_table) {
1671 // ...
1672 // return *y + lookup_table[*y >> 11];
1673 // }
1674 //
1675 // Turning into:
1676 // movzwl (%rdi), %eax
1677 // movl %eax, %ecx
1678 // shrl $11, %ecx
1679 // addl (%rsi,%rcx,4), %eax
1680 //
1681 // Instead of:
1682 // movzwl (%rdi), %eax
1683 // movl %eax, %ecx
1684 // shrl $9, %ecx
1685 // andl $124, %rcx
1686 // addl (%rsi,%rcx), %eax
1687 //
1688 // Note that this function assumes the mask is provided as a mask *after* the
1689 // value is shifted. The input chain may or may not match that, but computing
1690 // such a mask is trivial.
1691 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
1692                                     uint64_t Mask,
1693  SDValue Shift, SDValue X,
1694  X86ISelAddressMode &AM) {
1695  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1696  !isa<ConstantSDNode>(Shift.getOperand(1)))
1697  return true;
1698 
1699  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1700  unsigned MaskLZ = countLeadingZeros(Mask);
1701  unsigned MaskTZ = countTrailingZeros(Mask);
1702 
1703  // The amount of shift we're trying to fit into the addressing mode is taken
1704  // from the trailing zeros of the mask.
1705  unsigned AMShiftAmt = MaskTZ;
1706 
1707  // There is nothing we can do here unless the mask is removing some bits.
1708  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1709  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1710 
1711  // We also need to ensure that the mask is a contiguous run of bits.
1712  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1713 
1714  // Scale the leading zero count down based on the actual size of the value.
1715  // Also scale it down based on the size of the shift.
1716  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1717  if (MaskLZ < ScaleDown)
1718  return true;
1719  MaskLZ -= ScaleDown;
1720 
1721  // The final check is to ensure that any masked out high bits of X are
1722  // already known to be zero. Otherwise, the mask has a semantic impact
1723  // other than masking out a couple of low bits. Unfortunately, because of
1724  // the mask, zero extensions will be removed from operands in some cases.
1725  // This code works extra hard to look through extensions because we can
1726  // replace them with zero extensions cheaply if necessary.
1727  bool ReplacingAnyExtend = false;
1728  if (X.getOpcode() == ISD::ANY_EXTEND) {
1729    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
1730                          X.getOperand(0).getSimpleValueType().getSizeInBits();
1731  // Assume that we'll replace the any-extend with a zero-extend, and
1732  // narrow the search to the extended value.
1733  X = X.getOperand(0);
1734  MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1735  ReplacingAnyExtend = true;
1736  }
1737  APInt MaskedHighBits =
1738      APInt::getHighBitsSet(64, MaskLZ);
1739  KnownBits Known = DAG.computeKnownBits(X);
1740  if (MaskedHighBits != Known.Zero) return true;
1741 
1742  // We've identified a pattern that can be transformed into a single shift
1743  // and an addressing mode. Make it so.
1744  MVT VT = N.getSimpleValueType();
1745  if (ReplacingAnyExtend) {
1746  assert(X.getValueType() != VT);
1747  // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1748  SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1749  insertDAGNode(DAG, N, NewX);
1750  X = NewX;
1751  }
1752  SDLoc DL(N);
1753  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1754  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1755  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1756  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1757 
1758  // Insert the new nodes into the topological ordering. We must do this in
1759  // a valid topological ordering as nothing is going to go back and re-sort
1760  // these nodes. We continually insert before 'N' in sequence as this is
1761  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1762  // hierarchy left to express.
1763  insertDAGNode(DAG, N, NewSRLAmt);
1764  insertDAGNode(DAG, N, NewSRL);
1765  insertDAGNode(DAG, N, NewSHLAmt);
1766  insertDAGNode(DAG, N, NewSHL);
1767  DAG.ReplaceAllUsesWith(N, NewSHL);
1768  DAG.RemoveDeadNode(N.getNode());
1769 
1770  AM.Scale = 1 << AMShiftAmt;
1771  AM.IndexReg = NewSRL;
1772  return false;
1773 }
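// Concretely, for the lookup_table example above the DAG reaches this point
// roughly as (and (srl x, 9), 0x7c): ShiftAmt = 9, the mask's trailing zero
// count gives AMShiftAmt = 2, and the rewrite produces
//   NewSRL = (srl x, 11)
//   NewSHL = (shl NewSRL, 2)           // replaces all uses of N
// with AM.IndexReg = NewSRL and AM.Scale = 4, i.e. the "shrl $11" plus
// (%rsi,%rcx,4) form shown in the first assembly listing.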
1774 
1775 // Transform "(X >> SHIFT) & (MASK << C1)" to
1776 // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
1777 // matched to a BEXTR later. Returns false if the simplification is performed.
1778 static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
1779                                    uint64_t Mask,
1780  SDValue Shift, SDValue X,
1781  X86ISelAddressMode &AM,
1782  const X86Subtarget &Subtarget) {
1783  if (Shift.getOpcode() != ISD::SRL ||
1784  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1785  !Shift.hasOneUse() || !N.hasOneUse())
1786  return true;
1787 
1788  // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
1789  if (!Subtarget.hasTBM() &&
1790  !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
1791  return true;
1792 
1793  // We need to ensure that the mask is a contiguous run of bits.
1794  if (!isShiftedMask_64(Mask)) return true;
1795 
1796  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1797 
1798  // The amount of shift we're trying to fit into the addressing mode is taken
1799  // from the trailing zeros of the mask.
1800  unsigned AMShiftAmt = countTrailingZeros(Mask);
1801 
1802  // There is nothing we can do here unless the mask is removing some bits.
1803  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1804  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1805 
1806  MVT VT = N.getSimpleValueType();
1807  SDLoc DL(N);
1808  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1809  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1810  SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
1811  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
1812  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1813  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
1814 
1815  // Insert the new nodes into the topological ordering. We must do this in
1816  // a valid topological ordering as nothing is going to go back and re-sort
1817  // these nodes. We continually insert before 'N' in sequence as this is
1818  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1819  // hierarchy left to express.
1820  insertDAGNode(DAG, N, NewSRLAmt);
1821  insertDAGNode(DAG, N, NewSRL);
1822  insertDAGNode(DAG, N, NewMask);
1823  insertDAGNode(DAG, N, NewAnd);
1824  insertDAGNode(DAG, N, NewSHLAmt);
1825  insertDAGNode(DAG, N, NewSHL);
1826  DAG.ReplaceAllUsesWith(N, NewSHL);
1827  DAG.RemoveDeadNode(N.getNode());
1828 
1829  AM.Scale = 1 << AMShiftAmt;
1830  AM.IndexReg = NewAnd;
1831  return false;
1832 }
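// Worked example for the transform above (illustrative values), assuming a
// subtarget where BEXTR is considered profitable: for
//   N      = (and (srl X, 4), 0xfc)    // shifted mask, 2 trailing zeros
// the rewrite produces
//   NewAnd = (and (srl X, 6), 0x3f)    // the part expected to become BEXTR
//   NewSHL = (shl NewAnd, 2)           // replaces all uses of N
// with AM.IndexReg = NewAnd and AM.Scale = 4.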
1833 
1834 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1835  unsigned Depth) {
1836  SDLoc dl(N);
1837  LLVM_DEBUG({
1838  dbgs() << "MatchAddress: ";
1839  AM.dump(CurDAG);
1840  });
1841  // Limit recursion.
1842  if (Depth > 5)
1843  return matchAddressBase(N, AM);
1844 
1845  // If this is already a %rip relative address, we can only merge immediates
1846  // into it. Instead of handling this in every case, we handle it here.
1847  // RIP relative addressing: %rip + 32-bit displacement!
1848  if (AM.isRIPRelative()) {
1849  // FIXME: JumpTable and ExternalSymbol address currently don't like
1850  // displacements. It isn't very important, but this should be fixed for
1851  // consistency.
1852  if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1853  return true;
1854 
1855  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1856  if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1857  return false;
1858  return true;
1859  }
1860 
1861  switch (N.getOpcode()) {
1862  default: break;
1863  case ISD::LOCAL_RECOVER: {
1864  if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1865  if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1866  // Use the symbol and don't prefix it.
1867  AM.MCSym = ESNode->getMCSymbol();
1868  return false;
1869  }
1870  break;
1871  }
1872  case ISD::Constant: {
1873  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1874  if (!foldOffsetIntoAddress(Val, AM))
1875  return false;
1876  break;
1877  }
1878 
1879  case X86ISD::Wrapper:
1880  case X86ISD::WrapperRIP:
1881  if (!matchWrapper(N, AM))
1882  return false;
1883  break;
1884 
1885  case ISD::LOAD:
1886  if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1887  return false;
1888  break;
1889 
1890  case ISD::FrameIndex:
1891  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1892  AM.Base_Reg.getNode() == nullptr &&
1893  (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1894  AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1895  AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1896  return false;
1897  }
1898  break;
1899 
1900  case ISD::SHL:
1901  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1902  break;
1903 
1904  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1905  unsigned Val = CN->getZExtValue();
1906  // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1907  // that the base operand remains free for further matching. If
1908  // the base doesn't end up getting used, a post-processing step
1909  // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1910  if (Val == 1 || Val == 2 || Val == 3) {
1911  AM.Scale = 1 << Val;
1912  SDValue ShVal = N.getOperand(0);
1913 
1914  // Okay, we know that we have a scale by now. However, if the scaled
1915  // value is an add of something and a constant, we can fold the
1916  // constant into the disp field here.
1917  if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1918  AM.IndexReg = ShVal.getOperand(0);
1919  ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1920  uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1921  if (!foldOffsetIntoAddress(Disp, AM))
1922  return false;
1923  }
1924 
1925  AM.IndexReg = ShVal;
1926  return false;
1927  }
1928  }
1929  break;
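    // For example, (shl (add %x, 20), 2) is matched here with AM.Scale = 4,
    // AM.IndexReg = %x and a displacement of 80 (20 << 2), so the whole
    // expression becomes 80(,%x,4) with no separate shift or add.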
1930 
1931  case ISD::SRL: {
1932  // Scale must not be used already.
1933  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1934 
1935  // We only handle up to 64-bit values here as those are what matter for
1936  // addressing mode optimizations.
1937  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
1938  "Unexpected value size!");
1939 
1940  SDValue And = N.getOperand(0);
1941  if (And.getOpcode() != ISD::AND) break;
1942  SDValue X = And.getOperand(0);
1943 
1944  // The mask used for the transform is expected to be post-shift, but we
1945  // found the shift first so just apply the shift to the mask before passing
1946  // it down.
1947  if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1948  !isa<ConstantSDNode>(And.getOperand(1)))
1949  break;
1950  uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1951 
1952  // Try to fold the mask and shift into the scale, and return false if we
1953  // succeed.
1954  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1955  return false;
1956  break;
1957  }
1958 
1959  case ISD::SMUL_LOHI:
1960  case ISD::UMUL_LOHI:
1961  // A mul_lohi where we need the low part can be folded as a plain multiply.
1962    if (N.getResNo() != 0) break;
1963    LLVM_FALLTHROUGH;
1964  case ISD::MUL:
1965  case X86ISD::MUL_IMM:
1966  // X*[3,5,9] -> X+X*[2,4,8]
1967  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1968  AM.Base_Reg.getNode() == nullptr &&
1969  AM.IndexReg.getNode() == nullptr) {
1970  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1971  if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1972  CN->getZExtValue() == 9) {
1973  AM.Scale = unsigned(CN->getZExtValue())-1;
1974 
1975  SDValue MulVal = N.getOperand(0);
1976  SDValue Reg;
1977 
1978  // Okay, we know that we have a scale by now. However, if the scaled
1979  // value is an add of something and a constant, we can fold the
1980  // constant into the disp field here.
1981  if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1982  isa<ConstantSDNode>(MulVal.getOperand(1))) {
1983  Reg = MulVal.getOperand(0);
1984  ConstantSDNode *AddVal =
1985  cast<ConstantSDNode>(MulVal.getOperand(1));
1986  uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1987  if (foldOffsetIntoAddress(Disp, AM))
1988  Reg = N.getOperand(0);
1989  } else {
1990  Reg = N.getOperand(0);
1991  }
1992 
1993  AM.IndexReg = AM.Base_Reg = Reg;
1994  return false;
1995  }
1996  }
1997  break;
1998 
1999  case ISD::SUB: {
2000    // Given A-B, if A can be completely folded into the address (leaving
2001    // the index field unused), use -B as the index.
2002    // This is a win if A has multiple parts that can be folded into
2003  // the address. Also, this saves a mov if the base register has
2004  // other uses, since it avoids a two-address sub instruction, however
2005  // it costs an additional mov if the index register has other uses.
2006 
2007  // Add an artificial use to this node so that we can keep track of
2008  // it if it gets CSE'd with a different node.
2009  HandleSDNode Handle(N);
2010 
2011  // Test if the LHS of the sub can be folded.
2012  X86ISelAddressMode Backup = AM;
2013  if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
2014  N = Handle.getValue();
2015  AM = Backup;
2016  break;
2017  }
2018  N = Handle.getValue();
2019  // Test if the index field is free for use.
2020  if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
2021  AM = Backup;
2022  break;
2023  }
2024 
2025  int Cost = 0;
2026  SDValue RHS = N.getOperand(1);
2027  // If the RHS involves a register with multiple uses, this
2028  // transformation incurs an extra mov, due to the neg instruction
2029  // clobbering its operand.
2030  if (!RHS.getNode()->hasOneUse() ||
2031  RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
2032  RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
2033  RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
2034  (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
2035  RHS.getOperand(0).getValueType() == MVT::i32))
2036  ++Cost;
2037  // If the base is a register with multiple uses, this
2038  // transformation may save a mov.
2039  if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
2040  !AM.Base_Reg.getNode()->hasOneUse()) ||
2041  AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2042  --Cost;
2043  // If the folded LHS was interesting, this transformation saves
2044  // address arithmetic.
2045  if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
2046  ((AM.Disp != 0) && (Backup.Disp == 0)) +
2047  (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
2048  --Cost;
2049  // If it doesn't look like it may be an overall win, don't do it.
2050  if (Cost >= 0) {
2051  AM = Backup;
2052  break;
2053  }
2054 
2055  // Ok, the transformation is legal and appears profitable. Go for it.
2056  // Negation will be emitted later to avoid creating dangling nodes if this
2057  // was an unprofitable LEA.
2058  AM.IndexReg = RHS;
2059  AM.NegateIndex = true;
2060  AM.Scale = 1;
2061  return false;
2062  }
2063 
2064  case ISD::ADD:
2065  if (!matchAdd(N, AM, Depth))
2066  return false;
2067  break;
2068 
2069  case ISD::OR:
2070  // We want to look through a transform in InstCombine and DAGCombiner that
2071  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
2072  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
2073  // An 'lea' can then be used to match the shift (multiply) and add:
2074  // and $1, %esi
2075  // lea (%rsi, %rdi, 8), %rax
2076  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
2077  !matchAdd(N, AM, Depth))
2078  return false;
2079  break;
2080 
2081  case ISD::AND: {
2082  // Perform some heroic transforms on an and of a constant-count shift
2083  // with a constant to enable use of the scaled offset field.
2084 
2085  // Scale must not be used already.
2086  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2087 
2088  // We only handle up to 64-bit values here as those are what matter for
2089  // addressing mode optimizations.
2090  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2091  "Unexpected value size!");
2092 
2093  if (!isa<ConstantSDNode>(N.getOperand(1)))
2094  break;
2095 
2096  if (N.getOperand(0).getOpcode() == ISD::SRL) {
2097  SDValue Shift = N.getOperand(0);
2098  SDValue X = Shift.getOperand(0);
2099 
2100  uint64_t Mask = N.getConstantOperandVal(1);
2101 
2102  // Try to fold the mask and shift into an extract and scale.
2103  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
2104  return false;
2105 
2106  // Try to fold the mask and shift directly into the scale.
2107  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
2108  return false;
2109 
2110  // Try to fold the mask and shift into BEXTR and scale.
2111  if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
2112  return false;
2113  }
2114 
2115  // Try to swap the mask and shift to place shifts which can be done as
2116  // a scale on the outside of the mask.
2117  if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
2118  return false;
2119 
2120  break;
2121  }
2122  case ISD::ZERO_EXTEND: {
2123  // Try to widen a zexted shift left to the same size as its use, so we can
2124  // match the shift as a scale factor.
2125  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2126  break;
2127  if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
2128  break;
2129 
2130  // Give up if the shift is not a valid scale factor [1,2,3].
2131  SDValue Shl = N.getOperand(0);
2132  auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
2133  if (!ShAmtC || ShAmtC->getZExtValue() > 3)
2134  break;
2135 
2136  // The narrow shift must only shift out zero bits (it must be 'nuw').
2137  // That makes it safe to widen to the destination type.
2138    APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
2139                                            ShAmtC->getZExtValue());
2140  if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
2141  break;
2142 
2143  // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
2144  MVT VT = N.getSimpleValueType();
2145  SDLoc DL(N);
2146  SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
2147  SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));
2148 
2149  // Convert the shift to scale factor.
2150  AM.Scale = 1 << ShAmtC->getZExtValue();
2151  AM.IndexReg = Zext;
2152 
2153  insertDAGNode(*CurDAG, N, Zext);
2154  insertDAGNode(*CurDAG, N, NewShl);
2155  CurDAG->ReplaceAllUsesWith(N, NewShl);
2156  CurDAG->RemoveDeadNode(N.getNode());
2157  return false;
2158  }
2159  }
2160 
2161  return matchAddressBase(N, AM);
2162 }
2163 
2164 /// Helper for MatchAddress. Add the specified node to the
2165 /// specified addressing mode without any further recursion.
2166 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
2167  // Is the base register already occupied?
2168  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
2169  // If so, check to see if the scale index register is set.
2170  if (!AM.IndexReg.getNode()) {
2171  AM.IndexReg = N;
2172  AM.Scale = 1;
2173  return false;
2174  }
2175 
2176  // Otherwise, we cannot select it.
2177  return true;
2178  }
2179 
2180  // Default, generate it as a register.
2181  AM.BaseType = X86ISelAddressMode::RegBase;
2182  AM.Base_Reg = N;
2183  return false;
2184 }
2185 
2186 /// Helper for selectVectorAddr. Handles things that can be folded into a
2187 /// gather scatter address. The index register and scale should have already
2188 /// been handled.
2189 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
2190  // TODO: Support other operations.
2191  switch (N.getOpcode()) {
2192  case ISD::Constant: {
2193  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2194  if (!foldOffsetIntoAddress(Val, AM))
2195  return false;
2196  break;
2197  }
2198  case X86ISD::Wrapper:
2199  if (!matchWrapper(N, AM))
2200  return false;
2201  break;
2202  }
2203 
2204  return matchAddressBase(N, AM);
2205 }
2206 
2207 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
2208  SDValue &Scale, SDValue &Index,
2209  SDValue &Disp, SDValue &Segment) {
2210  X86ISelAddressMode AM;
2211  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
2212  AM.IndexReg = Mgs->getIndex();
2213  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
2214 
2215  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2216  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2217  if (AddrSpace == 256)
2218  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2219  if (AddrSpace == 257)
2220  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2221  if (AddrSpace == 258)
2222  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2223 
2224  SDLoc DL(N);
2225  MVT VT = N.getSimpleValueType();
2226 
2227  // Try to match into the base and displacement fields.
2228  if (matchVectorAddress(N, AM))
2229  return false;
2230 
2231  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2232  return true;
2233 }
2234 
2235 /// Returns true if it is able to pattern match an addressing mode.
2236 /// It returns the operands which make up the maximal addressing mode it can
2237 /// match by reference.
2238 ///
2239 /// Parent is the parent node of the addr operand that is being matched. It
2240 /// is always a load, store, atomic node, or null. It is only null when
2241 /// checking memory operands for inline asm nodes.
2242 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
2243  SDValue &Scale, SDValue &Index,
2244  SDValue &Disp, SDValue &Segment) {
2245  X86ISelAddressMode AM;
2246 
2247  if (Parent &&
2248  // This list of opcodes are all the nodes that have an "addr:$ptr" operand
2249  // that are not a MemSDNode, and thus don't have proper addrspace info.
2250  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
2251  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
2252  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
2253  Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
2254  Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
2255  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
2256  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
2257  unsigned AddrSpace =
2258  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2259  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2260  if (AddrSpace == 256)
2261  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2262  if (AddrSpace == 257)
2263  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2264  if (AddrSpace == 258)
2265  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2266  }
2267 
2268  // Save the DL and VT before calling matchAddress, it can invalidate N.
2269  SDLoc DL(N);
2270  MVT VT = N.getSimpleValueType();
2271 
2272  if (matchAddress(N, AM))
2273  return false;
2274 
2275  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2276  return true;
2277 }
2278 
2279 // We can only fold a load if all nodes between it and the root node have a
2280 // single use. If there are additional uses, we could end up duplicating the
2281 // load.
2282 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
2283  while (User != Root) {
2284  if (!User->hasOneUse())
2285  return false;
2286  User = *User->use_begin();
2287  }
2288 
2289  return true;
2290 }
2291 
2292 /// Match a scalar SSE load. In particular, we want to match a load whose top
2293 /// elements are either undef or zeros. The load flavor is derived from the
2294 /// type of N, which is either v4f32 or v2f64.
2295 ///
2296 /// We also return:
2297 /// PatternChainNode: this is the matched node that has a chain input and
2298 /// output.
2299 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
2300  SDValue N, SDValue &Base,
2301  SDValue &Scale, SDValue &Index,
2302  SDValue &Disp, SDValue &Segment,
2303  SDValue &PatternNodeWithChain) {
2304  if (!hasSingleUsesFromRoot(Root, Parent))
2305  return false;
2306 
2307  // We can allow a full vector load here since narrowing a load is ok unless
2308  // it's volatile.
2309  if (ISD::isNON_EXTLoad(N.getNode())) {
2310  LoadSDNode *LD = cast<LoadSDNode>(N);
2311  if (!LD->isVolatile() &&
2312  IsProfitableToFold(N, LD, Root) &&
2313  IsLegalToFold(N, Parent, Root, OptLevel)) {
2314  PatternNodeWithChain = N;
2315  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2316  Segment);
2317  }
2318  }
2319 
2320  // We can also match the special zero extended load opcode.
2321  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
2322  PatternNodeWithChain = N;
2323  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2324  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
2325  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
2326  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
2327  Segment);
2328  }
2329  }
2330 
2331  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
2332  // once. Otherwise the load might get duplicated and the chain output of the
2333  // duplicate load will not be observed by all dependencies.
2334  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
2335  PatternNodeWithChain = N.getOperand(0);
2336  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2337  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2338  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2339  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2340  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2341  Segment);
2342  }
2343  }
2344 
2345  return false;
2346 }
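// A typical match here is N = (scalar_to_vector (load %p)) feeding a scalar
// SSE operation: the load is returned as PatternNodeWithChain so the caller
// can fold it as a memory operand and re-wire the load's chain users.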
2347 
2348 
2349 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
2350  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2351  uint64_t ImmVal = CN->getZExtValue();
2352  if (!isUInt<32>(ImmVal))
2353  return false;
2354 
2355  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
2356  return true;
2357  }
2358 
2359  // In static codegen with small code model, we can get the address of a label
2360  // into a register with 'movl'
2361  if (N->getOpcode() != X86ISD::Wrapper)
2362  return false;
2363 
2364  N = N.getOperand(0);
2365 
2366  // At least GNU as does not accept 'movl' for TPOFF relocations.
2367  // FIXME: We could use 'movl' when we know we are targeting MC.
2368  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
2369    return false;
2370 
2371  Imm = N;
2372  if (N->getOpcode() != ISD::TargetGlobalAddress)
2373  return TM.getCodeModel() == CodeModel::Small;
2374 
2375  Optional<ConstantRange> CR =
2376      cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
2377  if (!CR)
2378  return TM.getCodeModel() == CodeModel::Small;
2379 
2380  return CR->getUnsignedMax().ult(1ull << 32);
2381 }
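// For example, an i64 constant such as 0x12345678 passes the isUInt<32> check
// above and can be materialized with the shorter 32-bit-immediate encoding
// (the upper bits are implicitly zero), while 0x100000000 cannot and keeps
// the full 64-bit immediate form.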
2382 
2383 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
2384  SDValue &Scale, SDValue &Index,
2385  SDValue &Disp, SDValue &Segment) {
2386  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
2387  SDLoc DL(N);
2388 
2389  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
2390  return false;
2391 
2392  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
2393  if (RN && RN->getReg() == 0)
2394  Base = CurDAG->getRegister(0, MVT::i64);
2395  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
2396  // Base could already be %rip, particularly in the x32 ABI.
2397  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2398  MVT::i64), 0);
2399  Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2400  Base);
2401  }
2402 
2403  RN = dyn_cast<RegisterSDNode>(Index);
2404  if (RN && RN->getReg() == 0)
2405  Index = CurDAG->getRegister(0, MVT::i64);
2406  else {
2407  assert(Index.getValueType() == MVT::i32 &&
2408  "Expect to be extending 32-bit registers for use in LEA");
2409  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2410  MVT::i64), 0);
2411  Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2412  Index);
2413  }
2414 
2415  return true;
2416 }
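// In effect, each non-zero 32-bit base/index operand is rebuilt as an
// INSERT_SUBREG into an IMPLICIT_DEF i64 value, so the resulting LEA can read
// 64-bit base and index registers while still producing a 32-bit result.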
2417 
2418 /// Calls SelectAddr and determines if the maximal addressing
2419 /// mode it matches can be cost effectively emitted as an LEA instruction.
2420 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
2421  SDValue &Base, SDValue &Scale,
2422  SDValue &Index, SDValue &Disp,
2423  SDValue &Segment) {
2424  X86ISelAddressMode AM;
2425 
2426  // Save the DL and VT before calling matchAddress, it can invalidate N.
2427  SDLoc DL(N);
2428  MVT VT = N.getSimpleValueType();
2429 
2430  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
2431  // segments.
2432  SDValue Copy = AM.Segment;
2433  SDValue T = CurDAG->getRegister(0, MVT::i32);
2434  AM.Segment = T;
2435  if (matchAddress(N, AM))
2436  return false;
2437  assert (T == AM.Segment);
2438  AM.Segment = Copy;
2439 
2440  unsigned Complexity = 0;
2441  if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
2442  Complexity = 1;
2443  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2444  Complexity = 4;
2445 
2446  if (AM.IndexReg.getNode())
2447  Complexity++;
2448 
2449  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg or to
2450  // use a simple shift.
2451  if (AM.Scale > 1)
2452  Complexity++;
2453 
2454  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
2455  // to a LEA. This is determined with some experimentation but is by no means
2456  // optimal (especially for code size consideration). LEA is nice because of
2457  // its three-address nature. Tweak the cost function again when we can run
2458  // convertToThreeAddress() at register allocation time.
2459  if (AM.hasSymbolicDisplacement()) {
2460  // For X86-64, always use LEA to materialize RIP-relative addresses.
2461  if (Subtarget->is64Bit())
2462  Complexity = 4;
2463  else
2464  Complexity += 2;
2465  }
2466 
2467  if (AM.Disp)
2468  Complexity++;
2469 
2470  // If it isn't worth using an LEA, reject it.
2471  if (Complexity <= 2)
2472  return false;
2473 
2474  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2475  return true;
2476 }
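// For example, a plain (add %a, %b) scores Complexity = 2 (base + index) and
// is rejected here in favor of a normal add, while (add %a, (shl %b, 2))
// scores 3 (base + index + scale) and is emitted as lea (%a,%b,4), %dst.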
2477 
2478 /// This is only run on TargetGlobalTLSAddress nodes.
2479 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
2480  SDValue &Scale, SDValue &Index,
2481  SDValue &Disp, SDValue &Segment) {
2482  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
2483  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
2484 
2485  X86ISelAddressMode AM;
2486  AM.GV = GA->getGlobal();
2487  AM.Disp += GA->getOffset();
2488  AM.SymbolFlags = GA->getTargetFlags();
2489 
2490  MVT VT = N.getSimpleValueType();
2491  if (VT == MVT::i32) {
2492  AM.Scale = 1;
2493  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
2494  }
2495 
2496  getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
2497  return true;
2498 }
2499 
2500 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
2501  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2502  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
2503  N.getValueType());
2504  return true;
2505  }
2506 
2507  // Keep track of the original value type and whether this value was
2508  // truncated. If we see a truncation from pointer type to VT that truncates
2509  // bits that are known to be zero, we can use a narrow reference.
2510  EVT VT = N.getValueType();
2511  bool WasTruncated = false;
2512  if (N.getOpcode() == ISD::TRUNCATE) {
2513  WasTruncated = true;
2514  N = N.getOperand(0);
2515  }
2516 
2517  if (N.getOpcode() != X86ISD::Wrapper)
2518  return false;
2519 
2520  // We can only use non-GlobalValues as immediates if they were not truncated,
2521  // as we do not have any range information. If we have a GlobalValue and the
2522  // address was not truncated, we can select it as an operand directly.
2523  unsigned Opc = N.getOperand(0)->getOpcode();
2524  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2525  Op = N.getOperand(0);
2526  // We can only select the operand directly if we didn't have to look past a
2527  // truncate.
2528  return !WasTruncated;
2529  }
2530 
2531  // Check that the global's range fits into VT.
2532  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2533  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2534  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2535  return false;
2536 
2537  // Okay, we can use a narrow reference.
2538  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2539  GA->getOffset(), GA->getTargetFlags());
2540  return true;
2541 }
2542 
2543 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2544  SDValue &Base, SDValue &Scale,
2545  SDValue &Index, SDValue &Disp,
2546  SDValue &Segment) {
2547  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2548  !IsProfitableToFold(N, P, Root) ||
2549  !IsLegalToFold(N, P, Root, OptLevel))
2550  return false;
2551 
2552  return selectAddr(N.getNode(),
2553  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2554 }
2555 
2556 /// Return an SDNode that returns the value of the global base register.
2557 /// Output instructions required to initialize the global base register,
2558 /// if necessary.
2559 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2560  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2561  auto &DL = MF->getDataLayout();
2562  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2563 }
2564 
2565 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2566  if (N->getOpcode() == ISD::TRUNCATE)
2567  N = N->getOperand(0).getNode();
2568  if (N->getOpcode() != X86ISD::Wrapper)
2569  return false;
2570 
2571  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2572  if (!GA)
2573  return false;
2574 
2575  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2576  return CR && CR->getSignedMin().sge(-1ull << Width) &&
2577  CR->getSignedMax().slt(1ull << Width);
2578 }
2579 
2580 static X86::CondCode getCondFromNode(SDNode *N) {
2581  assert(N->isMachineOpcode() && "Unexpected node");
2582  X86::CondCode CC = X86::COND_INVALID;
2583  unsigned Opc = N->getMachineOpcode();
2584  if (Opc == X86::JCC_1)
2585  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1));
2586  else if (Opc == X86::SETCCr)
2587  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0));
2588  else if (Opc == X86::SETCCm)
2589  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5));
2590  else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr ||
2591  Opc == X86::CMOV64rr)
2592  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2));
2593  else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm ||
2594  Opc == X86::CMOV64rm)
2595  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6));
2596 
2597  return CC;
2598 }
2599 
2600 /// Test whether the given X86ISD::CMP node has any users that use a flag
2601 /// other than ZF.
2602 bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
2603  // Examine each user of the node.
2604  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2605  UI != UE; ++UI) {
2606  // Only check things that use the flags.
2607  if (UI.getUse().getResNo() != Flags.getResNo())
2608  continue;
2609  // Only examine CopyToReg uses that copy to EFLAGS.
2610  if (UI->getOpcode() != ISD::CopyToReg ||
2611  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2612  return false;
2613  // Examine each user of the CopyToReg use.
2614  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2615  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2616  // Only examine the Flag result.
2617  if (FlagUI.getUse().getResNo() != 1) continue;
2618  // Anything unusual: assume conservatively.
2619  if (!FlagUI->isMachineOpcode()) return false;
2620  // Examine the condition code of the user.
2621  X86::CondCode CC = getCondFromNode(*FlagUI);
2622 
2623  switch (CC) {
2624  // Comparisons which only use the zero flag.
2625  case X86::COND_E: case X86::COND_NE:
2626  continue;
2627  // Anything else: assume conservatively.
2628  default:
2629  return false;
2630  }
2631  }
2632  }
2633  return true;
2634 }
2635 
2636 /// Test whether the given X86ISD::CMP node has any uses which require the SF
2637 /// flag to be accurate.
2638 bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
2639  // Examine each user of the node.
2640  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2641  UI != UE; ++UI) {
2642  // Only check things that use the flags.
2643  if (UI.getUse().getResNo() != Flags.getResNo())
2644  continue;
2645  // Only examine CopyToReg uses that copy to EFLAGS.
2646  if (UI->getOpcode() != ISD::CopyToReg ||
2647  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2648  return false;
2649  // Examine each user of the CopyToReg use.
2650  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2651  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2652  // Only examine the Flag result.
2653  if (FlagUI.getUse().getResNo() != 1) continue;
2654  // Anything unusual: assume conservatively.
2655  if (!FlagUI->isMachineOpcode()) return false;
2656  // Examine the condition code of the user.
2657  X86::CondCode CC = getCondFromNode(*FlagUI);
2658 
2659  switch (CC) {
2660  // Comparisons which don't examine the SF flag.
2661  case X86::COND_A: case X86::COND_AE:
2662  case X86::COND_B: case X86::COND_BE:
2663  case X86::COND_E: case X86::COND_NE:
2664  case X86::COND_O: case X86::COND_NO:
2665  case X86::COND_P: case X86::COND_NP:
2666  continue;
2667  // Anything else: assume conservatively.
2668  default:
2669  return false;
2670  }
2671  }
2672  }
2673  return true;
2674 }
2675 
2676 static bool mayUseCarryFlag(X86::CondCode CC) {
2677  switch (CC) {
2678  // Comparisons which don't examine the CF flag.
2679  case X86::COND_O: case X86::COND_NO:
2680  case X86::COND_E: case X86::COND_NE:
2681  case X86::COND_S: case X86::COND_NS:
2682  case X86::COND_P: case X86::COND_NP:
2683  case X86::COND_L: case X86::COND_GE:
2684  case X86::COND_G: case X86::COND_LE:
2685  return false;
2686  // Anything else: assume conservatively.
2687  default:
2688  return true;
2689  }
2690 }
2691 
2692 /// Test whether the given node which sets flags has any uses which require the
2693 /// CF flag to be accurate.
2694  bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
2695  // Examine each user of the node.
2696  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2697  UI != UE; ++UI) {
2698  // Only check things that use the flags.
2699  if (UI.getUse().getResNo() != Flags.getResNo())
2700  continue;
2701 
2702  unsigned UIOpc = UI->getOpcode();
2703 
2704  if (UIOpc == ISD::CopyToReg) {
2705  // Only examine CopyToReg uses that copy to EFLAGS.
2706  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2707  return false;
2708  // Examine each user of the CopyToReg use.
2709  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2710  FlagUI != FlagUE; ++FlagUI) {
2711  // Only examine the Flag result.
2712  if (FlagUI.getUse().getResNo() != 1)
2713  continue;
2714  // Anything unusual: assume conservatively.
2715  if (!FlagUI->isMachineOpcode())
2716  return false;
2717  // Examine the condition code of the user.
2718  X86::CondCode CC = getCondFromNode(*FlagUI);
2719 
2720  if (mayUseCarryFlag(CC))
2721  return false;
2722  }
2723 
2724  // This CopyToReg is ok. Move on to the next user.
2725  continue;
2726  }
2727 
2728  // This might be an unselected node. So look for the pre-isel opcodes that
2729  // use flags.
2730  unsigned CCOpNo;
2731  switch (UIOpc) {
2732  default:
2733  // Something unusual. Be conservative.
2734  return false;
2735  case X86ISD::SETCC: CCOpNo = 0; break;
2736  case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
2737  case X86ISD::CMOV: CCOpNo = 2; break;
2738  case X86ISD::BRCOND: CCOpNo = 2; break;
2739  }
2740 
2741  X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
2742  if (mayUseCarryFlag(CC))
2743  return false;
2744  }
2745  return true;
2746 }
2747 
2748 /// Check whether or not the chain ending in StoreNode is suitable for folding
2749 /// the {load; op; store} sequence into a single memory-modifying operation.
2750 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2751                                         SDValue StoredVal, SelectionDAG *CurDAG,
2752  unsigned LoadOpNo,
2753  LoadSDNode *&LoadNode,
2754  SDValue &InputChain) {
2755  // Is the stored value result 0 of the operation?
2756  if (StoredVal.getResNo() != 0) return false;
2757 
2758  // Are there other uses of the operation other than the store?
2759  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2760 
2761  // Is the store non-extending and non-indexed?
2762  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2763  return false;
2764 
2765  SDValue Load = StoredVal->getOperand(LoadOpNo);
2766  // Is the stored value a non-extending and non-indexed load?
2767  if (!ISD::isNormalLoad(Load.getNode())) return false;
2768 
2769  // Return LoadNode by reference.
2770  LoadNode = cast<LoadSDNode>(Load);
2771 
2772  // Is store the only read of the loaded value?
2773  if (!Load.hasOneUse())
2774  return false;
2775 
2776  // Is the address of the store the same as the load?
2777  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2778  LoadNode->getOffset() != StoreNode->getOffset())
2779  return false;
2780 
2781  bool FoundLoad = false;
2782  SmallVector<SDValue, 4> ChainOps;
2783  SmallVector<const SDNode *, 4> LoopWorklist;
2784  SmallPtrSet<const SDNode *, 32> Visited;
2785  const unsigned int Max = 1024;
2786 
2787  // Visualization of Load-Op-Store fusion:
2788  // -------------------------
2789  // Legend:
2790  // *-lines = Chain operand dependencies.
2791  // |-lines = Normal operand dependencies.
2792  // Dependencies flow down and right. n-suffix references multiple nodes.
2793  //
2794  // C Xn C
2795  // * * *
2796  // * * *
2797  // Xn A-LD Yn TF Yn
2798  // * * \ | * |
2799  // * * \ | * |
2800  // * * \ | => A--LD_OP_ST
2801  // * * \| \
2802  // TF OP \
2803  // * | \ Zn
2804  // * | \
2805  // A-ST Zn
2806  //
2807 
2808  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
2809  // #2: Yn -> LD
2810  // #3: ST -> Zn
2811 
2812  // Ensure the transform is safe by checking for the dual
2813  // dependencies to make sure we do not induce a loop.
2814 
2815  // As LD is a predecessor to both OP and ST we can do this by checking:
2816  // a). if LD is a predecessor to a member of Xn or Yn.
2817  // b). if a Zn is a predecessor to ST.
2818 
2819  // However, (b) can only occur through being a chain predecessor to
2820  // ST, which is the same as Zn being a member or predecessor of Xn,
2821  // which is a subset of LD being a predecessor of Xn. So it's
2822  // subsumed by check (a).
2823 
2824  SDValue Chain = StoreNode->getChain();
2825 
2826  // Gather X elements in ChainOps.
2827  if (Chain == Load.getValue(1)) {
2828  FoundLoad = true;
2829  ChainOps.push_back(Load.getOperand(0));
2830  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2831  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2832  SDValue Op = Chain.getOperand(i);
2833  if (Op == Load.getValue(1)) {
2834  FoundLoad = true;
2835  // Drop Load, but keep its chain. No cycle check necessary.
2836  ChainOps.push_back(Load.getOperand(0));
2837  continue;
2838  }
2839  LoopWorklist.push_back(Op.getNode());
2840  ChainOps.push_back(Op);
2841  }
2842  }
2843 
2844  if (!FoundLoad)
2845  return false;
2846 
2847  // Worklist is currently Xn. Add Yn to worklist.
2848  for (SDValue Op : StoredVal->ops())
2849  if (Op.getNode() != LoadNode)
2850  LoopWorklist.push_back(Op.getNode());
2851 
2852  // Check (a) if Load is a predecessor to Xn + Yn
2853  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2854  true))
2855  return false;
2856 
2857  InputChain =
2858  CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2859  return true;
2860 }
2861 
2862 // Change a chain of {load; op; store} of the same value into a simple op
2863 // through memory of that value, if the uses of the modified value and its
2864 // address are suitable.
2865 //
2866 // The tablegen memory-operand pattern is currently not able to match the
2867 // case where the EFLAGS on the original operation are used.
2868 //
2869 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2870 // be transferred from a node in the pattern to the result node, probably with
2871 // a new keyword. For example, we have this
2872 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2873 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2874 // (implicit EFLAGS)]>;
2875 // but maybe need something like this
2876 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2877 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2878 // (transferrable EFLAGS)]>;
2879 //
2880 // Until then, we manually fold these and instruction select the operation
2881 // here.
2882 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2883  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2884  SDValue StoredVal = StoreNode->getOperand(1);
2885  unsigned Opc = StoredVal->getOpcode();
2886 
2887  // Before we try to select anything, make sure this is memory operand size
2888  // and opcode we can handle. Note that this must match the code below that
2889  // actually lowers the opcodes.
2890  EVT MemVT = StoreNode->getMemoryVT();
2891  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2892  MemVT != MVT::i8)
2893  return false;
2894 
2895  bool IsCommutable = false;
2896  bool IsNegate = false;
2897  switch (Opc) {
2898  default:
2899  return false;
2900  case X86ISD::SUB:
2901  IsNegate = isNullConstant(StoredVal.getOperand(0));
2902  break;
2903  case X86ISD::SBB:
2904  break;
2905  case X86ISD::ADD:
2906  case X86ISD::ADC:
2907  case X86ISD::AND:
2908  case X86ISD::OR:
2909  case X86ISD::XOR:
2910  IsCommutable = true;
2911  break;
2912  }
2913 
2914  unsigned LoadOpNo = IsNegate ? 1 : 0;
2915  LoadSDNode *LoadNode = nullptr;
2916  SDValue InputChain;
2917  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2918  LoadNode, InputChain)) {
2919  if (!IsCommutable)
2920  return false;
2921 
2922  // This operation is commutable, try the other operand.
2923  LoadOpNo = 1;
2924  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2925  LoadNode, InputChain))
2926  return false;
2927  }
2928 
2929  SDValue Base, Scale, Index, Disp, Segment;
2930  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2931  Segment))
2932  return false;
2933 
2934  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2935  unsigned Opc8) {
2936  switch (MemVT.getSimpleVT().SimpleTy) {
2937  case MVT::i64:
2938  return Opc64;
2939  case MVT::i32:
2940  return Opc32;
2941  case MVT::i16:
2942  return Opc16;
2943  case MVT::i8:
2944  return Opc8;
2945  default:
2946  llvm_unreachable("Invalid size!");
2947  }
2948  };
2949 
2950  MachineSDNode *Result;
2951  switch (Opc) {
2952  case X86ISD::SUB:
2953  // Handle negate.
2954  if (IsNegate) {
2955  unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
2956  X86::NEG8m);
2957  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2958  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
2959  MVT::Other, Ops);
2960  break;
2961  }
2962    LLVM_FALLTHROUGH;
2963  case X86ISD::ADD:
2964  // Try to match inc/dec.
2965  if (!Subtarget->slowIncDec() || OptForSize) {
2966  bool IsOne = isOneConstant(StoredVal.getOperand(1));
2967  bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
2968      // ADD/SUB with 1/-1 can use inc/dec when the carry flag isn't used.
2969  if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
2970  unsigned NewOpc =
2971  ((Opc == X86ISD::ADD) == IsOne)
2972  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2973  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2974  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2975  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
2976  MVT::Other, Ops);
2977  break;
2978  }
2979  }
2980    LLVM_FALLTHROUGH;
2981  case X86ISD::ADC:
2982  case X86ISD::SBB:
2983  case X86ISD::AND:
2984  case X86ISD::OR:
2985  case X86ISD::XOR: {
2986  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
2987  switch (Opc) {
2988  case X86ISD::ADD:
2989  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
2990  X86::ADD8mr);
2991  case X86ISD::ADC:
2992  return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
2993  X86::ADC8mr);
2994  case X86ISD::SUB:
2995  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
2996  X86::SUB8mr);
2997  case X86ISD::SBB:
2998  return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
2999  X86::SBB8mr);
3000  case X86ISD::AND:
3001  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
3002  X86::AND8mr);
3003  case X86ISD::OR:
3004  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
3005  case X86ISD::XOR:
3006  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
3007  X86::XOR8mr);
3008  default:
3009  llvm_unreachable("Invalid opcode!");
3010  }
3011  };
3012  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
3013  switch (Opc) {
3014  case X86ISD::ADD:
3015  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
3016  case X86ISD::ADC:
3017  return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
3018  case X86ISD::SUB:
3019  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
3020  case X86ISD::SBB:
3021  return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
3022  case X86ISD::AND:
3023  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
3024  case X86ISD::OR:
3025  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
3026  case X86ISD::XOR:
3027  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
3028  default:
3029  llvm_unreachable("Invalid opcode!");
3030  }
3031  };
3032  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
3033  switch (Opc) {
3034  case X86ISD::ADD:
3035  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
3036  X86::ADD8mi);
3037  case X86ISD::ADC:
3038  return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
3039  X86::ADC8mi);
3040  case X86ISD::SUB:
3041  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
3042  X86::SUB8mi);
3043  case X86ISD::SBB:
3044  return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
3045  X86::SBB8mi);
3046  case X86ISD::AND:
3047  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
3048  X86::AND8mi);
3049  case X86ISD::OR:
3050  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
3051  X86::OR8mi);
3052  case X86ISD::XOR:
3053  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
3054  X86::XOR8mi);
3055  default:
3056  llvm_unreachable("Invalid opcode!");
3057  }
3058  };
3059 
3060  unsigned NewOpc = SelectRegOpcode(Opc);
3061  SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
3062 
3063  // See if the operand is a constant that we can fold into an immediate
3064  // operand.
3065  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
3066  int64_t OperandV = OperandC->getSExtValue();
3067 
3068  // Check if we can shrink the operand enough to fit in an immediate (or
3069  // fit into a smaller immediate) by negating it and switching the
3070  // operation.
3071  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
3072  ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
3073  (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
3074  isInt<32>(-OperandV))) &&
3075  hasNoCarryFlagUses(StoredVal.getValue(1))) {
3076  OperandV = -OperandV;
3077  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
3078  }
3079 
3080  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
3081  // the larger immediate operand.
3082  if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
3083  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3084  NewOpc = SelectImm8Opcode(Opc);
3085  } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
3086  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3087  NewOpc = SelectImmOpcode(Opc);
3088  }
3089  }
3090 
3091  if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
3092  SDValue CopyTo =
3093  CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
3094  StoredVal.getOperand(2), SDValue());
3095 
3096  const SDValue Ops[] = {Base, Scale, Index, Disp,
3097  Segment, Operand, CopyTo, CopyTo.getValue(1)};
3098  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3099  Ops);
3100  } else {
3101  const SDValue Ops[] = {Base, Scale, Index, Disp,
3102  Segment, Operand, InputChain};
3103  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3104  Ops);
3105  }
3106  break;
3107  }
3108  default:
3109  llvm_unreachable("Invalid opcode!");
3110  }
3111 
3112  MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
3113  LoadNode->getMemOperand()};
3114  CurDAG->setNodeMemRefs(Result, MemOps);
3115 
3116  // Update Load Chain uses as well.
3117  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
3118  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
3119  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
3120  CurDAG->RemoveDeadNode(Node);
3121  return true;
3122 }
3123 
3124 // See if this is an X & Mask that we can match to BEXTR/BZHI.
3125 // Where Mask is one of the following patterns:
3126 // a) x & (1 << nbits) - 1
3127 // b) x & ~(-1 << nbits)
3128 // c) x & (-1 >> (32 - y))
3129 // d) x << (32 - y) >> (32 - y)
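// All four forms keep only the low 'nbits' (or 'y') bits of x. For example,
// with nbits == 5 on i32, pattern (a) computes x & 0b11111, which is exactly
// what BZHI (or BEXTR with a start position of 0) produces.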
3130 bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
3131  assert(
3132  (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
3133  "Should be either an and-mask, or right-shift after clearing high bits.");
3134 
3135  // BEXTR is a BMI instruction, BZHI is a BMI2 instruction. We need at least one.
3136  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
3137  return false;
3138 
3139  MVT NVT = Node->getSimpleValueType(0);
3140 
3141  // Only supported for 32 and 64 bits.
3142  if (NVT != MVT::i32 && NVT != MVT::i64)
3143  return false;
3144 
3145  SDValue NBits;
3146 
3147  // If we have BMI2's BZHI, we are ok with multi-use patterns.
3148  // Else, if we only have BMI1's BEXTR, we require one-use.
3149  const bool CanHaveExtraUses = Subtarget->hasBMI2();
3150  auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
3151  return CanHaveExtraUses ||
3152  Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
3153  };
3154  auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
3155  auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
3156 
3157  auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
3158  if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
3159  assert(V.getSimpleValueType() == MVT::i32 &&
3160  V.getOperand(0).getSimpleValueType() == MVT::i64 &&
3161  "Expected i64 -> i32 truncation");
3162  V = V.getOperand(0);
3163  }
3164  return V;
3165  };
3166 
3167  // a) x & ((1 << nbits) + (-1))
3168  auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation,
3169  &NBits](SDValue Mask) -> bool {
3170  // Match `add`. Must only have one use!
3171  if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
3172  return false;
3173  // We should be adding all-ones constant (i.e. subtracting one.)
3174  if (!isAllOnesConstant(Mask->getOperand(1)))
3175  return false;
3176  // Match `1 << nbits`. Might be truncated. Must only have one use!
3177  SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3178  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3179  return false;
3180  if (!isOneConstant(M0->getOperand(0)))
3181  return false;
3182  NBits = M0->getOperand(1);
3183  return true;
3184  };
3185 
3186  auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
3187  V = peekThroughOneUseTruncation(V);
3188  return CurDAG->MaskedValueIsAllOnes(
3189  V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
3190  NVT.getSizeInBits()));
3191  };
3192 
3193  // b) x & ~(-1 << nbits)
3194  auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
3195  &NBits](SDValue Mask) -> bool {
3196  // Match `~()`. Must only have one use!
3197  if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
3198  return false;
3199  // The -1 only has to be all-ones for the final Node's NVT.
3200  if (!isAllOnes(Mask->getOperand(1)))
3201  return false;
3202  // Match `-1 << nbits`. Might be truncated. Must only have one use!
3203  SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3204  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3205  return false;
3206  // The -1 only has to be all-ones for the final Node's NVT.
3207  if (!isAllOnes(M0->getOperand(0)))
3208  return false;
3209  NBits = M0->getOperand(1);
3210  return true;
3211  };
3212 
3213  // Match potentially-truncated (bitwidth - y)
3214  auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt,
3215  unsigned Bitwidth) {
3216  // Skip over a truncate of the shift amount.
3217  if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
3218  ShiftAmt = ShiftAmt.getOperand(0);
3219  // The trunc should have been the only user of the real shift amount.
3220  if (!checkOneUse(ShiftAmt))
3221  return false;
3222  }
3223  // Match the shift amount as: (bitwidth - y). It should go away, too.
3224  if (ShiftAmt.getOpcode() != ISD::SUB)
3225  return false;
3226  auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
3227  if (!V0 || V0->getZExtValue() != Bitwidth)
3228  return false;
3229  NBits = ShiftAmt.getOperand(1);
3230  return true;
3231  };
3232 
3233  // c) x & (-1 >> (32 - y))
3234  auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation,
3235  matchShiftAmt](SDValue Mask) -> bool {
3236  // The mask itself may be truncated.
3237  Mask = peekThroughOneUseTruncation(Mask);
3238  unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
3239  // Match `l>>`. Must only have one use!
3240  if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
3241  return false;
3242  // We should be shifting truly all-ones constant.
3243  if (!isAllOnesConstant(Mask.getOperand(0)))
3244  return false;
3245  SDValue M1 = Mask.getOperand(1);
3246  // The shift amount should not be used externally.
3247  if (!checkOneUse(M1))
3248  return false;
3249  return matchShiftAmt(M1, Bitwidth);
3250  };
3251 
3252  SDValue X;
3253 
3254  // d) x << (32 - y) >> (32 - y)
3255  auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt,
3256  &X](SDNode *Node) -> bool {
3257  if (Node->getOpcode() != ISD::SRL)
3258  return false;
3259  SDValue N0 = Node->getOperand(0);
3260  if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
3261  return false;
3262  unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
3263  SDValue N1 = Node->getOperand(1);
3264  SDValue N01 = N0->getOperand(1);
3265  // Both of the shifts must be by the exact same value.
3266  // There should not be any uses of the shift amount outside of the pattern.
3267  if (N1 != N01 || !checkTwoUse(N1))
3268  return false;
3269  if (!matchShiftAmt(N1, Bitwidth))
3270  return false;
3271  X = N0->getOperand(0);
3272  return true;
3273  };
3274 
3275  auto matchLowBitMask = [matchPatternA, matchPatternB,
3276  matchPatternC](SDValue Mask) -> bool {
3277  return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
3278  };
3279 
3280  if (Node->getOpcode() == ISD::AND) {
3281  X = Node->getOperand(0);
3282  SDValue Mask = Node->getOperand(1);
3283 
3284  if (matchLowBitMask(Mask)) {
3285  // Great.
3286  } else {
3287  std::swap(X, Mask);
3288  if (!matchLowBitMask(Mask))
3289  return false;
3290  }
3291  } else if (!matchPatternD(Node))
3292  return false;
3293 
3294  SDLoc DL(Node);
3295 
3296  // Truncate the shift amount.
3297  NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
3298  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3299 
3300  // Insert the 8-bit NBits into the lowest 8 bits of a 32-bit register.
3301  // All the other bits are undefined; we do not care about them.
3302  SDValue ImplDef = SDValue(
3303  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
3304  insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
3305  NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef,
3306  NBits);
3307  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3308 
3309  if (Subtarget->hasBMI2()) {
3310  // Great, just emit the BZHI.
3311  if (NVT != MVT::i32) {
3312  // But have to place the bit count into the wide-enough register first.
3313  NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
3314  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3315  }
3316 
3317  SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
3318  ReplaceNode(Node, Extract.getNode());
3319  SelectCode(Extract.getNode());
3320  return true;
3321  }
3322 
3323  // Else, if we do *NOT* have BMI2, find out whether 'X' is *logically*
3324  // shifted (potentially with a one-use trunc in between), and whether the
3325  // truncation was the only use of the shift; if so, look past the one-use
3326  // truncation.
3327  {
3328  SDValue RealX = peekThroughOneUseTruncation(X);
3329  // FIXME: only if the shift is one-use?
3330  if (RealX != X && RealX.getOpcode() == ISD::SRL)
3331  X = RealX;
3332  }
3333 
3334  MVT XVT = X.getSimpleValueType();
3335 
3336  // Else, emitting BEXTR requires one more step.
3337  // The 'control' of BEXTR has the pattern of:
3338  // [15...8 bit][ 7...0 bit] location
3339  // [ bit count][ shift] name
3340  // I.e. 0b00000010'00000001 means (x >> 0b1) & 0b11
3341 
3342  // Shift NBits left by 8 bits, thus producing 'control'.
3343  // This leaves the low 8 bits zero.
3344  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
3345  SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
3346  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3347 
3348  // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
3349  // FIXME: only if the shift is one-use?
3350  if (X.getOpcode() == ISD::SRL) {
3351  SDValue ShiftAmt = X.getOperand(1);
3352  X = X.getOperand(0);
3353 
3354  assert(ShiftAmt.getValueType() == MVT::i8 &&
3355  "Expected shift amount to be i8");
3356 
3357  // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
3358  // We could zext to i16 in some form, but we intentionally don't do that.
3359  SDValue OrigShiftAmt = ShiftAmt;
3360  ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
3361  insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
3362 
3363  // And now 'or' these low 8 bits of shift amount into the 'control'.
3364  Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
3365  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3366  }
3367 
3368  // But have to place the 'control' into the wide-enough register first.
3369  if (XVT != MVT::i32) {
3370  Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
3371  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3372  }
3373 
3374  // And finally, form the BEXTR itself.
3375  SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
3376 
3377  // 'X' was originally truncated; truncate the result back down now.
3378  if (XVT != NVT) {
3379  insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
3380  Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
3381  }
3382 
3383  ReplaceNode(Node, Extract.getNode());
3384  SelectCode(Extract.getNode());
3385 
3386  return true;
3387 }
3388 
3389 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
3390 MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
3391  MVT NVT = Node->getSimpleValueType(0);
3392  SDLoc dl(Node);
3393 
3394  SDValue N0 = Node->getOperand(0);
3395  SDValue N1 = Node->getOperand(1);
3396 
3397  // If we have TBM we can use an immediate for the control. If we have BMI
3398  // we should only do this if the BEXTR instruction is implemented well.
3399  // Otherwise moving the control into a register makes this more costly.
3400  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
3401  // hoisting the move immediate would make it worthwhile with a less optimal
3402  // BEXTR?
3403  if (!Subtarget->hasTBM() &&
3404  !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
3405  return nullptr;
3406 
3407  // Must have a shift right.
3408  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
3409  return nullptr;
3410 
3411  // Shift can't have additional users.
3412  if (!N0->hasOneUse())
3413  return nullptr;
3414 
3415  // Only supported for 32 and 64 bits.
3416  if (NVT != MVT::i32 && NVT != MVT::i64)
3417  return nullptr;
3418 
3419  // Shift amount and RHS of and must be constant.
3420  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
3421  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3422  if (!MaskCst || !ShiftCst)
3423  return nullptr;
3424 
3425  // And RHS must be a mask.
3426  uint64_t Mask = MaskCst->getZExtValue();
3427  if (!isMask_64(Mask))
3428  return nullptr;
3429 
3430  uint64_t Shift = ShiftCst->getZExtValue();
3431  uint64_t MaskSize = countPopulation(Mask);
3432 
3433  // Don't interfere with something that can be handled by extracting AH.
3434  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
3435  if (Shift == 8 && MaskSize == 8)
3436  return nullptr;
3437 
3438  // Make sure we are only using bits that were in the original value, not
3439  // shifted in.
3440  if (Shift + MaskSize > NVT.getSizeInBits())
3441  return nullptr;
3442 
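  // The BEXTR control word packs the start bit in bits 7:0 and the length in
  // bits 15:8, which is what 'Shift | (MaskSize << 8)' builds below. For
  // example, (x >> 4) & 0xFF uses Shift == 4 and MaskSize == 8, i.e. control
  // 0x0804.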
3443  SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
3444  unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
3445  unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
3446 
3447  // BMI requires the immediate to be placed in a register.
3448  if (!Subtarget->hasTBM()) {
3449  ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
3450  MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
3451  unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3452  New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
3453  }
3454 
3455  MachineSDNode *NewNode;
3456  SDValue Input = N0->getOperand(0);
3457  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3458  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3459  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
3460  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
3461  NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3462  // Update the chain.
3463  ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
3464  // Record the mem-refs
3465  CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
3466  } else {
3467  NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New);
3468  }
3469 
3470  return NewNode;
3471 }
3472 
3473 // Emit a PCMPISTR(I/M) instruction.
3474 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
3475  bool MayFoldLoad, const SDLoc &dl,
3476  MVT VT, SDNode *Node) {
3477  SDValue N0 = Node->getOperand(0);
3478  SDValue N1 = Node->getOperand(1);
3479  SDValue Imm = Node->getOperand(2);
3480  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3481  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3482 
3483  // Try to fold a load. No need to check alignment.
3484  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3485  if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3486  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3487  N1.getOperand(0) };
3488  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
3489  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3490  // Update the chain.
3491  ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
3492  // Record the mem-refs
3493  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3494  return CNode;
3495  }
3496 
3497  SDValue Ops[] = { N0, N1, Imm };
3498  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
3499  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3500  return CNode;
3501 }
3502 
3503 // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
3504 // to emit a second instruction after this one. This is needed since we have two
3505 // copyToReg nodes glued before this and we need to continue that glue through.
3506 MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
3507  bool MayFoldLoad, const SDLoc &dl,
3508  MVT VT, SDNode *Node,
3509  SDValue &InFlag) {
3510  SDValue N0 = Node->getOperand(0);
3511  SDValue N2 = Node->getOperand(2);
3512  SDValue Imm = Node->getOperand(4);
3513  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3514  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3515 
3516  // Try to fold a load. No need to check alignment.
3517  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3518  if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3519  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3520  N2.getOperand(0), InFlag };
3521  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
3522  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3523  InFlag = SDValue(CNode, 3);
3524  // Update the chain.
3525  ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
3526  // Record the mem-refs
3527  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
3528  return CNode;
3529  }
3530 
3531  SDValue Ops[] = { N0, N2, Imm, InFlag };
3532  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
3533  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3534  InFlag = SDValue(CNode, 2);
3535  return CNode;
3536 }
3537 
3538 bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3539  EVT VT = N->getValueType(0);
3540 
3541  // Only handle scalar shifts.
3542  if (VT.isVector())
3543  return false;
3544 
3545  // Narrower shifts only mask to 5 bits in hardware.
3546  unsigned Size = VT == MVT::i64 ? 64 : 32;
3547 
3548  SDValue OrigShiftAmt = N->getOperand(1);
3549  SDValue ShiftAmt = OrigShiftAmt;
3550  SDLoc DL(N);
3551 
3552  // Skip over a truncate of the shift amount.
3553  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
3554  ShiftAmt = ShiftAmt->getOperand(0);
3555 
3556  // This function is called after X86DAGToDAGISel::matchBitExtract(),
3557  // so we are not afraid of messing up a BZHI/BEXTR pattern.
3558 
3559  SDValue NewShiftAmt;
3560  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3561  SDValue Add0 = ShiftAmt->getOperand(0);
3562  SDValue Add1 = ShiftAmt->getOperand(1);
3563  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3564  // to avoid the ADD/SUB.
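  // For example, an i32 shift by (x + 32) is equivalent to a shift by x,
  // because the hardware masks the count to 5 bits, so the ADD can be dropped.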
3565  if (isa<ConstantSDNode>(Add1) &&
3566  cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
3567  NewShiftAmt = Add0;
3568  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3569  // generate a NEG instead of a SUB of a constant.
3570  } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3571  isa<ConstantSDNode>(Add0) &&
3572  cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
3573  cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
3574  // Insert a negate op.
3575  // TODO: This isn't guaranteed to replace the sub if there is a logic cone
3576  // that uses it that's not a shift.
3577  EVT SubVT = ShiftAmt.getValueType();
3578  SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
3579  SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
3580  NewShiftAmt = Neg;
3581 
3582  // Insert these operands into a valid topological order so they can
3583  // get selected independently.
3584  insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
3585  insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
3586  } else
3587  return false;
3588  } else
3589  return false;
3590 
3591  if (NewShiftAmt.getValueType() != MVT::i8) {
3592  // Need to truncate the shift amount.
3593  NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
3594  // Add to a correct topological ordering.
3595  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3596  }
3597 
3598  // Insert a new mask to keep the shift amount legal. This should be removed
3599  // by isel patterns.
3600  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
3601  CurDAG->getConstant(Size - 1, DL, MVT::i8));
3602  // Place in a correct topological ordering.
3603  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3604 
3605  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
3606  NewShiftAmt);
3607  if (UpdatedNode != N) {
3608  // If we found an existing node, we should replace ourselves with that node
3609  // and wait for it to be selected after its other users.
3610  ReplaceNode(N, UpdatedNode);
3611  return true;
3612  }
3613 
3614  // If the original shift amount is now dead, delete it so that we don't run
3615  // it through isel.
3616  if (OrigShiftAmt.getNode()->use_empty())
3617  CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
3618 
3619  // Now that we've optimized the shift amount, defer to normal isel to get
3620  // load folding and legacy vs BMI2 selection without repeating it here.
3621  SelectCode(N);
3622  return true;
3623 }
3624 
3625 bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
3626  MVT NVT = N->getSimpleValueType(0);
3627  unsigned Opcode = N->getOpcode();
3628  SDLoc dl(N);
3629 
3630  // For operations of the form (x << C1) op C2, check if we can use a smaller
3631  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
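  // For example, (x << 8) | 0x1200 can be selected as ((x | 0x12) << 8): the
  // original constant needs a 4-byte immediate, while 0x12 fits in a
  // sign-extended 8-bit immediate.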
3632  SDValue Shift = N->getOperand(0);
3633  SDValue N1 = N->getOperand(1);
3634 
3635  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
3636  if (!Cst)
3637  return false;
3638 
3639  int64_t Val = Cst->getSExtValue();
3640 
3641  // If we have an any_extend feeding the AND, look through it to see if there
3642  // is a shift behind it. But only if the AND doesn't use the extended bits.
3643  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
3644  bool FoundAnyExtend = false;
3645  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
3646  Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
3647  isUInt<32>(Val)) {
3648  FoundAnyExtend = true;
3649  Shift = Shift.getOperand(0);
3650  }
3651 
3652  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
3653  return false;
3654 
3655  // i8 is unshrinkable, i16 should be promoted to i32.
3656  if (NVT != MVT::i32 && NVT != MVT::i64)
3657  return false;
3658 
3659  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
3660  if (!ShlCst)
3661  return false;
3662 
3663  uint64_t ShAmt = ShlCst->getZExtValue();
3664 
3665  // Make sure that we don't change the operation by removing bits.
3666  // This only matters for OR and XOR, AND is unaffected.
3667  uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
3668  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
3669  return false;
3670 
3671  // Check the minimum bitwidth for the new constant.
3672  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
3673  auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
3674  if (Opcode == ISD::AND) {
3675  // AND32ri is the same as AND64ri32 with zext imm.
3676  // Try this before sign extended immediates below.
3677  ShiftedVal = (uint64_t)Val >> ShAmt;
3678  if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
3679  return true;
3680  // Also swap order when the AND can become MOVZX.
3681  if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
3682  return true;
3683  }
3684  ShiftedVal = Val >> ShAmt;
3685  if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
3686  (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
3687  return true;
3688  if (Opcode != ISD::AND) {
3689  // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
3690  ShiftedVal = (uint64_t)Val >> ShAmt;
3691  if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
3692  return true;
3693  }
3694  return false;
3695  };
3696 
3697  int64_t ShiftedVal;
3698  if (!CanShrinkImmediate(ShiftedVal))
3699  return false;
3700 
3701  // Ok, we can reorder to get a smaller immediate.
3702 
3703  // But, it's possible the original immediate allowed an AND to become MOVZX.
3704  // Doing this late in order to make the MaskedValueIsZero call as late as
3705  // possible.
3706  if (Opcode == ISD::AND) {
3707  // Find the smallest zext this could possibly be.
3708  unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
3709  ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));
3710 
3711  // Figure out which bits need to be zero to achieve that mask.
3712  APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
3713  ZExtWidth);
3714  NeededMask &= ~Cst->getAPIntValue();
3715 
3716  if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
3717  return false;
3718  }
3719 
3720  SDValue X = Shift.getOperand(0);
3721  if (FoundAnyExtend) {
3722  SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
3723  insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
3724  X = NewX;
3725  }
3726 
3727  SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
3728  insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
3729  SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
3730  insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
3731  SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
3732  Shift.getOperand(1));
3733  ReplaceNode(N, NewSHL.getNode());
3734  SelectCode(NewSHL.getNode());
3735  return true;
3736 }
3737 
3738 /// If the high bits of an 'and' operand are known zero, try setting the
3739 /// high bits of an 'and' constant operand to produce a smaller encoding by
3740 /// creating a small, sign-extended negative immediate rather than a large
3741 /// positive one. This reverses a transform in SimplifyDemandedBits that
3742 /// shrinks mask constants by clearing bits. There is also a possibility that
3743 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
3744 /// case, just replace the 'and'. Return 'true' if the node is replaced.
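/// For example, an i32 'and' with 0x0FFFFFF0 needs a 4-byte immediate, but if
/// the top four bits of the other operand are already known to be zero, the
/// mask can be widened to 0xFFFFFFF0 (-16), which encodes as a single
/// sign-extended byte.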
3745 bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
3746  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
3747  // have immediate operands.
3748  MVT VT = And->getSimpleValueType(0);
3749  if (VT != MVT::i32 && VT != MVT::i64)
3750  return false;
3751 
3752  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
3753  if (!And1C)
3754  return false;
3755 
3756  // Bail out if the mask constant is already negative. It can't shrink any more.
3757  // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
3758  // patterns to use a 32-bit and instead of a 64-bit and by relying on the
3759  // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
3760  // are negative too.
3761  APInt MaskVal = And1C->getAPIntValue();
3762  unsigned MaskLZ = MaskVal.countLeadingZeros();
3763  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
3764  return false;
3765 
3766  // Don't extend into the upper 32 bits of a 64 bit mask.
3767  if (VT == MVT::i64 && MaskLZ >= 32) {
3768  MaskLZ -= 32;
3769  MaskVal = MaskVal.trunc(32);
3770  }
3771 
3772  SDValue And0 = And->getOperand(0);
3773  APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
3774  APInt NegMaskVal = MaskVal | HighZeros;
3775 
3776  // If a negative constant would not allow a smaller encoding, there's no need
3777  // to continue. Only change the constant when we know it's a win.
3778  unsigned MinWidth = NegMaskVal.getMinSignedBits();
3779  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
3780  return false;
3781 
3782  // Extend masks if we truncated above.
3783  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
3784  NegMaskVal = NegMaskVal.zext(64);
3785  HighZeros = HighZeros.zext(64);
3786  }
3787 
3788  // The variable operand must be all zeros in the top bits to allow using the
3789  // new, negative constant as the mask.
3790  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
3791  return false;
3792 
3793  // Check if the mask is -1. In that case, this is an unnecessary instruction
3794  // that escaped earlier analysis.
3795  if (NegMaskVal.isAllOnesValue()) {
3796  ReplaceNode(And, And0.getNode());
3797  return true;
3798  }
3799 
3800  // A negative mask allows a smaller encoding. Create a new 'and' node.
3801  SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
3802  SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
3803  ReplaceNode(And, NewAnd.getNode());
3804  SelectCode(NewAnd.getNode());
3805  return true;
3806 }
3807 
3808 static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
3809  bool FoldedBCast, bool Masked) {
3810  if (Masked) {
3811  if (FoldedLoad) {
3812  switch (TestVT.SimpleTy) {
3813  default: llvm_unreachable("Unexpected VT!");
3814  case MVT::v16i8:
3815  return IsTestN ? X86::VPTESTNMBZ128rmk : X86::VPTESTMBZ128rmk;
3816  case MVT::v8i16:
3817  return IsTestN ? X86::VPTESTNMWZ128rmk : X86::VPTESTMWZ128rmk;
3818  case MVT::v4i32:
3819  return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk;
3820  case MVT::v2i64:
3821  return IsTestN ? X86::VPTESTNMQZ128rmk : X86::VPTESTMQZ128rmk;
3822  case MVT::v32i8:
3823  return IsTestN ? X86::VPTESTNMBZ256rmk : X86::VPTESTMBZ256rmk;
3824  case MVT::v16i16:
3825  return IsTestN ? X86::VPTESTNMWZ256rmk : X86::VPTESTMWZ256rmk;
3826  case MVT::v8i32:
3827  return IsTestN ? X86::VPTESTNMDZ256rmk : X86::VPTESTMDZ256rmk;
3828  case MVT::v4i64:
3829  return IsTestN ? X86::VPTESTNMQZ256rmk : X86::VPTESTMQZ256rmk;
3830  case MVT::v64i8:
3831  return IsTestN ? X86::VPTESTNMBZrmk : X86::VPTESTMBZrmk;
3832  case MVT::v32i16:
3833  return IsTestN ? X86::VPTESTNMWZrmk : X86::VPTESTMWZrmk;
3834  case MVT::v16i32:
3835  return IsTestN ? X86::VPTESTNMDZrmk : X86::VPTESTMDZrmk;
3836  case MVT::v8i64:
3837  return IsTestN ? X86::VPTESTNMQZrmk : X86::VPTESTMQZrmk;
3838  }
3839  }
3840 
3841  if (FoldedBCast) {
3842  switch (TestVT.SimpleTy) {
3843  default: llvm_unreachable("Unexpected VT!");
3844  case MVT::v4i32:
3845  return IsTestN ? X86::VPTESTNMDZ128rmbk : X86::VPTESTMDZ128rmbk;
3846  case MVT::v2i64:
3847  return IsTestN ? X86::VPTESTNMQZ128rmbk : X86::VPTESTMQZ128rmbk;
3848  case MVT::v8i32:
3849  return IsTestN ? X86::VPTESTNMDZ256rmbk : X86::VPTESTMDZ256rmbk;
3850  case MVT::v4i64:
3851  return IsTestN ? X86::VPTESTNMQZ256rmbk : X86::VPTESTMQZ256rmbk;
3852  case MVT::v16i32:
3853  return IsTestN ? X86::VPTESTNMDZrmbk : X86::VPTESTMDZrmbk;
3854  case MVT::v8i64:
3855  return IsTestN ? X86::VPTESTNMQZrmbk : X86::VPTESTMQZrmbk;
3856  }
3857  }
3858 
3859  switch (TestVT.SimpleTy) {
3860  default: llvm_unreachable("Unexpected VT!");
3861  case MVT::v16i8:
3862  return IsTestN ? X86::VPTESTNMBZ128rrk : X86::VPTESTMBZ128rrk;
3863  case MVT::v8i16:
3864  return IsTestN ? X86::VPTESTNMWZ128rrk : X86::VPTESTMWZ128rrk;
3865  case MVT::v4i32:
3866  return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk;
3867  case MVT::v2i64:
3868  return IsTestN ? X86::VPTESTNMQZ128rrk : X86::VPTESTMQZ128rrk;
3869  case MVT::v32i8:
3870  return IsTestN ? X86::VPTESTNMBZ256rrk : X86::VPTESTMBZ256rrk;
3871  case MVT::v16i16:
3872  return IsTestN ? X86::VPTESTNMWZ256rrk : X86::VPTESTMWZ256rrk;
3873  case MVT::v8i32:
3874  return IsTestN ? X86::VPTESTNMDZ256rrk : X86::VPTESTMDZ256rrk;
3875  case MVT::v4i64:
3876  return IsTestN ? X86::VPTESTNMQZ256rrk : X86::VPTESTMQZ256rrk;
3877  case MVT::v64i8:
3878  return IsTestN ? X86::VPTESTNMBZrrk : X86::VPTESTMBZrrk;
3879  case MVT::v32i16:
3880  return IsTestN ? X86::VPTESTNMWZrrk : X86::VPTESTMWZrrk;
3881  case MVT::v16i32:
3882  return IsTestN ? X86::VPTESTNMDZrrk : X86::VPTESTMDZrrk;
3883  case MVT::v8i64:
3884  return IsTestN ? X86::VPTESTNMQZrrk : X86::VPTESTMQZrrk;
3885  }
3886  }
3887 
3888  if (FoldedLoad) {
3889  switch (TestVT.SimpleTy) {
3890  default: llvm_unreachable("Unexpected VT!");
3891  case MVT::v16i8:
3892  return IsTestN ? X86::VPTESTNMBZ128rm : X86::VPTESTMBZ128rm;
3893  case MVT::v8i16:
3894  return IsTestN ? X86::VPTESTNMWZ128rm : X86::VPTESTMWZ128rm;
3895  case MVT::v4i32:
3896  return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm;
3897  case MVT::v2i64:
3898  return IsTestN ? X86::VPTESTNMQZ128rm : X86::VPTESTMQZ128rm;
3899  case MVT::v32i8:
3900  return IsTestN ? X86::VPTESTNMBZ256rm : X86::VPTESTMBZ256rm;
3901  case MVT::v16i16:
3902  return IsTestN ? X86::VPTESTNMWZ256rm : X86::VPTESTMWZ256rm;
3903  case MVT::v8i32:
3904  return IsTestN ? X86::VPTESTNMDZ256rm : X86::VPTESTMDZ256rm;
3905  case MVT::v4i64:
3906  return IsTestN ? X86::VPTESTNMQZ256rm : X86::VPTESTMQZ256rm;
3907  case MVT::v64i8:
3908  return IsTestN ? X86::VPTESTNMBZrm : X86::VPTESTMBZrm;
3909  case MVT::v32i16:
3910  return IsTestN ? X86::VPTESTNMWZrm : X86::VPTESTMWZrm;
3911  case MVT::v16i32:
3912  return IsTestN ? X86::VPTESTNMDZrm : X86::VPTESTMDZrm;
3913  case MVT::v8i64:
3914  return IsTestN ? X86::VPTESTNMQZrm : X86::VPTESTMQZrm;
3915  }
3916  }
3917 
3918  if (FoldedBCast) {
3919  switch (TestVT.SimpleTy) {
3920  default: llvm_unreachable("Unexpected VT!");
3921  case MVT::v4i32:
3922  return IsTestN ? X86::VPTESTNMDZ128rmb : X86::VPTESTMDZ128rmb;
3923  case MVT::v2i64:
3924  return IsTestN ? X86::VPTESTNMQZ128rmb : X86::VPTESTMQZ128rmb;
3925  case MVT::v8i32:
3926  return IsTestN ? X86::VPTESTNMDZ256rmb : X86::VPTESTMDZ256rmb;
3927  case MVT::v4i64:
3928  return IsTestN ? X86::VPTESTNMQZ256rmb : X86::VPTESTMQZ256rmb;
3929  case MVT::v16i32:
3930  return IsTestN ? X86::VPTESTNMDZrmb : X86::VPTESTMDZrmb;
3931  case MVT::v8i64:
3932  return IsTestN ? X86::VPTESTNMQZrmb : X86::VPTESTMQZrmb;
3933  }
3934  }
3935 
3936  switch (TestVT.SimpleTy) {
3937  default: llvm_unreachable("Unexpected VT!");
3938  case MVT::v16i8:
3939  return IsTestN ? X86::VPTESTNMBZ128rr : X86::VPTESTMBZ128rr;
3940  case MVT::v8i16:
3941  return IsTestN ? X86::VPTESTNMWZ128rr : X86::VPTESTMWZ128rr;
3942  case MVT::v4i32:
3943  return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr;
3944  case MVT::v2i64:
3945  return IsTestN ? X86::VPTESTNMQZ128rr : X86::VPTESTMQZ128rr;
3946  case MVT::v32i8:
3947  return IsTestN ? X86::VPTESTNMBZ256rr : X86::VPTESTMBZ256rr;
3948  case MVT::v16i16:
3949  return IsTestN ? X86::VPTESTNMWZ256rr : X86::VPTESTMWZ256rr;
3950  case MVT::v8i32:
3951  return IsTestN ? X86::VPTESTNMDZ256rr : X86::VPTESTMDZ256rr;
3952  case MVT::v4i64:
3953  return IsTestN ? X86::VPTESTNMQZ256rr : X86::VPTESTMQZ256rr;
3954  case MVT::v64i8:
3955  return IsTestN ? X86::VPTESTNMBZrr : X86::VPTESTMBZrr;
3956  case MVT::v32i16:
3957  return IsTestN ? X86::VPTESTNMWZrr : X86::VPTESTMWZrr;
3958  case MVT::v16i32:
3959  return IsTestN ? X86::VPTESTNMDZrr : X86::VPTESTMDZrr;
3960  case MVT::v8i64:
3961  return IsTestN ? X86::VPTESTNMQZrr : X86::VPTESTMQZrr;
3962  }
3963 }
3964 
3965 // Try to create VPTESTM instruction. If InMask is not null, it will be used
3966 // to form a masked operation.
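// VPTESTM sets mask bit i when the AND of the corresponding source elements is
// non-zero, so an EQ/NE compare of (and x, y) against zero maps onto
// VPTESTNM/VPTESTM respectively.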
3967 bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
3968  SDValue InMask) {
3969  assert(Subtarget->hasAVX512() && "Expected AVX512!");
3970  assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
3971  "Unexpected VT!");
3972 
3973  // Look for equal and not equal compares.
3974  ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
3975  if (CC != ISD::SETEQ && CC != ISD::SETNE)
3976  return false;
3977 
3978  // See if we're comparing against zero. This should have been canonicalized
3979  // to RHS during lowering.
3980  if (!ISD::isBuildVectorAllZeros(Setcc.getOperand(1).getNode()))
3981  return false;
3982 
3983  SDValue N0 = Setcc.getOperand(0);
3984 
3985  MVT CmpVT = N0.getSimpleValueType();
3986  MVT CmpSVT = CmpVT.getVectorElementType();
3987 
3988  // Start with both operands the same. We'll try to refine this.
3989  SDValue Src0 = N0;
3990  SDValue Src1 = N0;
3991 
3992  {
3993  // Look through single use bitcasts.
3994  SDValue N0Temp = N0;
3995  if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
3996  N0Temp = N0.getOperand(0);
3997 
3998  // Look for single use AND.
3999  if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
4000  Src0 = N0Temp.getOperand(0);
4001  Src1 = N0Temp.getOperand(1);
4002  }
4003  }
4004 
4005  // Without VLX we need to widen the compare to 512 bits.
4006  bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
4007 
4008  // We can only fold loads if the sources are unique.
4009  bool CanFoldLoads = Src0 != Src1;
4010 
4011  // Try to fold loads unless we need to widen.
4012  bool FoldedLoad = false;
4013  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load;
4014  if (!Widen && CanFoldLoads) {
4015  Load = Src1;
4016  FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3,
4017  Tmp4);
4018  if (!FoldedLoad) {
4019  // AND is commutative.
4020  Load = Src0;
4021  FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2,
4022  Tmp3, Tmp4);
4023  if (FoldedLoad)
4024  std::swap(Src0, Src1);
4025  }
4026  }
4027 
4028  auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
4029  // Look through single use bitcasts.
4030  if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse())
4031  Src = Src.getOperand(0);
4032 
4033  if (Src.getOpcode() == X86ISD::VBROADCAST && Src.hasOneUse()) {
4034  Parent = Src.getNode();
4035  Src = Src.getOperand(0);
4036  if (Src.getSimpleValueType() == CmpSVT)
4037  return Src;
4038  }
4039 
4040  return SDValue();
4041  };
4042 
4043  // If we didn't fold a load, try to match a broadcast. There is no widening
4044  // limitation for this, but only 32-bit and 64-bit element types are supported.
4045  bool FoldedBCast = false;
4046  if (!FoldedLoad && CanFoldLoads &&
4047  (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
4048  SDNode *ParentNode = nullptr;
4049  if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
4050  FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
4051  Tmp1, Tmp2, Tmp3, Tmp4);
4052  }
4053 
4054  // Try the other operand.
4055  if (!FoldedBCast) {
4056  if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
4057  FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
4058  Tmp1, Tmp2, Tmp3, Tmp4);
4059  if (FoldedBCast)
4060  std::swap(Src0, Src1);
4061  }
4062  }
4063  }
4064 
4065  auto getMaskRC = [](MVT MaskVT) {
4066  switch (MaskVT.SimpleTy) {
4067  default: llvm_unreachable("Unexpected VT!");
4068  case MVT::v2i1: return X86::VK2RegClassID;
4069  case MVT::v4i1: return X86::VK4RegClassID;
4070  case MVT::v8i1: return X86::VK8RegClassID;
4071  case MVT::v16i1: return X86::VK16RegClassID;
4072  case MVT::v32i1: return X86::VK32RegClassID;
4073  case MVT::v64i1: return X86::VK64RegClassID;
4074  }
4075  };
4076 
4077  bool IsMasked = InMask.getNode() != nullptr;
4078 
4079  SDLoc dl(Root);
4080 
4081  MVT ResVT = Setcc.getSimpleValueType();
4082  MVT MaskVT = ResVT;
4083  if (Widen) {
4084  // Widen the inputs using insert_subreg or copy_to_regclass.
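  // For example, without VLX a v4i32 compare is performed as v16i32: the
  // sources are inserted into the low 128 bits of an undef 512-bit register,
  // and the resulting v16i1 mask is later copied back to the v4i1 register
  // class.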
4085  unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
4086  unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
4087  unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
4088  CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
4089  MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4090  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
4091  CmpVT), 0);
4092  Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
4093 
4094  assert(!FoldedLoad && "Shouldn't have folded the load");
4095  if (!FoldedBCast)
4096  Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
4097 
4098  if (IsMasked) {
4099  // Widen the mask.
4100  unsigned RegClass = getMaskRC(MaskVT);
4101  SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4102  InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4103  dl, MaskVT, InMask, RC), 0);
4104  }
4105  }
4106 
4107  bool IsTestN = CC == ISD::SETEQ;
4108  unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
4109  IsMasked);
4110 
4111  MachineSDNode *CNode;
4112  if (FoldedLoad || FoldedBCast) {
4113  SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
4114 
4115  if (IsMasked) {
4116  SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4117  Load.getOperand(0) };
4118  CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4119  } else {
4120  SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4121  Load.getOperand(0) };
4122  CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4123  }
4124 
4125  // Update the chain.
4126  ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
4127  // Record the mem-refs
4128  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
4129  } else {
4130  if (IsMasked)
4131  CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
4132  else
4133  CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
4134  }
4135 
4136  // If we widened, we need to shrink the mask VT.
4137  if (Widen) {
4138  unsigned RegClass = getMaskRC(ResVT);
4139  SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4140  CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4141  dl, ResVT, SDValue(CNode, 0), RC);
4142  }
4143 
4144  ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
4145  CurDAG->RemoveDeadNode(Root);
4146  return true;
4147 }
4148 
4149 void X86DAGToDAGISel::Select(SDNode *Node) {
4150  MVT NVT = Node->getSimpleValueType(0);
4151  unsigned Opcode = Node->getOpcode();
4152  SDLoc dl(Node);
4153 
4154  if (Node->isMachineOpcode()) {
4155  LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
4156  Node->setNodeId(-1);
4157  return; // Already selected.
4158  }
4159 
4160  switch (Opcode) {
4161  default: break;
4162  case ISD::INTRINSIC_VOID: {
4163  unsigned IntNo = Node->getConstantOperandVal(1);
4164  switch (IntNo) {
4165  default: break;
4166  case Intrinsic::x86_sse3_monitor:
4167  case Intrinsic::x86_monitorx:
4168  case Intrinsic::x86_clzero: {
4169  bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
4170 
4171  unsigned Opc = 0;
4172  switch (IntNo) {
4173  case Intrinsic::x86_sse3_monitor:
4174  if (!Subtarget->hasSSE3())
4175  break;
4176  Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
4177  break;
4178  case Intrinsic::x86_monitorx:
4179  if (!Subtarget->hasMWAITX())
4180  break;
4181  Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
4182  break;
4183  case Intrinsic::x86_clzero:
4184  if (!Subtarget->hasCLZERO())
4185  break;
4186  Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
4187  break;
4188  }
4189 
4190  if (Opc) {
4191  unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
4192  SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
4193  Node->getOperand(2), SDValue());
4194  SDValue InFlag = Chain.getValue(1);
4195 
4196  if (IntNo == Intrinsic::x86_sse3_monitor ||
4197  IntNo == Intrinsic::x86_monitorx) {
4198  // Copy the other two operands to ECX and EDX.
4199  Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
4200  InFlag);
4201  InFlag = Chain.getValue(1);
4202  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
4203  InFlag);
4204  InFlag = Chain.getValue(1);
4205  }
4206 
4207  MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4208  { Chain, InFlag});
4209  ReplaceNode(Node, CNode);
4210  return;
4211  }
4212  }
4213  }
4214 
4215  break;
4216  }
4217  case ISD::BRIND: {
4218  if (Subtarget->isTargetNaCl())
4219  // NaCl has its own pass where jmp %r32 instructions are converted to
4220  // jmp %r64. We leave the instruction alone.
4221  break;
4222  if (Subtarget->isTarget64BitILP32()) {
4223  // Converts a 32-bit register to a 64-bit, zero-extended version of
4224  // it. This is needed because x86-64 can do many things, but jmp %r32
4225  // ain't one of them.
4226  const SDValue &Target = Node->getOperand(1);
4228  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
4229  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
4230  Node->getOperand(0), ZextTarget);
4231  ReplaceNode(Node, Brind.getNode());
4232  SelectCode(ZextTarget.getNode());
4233  SelectCode(Brind.getNode());
4234  return;
4235  }
4236  break;
4237  }
4238  case X86ISD::GlobalBaseReg:
4239  ReplaceNode(Node, getGlobalBaseReg());
4240  return;
4241 
4242  case ISD::BITCAST:
4243  // Just drop all 128/256/512-bit bitcasts.
4244  if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
4245  NVT == MVT::f128) {
4246  ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
4247  CurDAG->RemoveDeadNode(Node);
4248  return;
4249  }
4250  break;
4251 
4252  case ISD::VSELECT: {
4253  // Replace VSELECT with non-mask conditions with BLENDV.
4254  if (Node->getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
4255  break;
4256 
4257  assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
4258  SDValue Blendv = CurDAG->getNode(
4259  X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
4260  Node->getOperand(1), Node->getOperand(2));
4261  ReplaceNode(Node, Blendv.getNode());
4262  SelectCode(Blendv.getNode());
4263  // We already called ReplaceUses.
4264  return;
4265  }
4266 
4267  case ISD::SRL:
4268  if (matchBitExtract(Node))
4269  return;
4270  LLVM_FALLTHROUGH;
4271  case ISD::SRA:
4272  case ISD::SHL:
4273  if (tryShiftAmountMod(Node))
4274  return;
4275  break;
4276 
4277  case ISD::AND:
4278  if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
4279  // Try to form a masked VPTESTM. Operands can be in either order.
4280  SDValue N0 = Node->getOperand(0);
4281  SDValue N1 = Node->getOperand(1);
4282  if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
4283  tryVPTESTM(Node, N0, N1))
4284  return;
4285  if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
4286  tryVPTESTM(Node, N1, N0))
4287  return;
4288  }
4289 
4290  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
4291  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4292  CurDAG->RemoveDeadNode(Node);
4293  return;
4294  }
4295  if (matchBitExtract(Node))
4296  return;
4297  if (AndImmShrink && shrinkAndImmediate(Node))
4298  return;
4299 
4300  LLVM_FALLTHROUGH;
4301  case ISD::OR:
4302  case ISD::XOR:
4303  if (tryShrinkShlLogicImm(Node))
4304  return;
4305 
4306  LLVM_FALLTHROUGH;
4307  case ISD::ADD:
4308  case ISD::SUB: {
4309  // Try to avoid folding immediates with multiple uses for optsize.
4310  // This code tries to select to register form directly to avoid going
4311  // through the isel table which might fold the immediate. We can't change
4312  // the add/sub/and/or/xor-with-immediate patterns in the
4313  // tablegen files to check immediate use count without making the patterns
4314  // unavailable to the fast-isel table.
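  // For example, if the same 32-bit constant feeds several adds, materializing
  // it once with a MOV and using the register forms below is smaller than
  // repeating a 4-byte immediate in every instruction.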
4315  if (!OptForSize)
4316  break;
4317 
4318  // Only handle i8/i16/i32/i64.
4319  if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
4320  break;
4321 
4322  SDValue N0 = Node->getOperand(0);
4323  SDValue N1 = Node->getOperand(1);
4324 
4325  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
4326  if (!Cst)
4327  break;
4328 
4329  int64_t Val = Cst->getSExtValue();
4330 
4331  // Make sure it's an immediate that is considered foldable.
4332  // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
4333  if (!isInt<8>(Val) && !isInt<32>(Val))
4334  break;
4335 
4336  // Check if we should avoid folding this immediate.
4337  if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
4338  break;
4339 
4340  // We should not fold the immediate. So we need a register form instead.
4341  unsigned ROpc, MOpc;
4342  switch (NVT.SimpleTy) {
4343  default: llvm_unreachable("Unexpected VT!");
4344  case MVT::i8:
4345  switch (Opcode) {
4346  default: llvm_unreachable("Unexpected opcode!");
4347  case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
4348  case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
4349  case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
4350  case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break;
4351  case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
4352  }
4353  break;
4354  case MVT::i16:
4355  switch (Opcode) {
4356  default: llvm_unreachable("Unexpected opcode!");
4357  case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
4358  case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
4359  case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
4360  case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break;
4361  case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
4362  }
4363  break;
4364  case MVT::i32:
4365  switch (Opcode) {
4366  default: llvm_unreachable("Unexpected opcode!");
4367  case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
4368  case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
4369  case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
4370  case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break;
4371  case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
4372  }
4373  break;
4374  case MVT::i64:
4375  switch (Opcode) {
4376  default: llvm_unreachable("Unexpected opcode!");
4377  case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
4378  case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
4379  case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
4380  case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break;
4381  case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
4382  }
4383  break;
4384  }
4385 
4386  // OK, this is an AND/OR/XOR/ADD/SUB with a constant.
4387 
4388  // If this is not a subtract, we can still try to fold a load.
4389  if (Opcode != ISD::SUB) {
4390  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4391  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4392  SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4393  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4394  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4395  // Update the chain.
4396  ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
4397  // Record the mem-refs
4398  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
4399  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4400  CurDAG->RemoveDeadNode(Node);
4401  return;
4402  }
4403  }
4404 
4405  CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
4406  return;
4407  }
4408 
4409  case X86ISD::SMUL:
4410  // i16/i32/i64 are handled with isel patterns.
4411  if (NVT != MVT::i8)
4412  break;
4413  LLVM_FALLTHROUGH;
4414  case X86ISD::UMUL: {
4415  SDValue N0 = Node->getOperand(0);
4416  SDValue N1 = Node->getOperand(1);
4417 
4418  unsigned LoReg, ROpc, MOpc;
4419  switch (NVT.SimpleTy) {
4420  default: llvm_unreachable("Unsupported VT!");
4421  case MVT::i8:
4422  LoReg = X86::AL;
4423  ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
4424  MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
4425  break;
4426  case MVT::i16:
4427  LoReg = X86::AX;
4428  ROpc = X86::MUL16r;
4429  MOpc = X86::MUL16m;
4430  break;
4431  case MVT::i32:
4432  LoReg = X86::EAX;
4433  ROpc = X86::MUL32r;
4434  MOpc = X86::MUL32m;
4435  break;
4436  case MVT::i64:
4437  LoReg = X86::RAX;
4438  ROpc = X86::MUL64r;
4439  MOpc = X86::MUL64m;
4440  break;
4441  }
4442 
4443  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4444  bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4445  // Multiply is commutative.
4446  if (!FoldedLoad) {
4447  FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4448  if (FoldedLoad)
4449  std::swap(N0, N1);
4450  }
4451 
4452  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
4453  N0, SDValue()).getValue(1);
4454 
4455  MachineSDNode *CNode;
4456  if (FoldedLoad) {
4457  // i16/i32/i64 use an instruction that produces a low and high result even
4458  // though only the low result is used.
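  // For example, MUL32r/MUL32m multiply EAX by the register or memory operand
  // and write the full 64-bit product to EDX:EAX; only the low half in EAX is
  // consumed here.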
4459  SDVTList VTs;
4460  if (NVT == MVT::i8)
4461  VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4462  else
4463  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
4464 
4465  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4466  InFlag };
4467  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4468 
4469  // Update the chain.
4470  ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
4471  // Record the mem-refs
4472  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4473  } else {
4474  // i16/i32/i64 use an instruction that produces a low and high result even
4475  // though only the low result is used.
4476  SDVTList VTs;
4477  if (NVT == MVT::i8)
4478  VTs = CurDAG->getVTList(NVT, MVT::i32);
4479  else
4480  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
4481 
4482  CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
4483  }
4484 
4485  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4486  ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
4487  CurDAG->RemoveDeadNode(Node);
4488  return;
4489  }
4490 
4491  case ISD::SMUL_LOHI:
4492  case ISD::UMUL_LOHI: {
4493  SDValue N0 = Node->getOperand(0);
4494  SDValue N1 = Node->getOperand(1);
4495 
4496  unsigned Opc, MOpc;
4497  bool isSigned = Opcode == ISD::SMUL_LOHI;
4498  if (!isSigned) {
4499  switch (NVT.SimpleTy) {
4500  default: llvm_unreachable("Unsupported VT!");
4501  case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
4502  case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
4503  }
4504  } else {
4505  switch (NVT.SimpleTy) {
4506  default: llvm_unreachable("Unsupported VT!");
4507  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
4508  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
4509  }
4510  }
4511 
4512  unsigned SrcReg, LoReg, HiReg;
4513  switch (Opc) {
4514  default: llvm_unreachable("Unknown MUL opcode!");
4515  case X86::IMUL32r:
4516  case X86::MUL32r:
4517  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
4518  break;
4519  case X86::IMUL64r:
4520  case X86::MUL64r:
4521  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
4522  break;
4523  }
4524 
4525  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4526  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4527  // Multiply is commutative.
4528  if (!foldedLoad) {
4529  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4530  if (foldedLoad)
4531  std::swap(N0, N1);
4532  }
4533 
4534  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
4535  N0, SDValue()).getValue(1);
4536  if (foldedLoad) {
4537  SDValue Chain;
4538  MachineSDNode *CNode = nullptr;
4539  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4540  InFlag };
4541  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
4542  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4543  Chain = SDValue(CNode, 0);
4544  InFlag = SDValue(CNode, 1);
4545 
4546  // Update the chain.
4547  ReplaceUses(N1.getValue(1), Chain);
4548  // Record the mem-refs
4549  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4550  } else {
4551  SDValue Ops[] = { N1, InFlag };
4552  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
4553  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4554  InFlag = SDValue(CNode, 0);
4555  }
4556 
4557  // Copy the low half of the result, if it is needed.
4558  if (!SDValue(Node, 0).use_empty()) {
4559  assert(LoReg && "Register for low half is not defined!");
4560  SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
4561  NVT, InFlag);
4562  InFlag = ResLo.getValue(2);
4563  ReplaceUses(SDValue(Node, 0), ResLo);
4564  LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
4565  dbgs() << '\n');
4566  }
4567  // Copy the high half of the result, if it is needed.
4568  if (!SDValue(Node, 1).use_empty()) {
4569  assert(HiReg && "Register for high half is not defined!");
4570  SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
4571  NVT, InFlag);
4572  InFlag = ResHi.getValue(2);
4573  ReplaceUses(SDValue(Node, 1), ResHi);
4574  LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
4575  dbgs() << '\n');
4576  }
4577 
4578  CurDAG->RemoveDeadNode(Node);
4579  return;
4580  }
4581 
4582  case ISD::SDIVREM:
4583  case ISD::UDIVREM: {
4584  SDValue N0 = Node->getOperand(0);
4585  SDValue N1 = Node->getOperand(1);
4586 
4587  unsigned Opc, MOpc;
4588  bool isSigned = Opcode == ISD::SDIVREM;
4589  if (!isSigned) {
4590  switch (NVT.SimpleTy) {
4591  default: llvm_unreachable("Unsupported VT!");
4592  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
4593  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
4594  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
4595  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
4596  }
4597  } else {
4598  switch (NVT.SimpleTy) {
4599  default: llvm_unreachable("Unsupported VT!");
4600  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
4601  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
4602  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
4603  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
4604  }
4605  }
4606 
4607  unsigned LoReg, HiReg, ClrReg;
4608  unsigned SExtOpcode;
4609  switch (NVT.SimpleTy) {
4610  default: llvm_unreachable("Unsupported VT!");
4611  case MVT::i8:
4612  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
4613  SExtOpcode = X86::CBW;
4614  break;
4615  case MVT::i16:
4616  LoReg = X86::AX; HiReg = X86::DX;
4617  ClrReg = X86::DX;
4618  SExtOpcode = X86::CWD;
4619  break;
4620  case MVT::i32:
4621  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
4622  SExtOpcode = X86::CDQ;
4623  break;
4624  case MVT::i64:
4625  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
4626  SExtOpcode = X86::CQO;
4627  break;
4628  }
4629 
4630  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4631  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4632  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
4633 
4634  SDValue InFlag;
4635  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
4636  // Special case for div8, just use a move with zero extension to AX to
4637  // clear the upper 8 bits (AH).
4638  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
4639  MachineSDNode *Move;
4640  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4641  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4642  Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
4643  MVT::Other, Ops);
4644  Chain = SDValue(Move, 1);
4645  ReplaceUses(N0.getValue(1), Chain);
4646  // Record the mem-refs
4647  CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
4648  } else {
4649  Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0);
4650  Chain = CurDAG->getEntryNode();
4651  }
4652  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0),
4653  SDValue());
4654  InFlag = Chain.getValue(1);
4655  } else {
4656  InFlag =
4657  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
4658  LoReg, N0, SDValue()).getValue(1);
4659  if (isSigned && !signBitIsZero) {
4660  // Sign extend the low part into the high part.
4661  InFlag =
4662  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
4663  } else {
4664  // Zero out the high part, effectively zero extending the input.
4665  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
4666  switch (NVT.SimpleTy) {
4667  case MVT::i16:
4668  ClrNode =
4669  SDValue(CurDAG->getMachineNode(
4670  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
4671  CurDAG->getTargetConstant(X86::sub_16bit, dl,
4672  MVT::i32)),
4673  0);
4674  break;
4675  case MVT::i32:
4676  break;
4677  case MVT::i64:
4678  ClrNode =
4679  SDValue(CurDAG->getMachineNode(
4680  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
4681  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
4682  CurDAG->getTargetConstant(X86::sub_32bit, dl,
4683  MVT::i32)),
4684  0);
4685  break;
4686  default:
4687  llvm_unreachable("Unexpected division source");
4688  }
4689 
4690  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
4691  ClrNode, InFlag).getValue(1);
4692  }
4693  }
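  // Hedged sketch of the three dividend setups above, using the usual
  // DIV/IDIV register conventions (assembly chosen for illustration):
  //   i8 (unsigned or sign bit zero): movzbl %dil, %eax   ; zext clears AH
  //   signed, sign unknown:           cqto                ; CQO: RDX:RAX = sext(RAX)
  //   otherwise:                      xorl %edx, %edx     ; MOV32r0 clears the high half
  // and only then is the DIV/IDIV itself issued.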
4694 
4695  if (foldedLoad) {
4696  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4697  InFlag };
4698  MachineSDNode *CNode =
4699  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
4700  InFlag = SDValue(CNode, 1);
4701  // Update the chain.
4702  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
4703  // Record the mem-refs
4704  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4705  } else {
4706  InFlag =
4707  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
4708  }
4709 
4710  // Prevent use of AH in a REX instruction by explicitly copying it to
4711  // an ABCD_L register.
4712  //
4713  // The current assumption of the register allocator is that isel
4714  // won't generate explicit references to the GR8_ABCD_H registers. If
4715  // the allocator and/or the backend get enhanced to be more robust in
4716  // that regard, this can be, and should be, removed.
4717  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
4718  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
4719  unsigned AHExtOpcode =
4720  isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
4721 
4722  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
4723  MVT::Glue, AHCopy, InFlag);
4724  SDValue Result(RNode, 0);
4725  InFlag = SDValue(RNode, 1);
4726 
4727  Result =
4728  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
4729 
4730  ReplaceUses(SDValue(Node, 1), Result);
4731  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4732  dbgs() << '\n');
4733  }
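  // Hedged note: with a REX prefix, the encoding that would select AH selects
  // SPL instead, so AH cannot appear in any REX-prefixed instruction. The
  // _NOREX zero/sign extend keeps the copy REX-free, e.g.
  //   divb %sil            ; quotient in AL, remainder in AH
  //   movzbl %ah, %ecx     ; legal: no REX prefix needed for this MOVZX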
4734  // Copy the division (low) result, if it is needed.
4735  if (!SDValue(Node, 0).use_empty()) {
4736  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4737  LoReg, NVT, InFlag);
4738  InFlag = Result.getValue(2);
4739  ReplaceUses(SDValue(Node, 0), Result);
4740  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4741  dbgs() << '\n');
4742  }
4743  // Copy the remainder (high) result, if it is needed.
4744  if (!SDValue(Node, 1).use_empty()) {
4745  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4746  HiReg, NVT, InFlag);
4747  InFlag = Result.getValue(2);
4748  ReplaceUses(SDValue(Node, 1), Result);
4749  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4750  dbgs() << '\n');
4751  }
4752  CurDAG->RemoveDeadNode(Node);
4753  return;
4754  }
4755 
4756  case X86ISD::CMP: {
4757  SDValue N0 = Node->getOperand(0);
4758  SDValue N1 = Node->getOperand(1);
4759 
4760  // Optimizations for TEST compares.
4761  if (!isNullConstant(N1))
4762  break;
4763 
4764  // Save the original VT of the compare.
4765  MVT CmpVT = N0.getSimpleValueType();
4766 
 4767  // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
4768  // by a test instruction. The test should be removed later by
4769  // analyzeCompare if we are using only the zero flag.
4770  // TODO: Should we check the users and use the BEXTR flags directly?
4771  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
4772  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
4773  unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
4774  : X86::TEST32rr;
4775  SDValue BEXTR = SDValue(NewNode, 0);
4776  NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
4777  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4778  CurDAG->RemoveDeadNode(Node);
4779  return;
4780  }
4781  }
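  // Hedged shape of what matchBEXTRFromAndImm handles here (constants are
  // illustrative only): (X86cmp (and (srl X, 4), 0xFFF), 0) becomes a BEXTR
  // extracting the 12-bit field, followed by
  //   test %reg, %reg
  // which later flag analysis can drop if only ZF is consumed.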
4782 
4783  // We can peek through truncates, but we need to be careful below.
4784  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
4785  N0 = N0.getOperand(0);
4786 
4787  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
4788  // use a smaller encoding.
4789  // Look past the truncate if CMP is the only use of it.
4790  if (N0.getOpcode() == ISD::AND &&
4791  N0.getNode()->hasOneUse() &&
4792  N0.getValueType() != MVT::i8) {
 4793  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
 4794  if (!C) break;
4795  uint64_t Mask = C->getZExtValue();
4796 
4797  // Check if we can replace AND+IMM64 with a shift. This is possible for
 4798  // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
4799  // flag.
4800  if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
4801  onlyUsesZeroFlag(SDValue(Node, 0))) {
4802  if (isMask_64(~Mask)) {
4803  unsigned TrailingZeros = countTrailingZeros(Mask);
4804  SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
4805  SDValue Shift =
4806  SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
4807  N0.getOperand(0), Imm), 0);
4808  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
4809  MVT::i32, Shift, Shift);
4810  ReplaceNode(Node, Test);
4811  return;
4812  }
4813  if (isMask_64(Mask)) {
4814  unsigned LeadingZeros = countLeadingZeros(Mask);
4815  SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
4816  SDValue Shift =
4817  SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
4818  N0.getOperand(0), Imm), 0);
4819  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
4820  MVT::i32, Shift, Shift);
4821  ReplaceNode(Node, Test);
4822  return;
4823  }
4824  }
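  // Hedged example of the shift rewrite (mask chosen for illustration):
  // TEST has no 64-bit immediate form, so checking
  //   (X & 0xFFFFFFFF00000000) == 0
  // would otherwise materialize the constant; when only ZF matters it becomes
  //   shrq $32, %rax
  //   testq %rax, %rax
  // and the isMask_64(Mask) branch does the same with SHL for low-bit masks.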
4825 
4826  MVT VT;
4827  int SubRegOp;
4828  unsigned ROpc, MOpc;
4829 
4830  // For each of these checks we need to be careful if the sign flag is
4831  // being used. It is only safe to use the sign flag in two conditions,
4832  // either the sign bit in the shrunken mask is zero or the final test
4833  // size is equal to the original compare size.
4834 
4835  if (isUInt<8>(Mask) &&
4836  (!(Mask & 0x80) || CmpVT == MVT::i8 ||
4837  hasNoSignFlagUses(SDValue(Node, 0)))) {
4838  // For example, convert "testl %eax, $8" to "testb %al, $8"
4839  VT = MVT::i8;
4840  SubRegOp = X86::sub_8bit;
4841  ROpc = X86::TEST8ri;
4842  MOpc = X86::TEST8mi;
4843  } else if (OptForMinSize && isUInt<16>(Mask) &&
4844  (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
4845  hasNoSignFlagUses(SDValue(Node, 0)))) {
4846  // For example, "testl %eax, $32776" to "testw %ax, $32776".
4847  // NOTE: We only want to form TESTW instructions if optimizing for
4848  // min size. Otherwise we only save one byte and possibly get a length
4849  // changing prefix penalty in the decoders.
4850  VT = MVT::i16;
4851  SubRegOp = X86::sub_16bit;
4852  ROpc = X86::TEST16ri;
4853  MOpc = X86::TEST16mi;
4854  } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
4855  ((!(Mask & 0x80000000) &&
4856  // Without minsize 16-bit Cmps can get here so we need to
4857  // be sure we calculate the correct sign flag if needed.
4858  (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
4859  CmpVT == MVT::i32 ||
4860  hasNoSignFlagUses(SDValue(Node, 0)))) {
4861  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
4862  // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
 4863  // Otherwise, we find ourselves in a position where we have to do
4864  // promotion. If previous passes did not promote the and, we assume
4865  // they had a good reason not to and do not promote here.
4866  VT = MVT::i32;
4867  SubRegOp = X86::sub_32bit;
4868  ROpc = X86::TEST32ri;
4869  MOpc = X86::TEST32mi;
4870  } else {
4871  // No eligible transformation was found.
4872  break;
4873  }
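  // Hedged rationale for the minsize-only TESTW above: the 0x66 operand-size
  // override in front of an immediate makes it a length-changing prefix,
  // which some decoders penalize, so
  //   testl $32776, %eax  ->  testw $32776, %ax
  // only pays off when optimizing purely for size.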
4874 
4875  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
4876  SDValue Reg = N0.getOperand(0);
4877 
4878  // Emit a testl or testw.
4879  MachineSDNode *NewNode;
4880  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4881  if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4882  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
4883  Reg.getOperand(0) };
4884  NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
4885  // Update the chain.
4886  ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
4887  // Record the mem-refs
4888  CurDAG->setNodeMemRefs(NewNode,
4889  {cast<LoadSDNode>(Reg)->getMemOperand()});
4890  } else {
4891  // Extract the subregister if necessary.
4892  if (N0.getValueType() != VT)
4893  Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
4894 
4895  NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
4896  }
4897  // Replace CMP with TEST.
4898  ReplaceNode(Node, NewNode);
4899  return;
4900  }
4901  break;
4902  }
4903  case X86ISD::PCMPISTR: {
4904  if (!Subtarget->hasSSE42())
4905  break;
4906 
4907  bool NeedIndex = !SDValue(Node, 0).use_empty();
4908  bool NeedMask = !SDValue(Node, 1).use_empty();
4909  // We can't fold a load if we are going to make two instructions.
4910  bool MayFoldLoad = !NeedIndex || !NeedMask;
4911 
4912  MachineSDNode *CNode;
4913  if (NeedMask) {
4914  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
4915  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
4916  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
4917  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4918  }
4919  if (NeedIndex || !NeedMask) {
4920  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
4921  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
4922  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
4923  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4924  }
4925 
4926  // Connect the flag usage to the last instruction created.
4927  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4928  CurDAG->RemoveDeadNode(Node);
4929  return;
4930  }
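  // Hedged usage sketch (standard SSE4.2 intrinsic names assumed): code that
  // needs both results, e.g.
  //   int idx   = _mm_cmpistri(a, b, 0);   // index result, in ECX
  //   __m128i m = _mm_cmpistrm(a, b, 0);   // mask result, in XMM0
  // selects two instructions here, which is why a load is only folded when a
  // single PCMPISTRI or PCMPISTRM suffices.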
4931  case X86ISD::PCMPESTR: {
4932  if (!Subtarget->hasSSE42())
4933  break;
4934 
4935  // Copy the two implicit register inputs.
4936  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
4937  Node->getOperand(1),
4938  SDValue()).getValue(1);
4939  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
4940  Node->getOperand(3), InFlag).getValue(1);
4941 
4942  bool NeedIndex = !SDValue(Node, 0).use_empty();
4943  bool NeedMask = !SDValue(Node, 1).use_empty();
4944  // We can't fold a load if we are going to make two instructions.
4945  bool MayFoldLoad = !NeedIndex || !NeedMask;
4946 
4947  MachineSDNode *CNode;
4948  if (NeedMask) {
4949  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
4950  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
4951  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
4952  InFlag);
4953  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4954  }
4955  if (NeedIndex || !NeedMask) {
4956  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
4957  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
4958  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
4959  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4960  }
4961  // Connect the flag usage to the last instruction created.
4962  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4963  CurDAG->RemoveDeadNode(Node);
4964  return;
4965  }
4966 
4967  case ISD::SETCC: {
4968  if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
4969  return;
4970 
4971  break;
4972  }
4973 
4974  case ISD::STORE:
4975  if (foldLoadStoreIntoMemOperand(Node))
4976  return;
4977  break;
4978  case ISD::FCEIL:
4979  case ISD::FFLOOR:
4980  case ISD::FTRUNC:
4981  case ISD::FNEARBYINT:
4982  case ISD::FRINT: {
 4983  // Replace these FP rounding nodes with their X86-specific equivalents so
 4984  // we don't need two sets of patterns.
4985  // FIXME: This can only happen when the nodes started as STRICT_* and have
4986  // been mutated into their non-STRICT equivalents. Eventually this
4987  // mutation will be removed and we should switch the STRICT_ nodes to a
4988  // strict version of RNDSCALE in PreProcessISelDAG.
4989  unsigned Imm;
4990  switch (Node->getOpcode()) {
4991  default: llvm_unreachable("Unexpected opcode!");
4992  case ISD::FCEIL: Imm = 0xA; break;
4993  case ISD::FFLOOR: Imm = 0x9; break;
4994  case ISD::FTRUNC: Imm = 0xB; break;
4995  case ISD::FNEARBYINT: Imm = 0xC; break;
4996  case ISD::FRINT: Imm = 0x4; break;
4997  }
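  // Hedged decoding of these immediates (per the ROUNDPS/VRNDSCALE control
  // byte): bits 1:0 pick the rounding mode (0 nearest, 1 down, 2 up, 3
  // truncate), bit 2 selects the current MXCSR mode instead, and bit 3
  // suppresses precision exceptions. Hence 0xA = ceil, 0x9 = floor,
  // 0xB = trunc, 0xC = nearbyint (MXCSR mode, quiet), and 0x4 = rint
  // (MXCSR mode, may raise inexact).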
4998  SDLoc dl(Node);
4999  SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
5000  Node->getValueType(0),
5001  Node->getOperand(0),
5002  CurDAG->getConstant(Imm, dl, MVT::i8));
5003  ReplaceNode(Node, Res.getNode());
5004  SelectCode(Res.getNode());
5005  return;
5006  }
5007  }
5008 
5009  SelectCode(Node);
5010 }
5011 
5012 bool X86DAGToDAGISel::
5013 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
5014  std::vector<SDValue> &OutOps) {
5015  SDValue Op0, Op1, Op2, Op3, Op4;
5016  switch (ConstraintID) {
5017  default:
5018  llvm_unreachable("Unexpected asm memory constraint");
 5019  case InlineAsm::Constraint_i:
 5020    // FIXME: It seems strange that 'i' is needed here since it's supposed to
 5021    // be an immediate and not a memory constraint.
 5022    LLVM_FALLTHROUGH;
 5023  case InlineAsm::Constraint_o: // offsetable ??
 5024  case InlineAsm::Constraint_v: // not offsetable ??
 5025  case InlineAsm::Constraint_m: // memory
 5026  case InlineAsm::Constraint_X:
 5027  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
5028  return true;
5029  break;
5030  }
5031 
5032  OutOps.push_back(Op0);
5033  OutOps.push_back(Op1);
5034  OutOps.push_back(Op2);
5035  OutOps.push_back(Op3);
5036  OutOps.push_back(Op4);
5037  return false;
5038 }
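// Hedged usage note: the five operands pushed above form the X86 memory
// operand tuple (base, scale, index, displacement, segment). An inline asm
// memory constraint such as (variable name illustrative)
//   asm volatile("incl %0" : "+m"(counter));
// reaches this hook as Constraint_m, and selectAddr populates that 5-tuple.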
5039 
5040 /// This pass converts a legalized DAG into a X86-specific DAG,
5041 /// ready for instruction scheduling.
 5042 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
 5043                                      CodeGenOpt::Level OptLevel) {
5044  return new X86DAGToDAGISel(TM, OptLevel);
5045 }