//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//

14 #include "X86.h"
15 #include "X86MachineFunctionInfo.h"
16 #include "X86RegisterInfo.h"
17 #include "X86Subtarget.h"
18 #include "X86TargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/IR/ConstantRange.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/Type.h"
29 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/KnownBits.h"
36 #include <stdint.h>
using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
    cl::desc("Enable setting constant bits to reduce size of mask immediates"),
    cl::Hidden);

//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
  /// numbers for the leaves of the matched tree.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;
    int Base_FrameIndex;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    MCSymbol *MCSym;
    int JT;
    unsigned Align;             // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*
    bool NegateIndex = false;

    X86ISelAddressMode()
        : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
          Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
          MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}

    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
    }

    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

    /// Return true if this addressing mode is already RIP-relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    void dump(SelectionDAG *DAG = nullptr) {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode())
        Base_Reg.getNode()->dump(DAG);
      else
        dbgs() << "nul\n";
      if (BaseType == FrameIndexBase)
        dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
      dbgs() << " Scale " << Scale << '\n'
             << "IndexReg ";
      if (NegateIndex)
        dbgs() << "negate ";
      if (IndexReg.getNode())
        IndexReg.getNode()->dump(DAG);
      else
        dbgs() << "nul\n";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " MCSym ";
      if (MCSym)
        dbgs() << MCSym;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Align << '\n';
    }
#endif
  };
}
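
// Worked example (illustrative, not part of the upstream file): for a memory
// operand like [%rbx + %rcx*4 + 16], the matcher fills in the mode as
// BaseType = RegBase, Base_Reg = %rbx, IndexReg = %rcx, Scale = 4, Disp = 16,
// with the symbolic fields (GV, CP, ES, MCSym, JT, BlockAddr) all left empty.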

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86-specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel final : public SelectionDAGISel {
    /// Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// If true, selector should try to optimize for code size instead of
    /// performance.
    bool OptForSize;

    /// If true, selector should try to optimize for minimum code size.
    bool OptForMinSize;

    /// Disable direct TLS access through segment registers.
    bool IndirectTlsSegRefs;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false),
          OptForMinSize(false), IndirectTlsSegRefs(false) {}

    StringRef getPassName() const override {
      return "X86 DAG->DAG Instruction Selection";
    }

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Reset the subtarget each time through.
      Subtarget = &MF.getSubtarget<X86Subtarget>();
      IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
                             "indirect-tls-seg-refs");

      // OptFor[Min]Size are used in pattern predicates that isel is matching.
      OptForSize = MF.getFunction().hasOptSize();
      OptForMinSize = MF.getFunction().hasMinSize();
      assert((!OptForMinSize || OptForSize) &&
             "OptForMinSize implies OptForSize");

      SelectionDAGISel::runOnMachineFunction(MF);
      return true;
    }

    void EmitFunctionEntryCode() override;

    bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;

    void PreprocessISelDAG() override;
    void PostprocessISelDAG() override;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    void Select(SDNode *N) override;

    bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
    bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool matchAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
    bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                          SDValue &Scale, SDValue &Index, SDValue &Disp,
                          SDValue &Segment);
    bool selectMOV64Imm32(SDValue N, SDValue &Imm);
    bool selectLEAAddr(SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool selectLEA64_32Addr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
    bool selectTLSADDRAddr(SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
                             SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &NodeWithChain);
    bool selectRelocImm(SDValue N, SDValue &Op);

    bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    // Convenience method where P is also root.
    bool tryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment) {
      return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
    }

    /// Implement addressing mode selection for inline asm expressions.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override;

    void emitSpecialCodeForMain();

    inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
                                   MVT VT, SDValue &Base, SDValue &Scale,
                                   SDValue &Index, SDValue &Disp,
                                   SDValue &Segment) {
      if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
        Base = CurDAG->getTargetFrameIndex(
            AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
      else if (AM.Base_Reg.getNode())
        Base = AM.Base_Reg;
      else
        Base = CurDAG->getRegister(0, VT);

      Scale = getI8Imm(AM.Scale, DL);

      // Negate the index if needed.
      if (AM.NegateIndex) {
        unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
        SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
                                                     AM.IndexReg), 0);
        AM.IndexReg = Neg;
      }

      if (AM.IndexReg.getNode())
        Index = AM.IndexReg;
      else
        Index = CurDAG->getRegister(0, VT);

      // These are 32-bit even in 64-bit mode since RIP-relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES) {
        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.MCSym) {
        assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
        assert(AM.SymbolFlags == 0 && "oo");
        Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i16);
    }

    // Utility function to determine whether we should avoid selecting
    // immediate forms of instructions for better code size. At a high
    // level, we'd like to avoid such instructions when we have similar
    // constants used within the same basic block that can be kept in a
    // register.
    bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
      uint32_t UseCount = 0;

      // Do not want to hoist if we're not optimizing for size.
      // TODO: We'd like to remove this restriction.
      // See the comment in X86InstrInfo.td for more info.
      if (!OptForSize)
        return false;

      // Walk all the users of the immediate.
      for (SDNode::use_iterator UI = N->use_begin(),
           UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {

        SDNode *User = *UI;

        // This user is already selected. Count it as a legitimate use and
        // move on.
        if (User->isMachineOpcode()) {
          UseCount++;
          continue;
        }

        // We want to count stores of immediates as real uses.
        if (User->getOpcode() == ISD::STORE &&
            User->getOperand(1).getNode() == N) {
          UseCount++;
          continue;
        }

        // We don't currently match users that have > 2 operands (except
        // for stores, which are handled above). Those instructions won't
        // match in ISel, for now, and would be counted incorrectly.
        // This may change in the future as we add additional instruction
        // types.
        if (User->getNumOperands() != 2)
          continue;

        // Immediates that are used for offsets as part of stack
        // manipulation should be left alone. These are typically
        // used to indicate SP offsets for argument passing and
        // will get pulled into stores/pushes (implicitly).
        if (User->getOpcode() == X86ISD::ADD ||
            User->getOpcode() == ISD::ADD    ||
            User->getOpcode() == X86ISD::SUB ||
            User->getOpcode() == ISD::SUB) {

          // Find the other operand of the add/sub.
          SDValue OtherOp = User->getOperand(0);
          if (OtherOp.getNode() == N)
            OtherOp = User->getOperand(1);

          // Don't count if the other operand is SP.
          RegisterSDNode *RegNode;
          if (OtherOp->getOpcode() == ISD::CopyFromReg &&
              (RegNode = dyn_cast_or_null<RegisterSDNode>(
                 OtherOp->getOperand(1).getNode())))
            if ((RegNode->getReg() == X86::ESP) ||
                (RegNode->getReg() == X86::RSP))
              continue;
        }

        // ... otherwise, count this and move on.
        UseCount++;
      }

      // If we have more than 1 use, then recommend for hoisting.
      return (UseCount > 1);
    }
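
    // Illustrative size tradeoff (example, not from the upstream file): with
    // two users of the same 32-bit constant such as
    //   movl $0x12345678, (%rdi)
    //   movl $0x12345678, (%rsi)
    // hoisting the immediate into a register
    //   movl $0x12345678, %eax
    //   movl %eax, (%rdi)
    //   movl %eax, (%rsi)
    // drops a 4-byte immediate from every reuse, which is why two or more
    // uses recommend hoisting when optimizing for size.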

    /// Return a target constant with the specified value of type i8.
    inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
    }

    /// Return a target constant with the specified value, of type i32.
    inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
    }

    /// Return a target constant with the specified value, of type i64.
    inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
    }

    SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
                                        const SDLoc &DL) {
      assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
      uint64_t Index = N->getConstantOperandVal(1);
      MVT VecVT = N->getOperand(0).getSimpleValueType();
      return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
    }

    SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
                                      const SDLoc &DL) {
      assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
      uint64_t Index = N->getConstantOperandVal(2);
      MVT VecVT = N->getSimpleValueType(0);
      return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
    }

    // Helper to detect unneeded AND instructions on shift amounts. Called
    // from PatFrags in tablegen.
    bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
      assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
      const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();

      if (Val.countTrailingOnes() >= Width)
        return true;

      APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
      return Mask.countTrailingOnes() >= Width;
    }
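
    // Worked example (illustrative, not from the upstream file): a 32-bit
    // shift already masks its amount to 5 bits in hardware, so in
    //   (shl X, (and Y, 31))
    // the AND is unneeded: Width == 5 and 0x1f has 5 trailing ones. A mask
    // like 63 (0x3f) also qualifies, while 30 (0x1e) does not, because bit 0
    // of the shift amount would not be preserved.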

    /// Return an SDNode that returns the value of the global base register.
    /// Output instructions required to initialize the global base register,
    /// if necessary.
    SDNode *getGlobalBaseReg();

    /// Return a reference to the TargetMachine, casted to the target-specific
    /// type.
    const X86TargetMachine &getTargetMachine() const {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// Return a reference to the TargetInstrInfo, casted to the target-specific
    /// type.
    const X86InstrInfo *getInstrInfo() const {
      return Subtarget->getInstrInfo();
    }

    /// Address-mode matching performs shift-of-and to and-of-shift
    /// reassociation in order to expose more scaled addressing
    /// opportunities.
    bool ComplexPatternFuncMutatesDAG() const override {
      return true;
    }

    bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;

    /// Returns whether this is a relocatable immediate in the range
    /// [-2^Width .. 2^Width-1].
    template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
      if (auto *CN = dyn_cast<ConstantSDNode>(N))
        return isInt<Width>(CN->getSExtValue());
      return isSExtAbsoluteSymbolRef(Width, N);
    }

    // Indicates we should prefer to use a non-temporal load for this load.
    bool useNonTemporalLoad(LoadSDNode *N) const {
      if (!N->isNonTemporal())
        return false;

      unsigned StoreSize = N->getMemoryVT().getStoreSize();

      if (N->getAlignment() < StoreSize)
        return false;

      switch (StoreSize) {
      default: llvm_unreachable("Unsupported store size");
      case 4:
      case 8:
        return false;
      case 16:
        return Subtarget->hasSSE41();
      case 32:
        return Subtarget->hasAVX2();
      case 64:
        return Subtarget->hasAVX512();
      }
    }
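
    // For context (illustrative note, not from the upstream file): the
    // non-temporal load here is (V)MOVNTDQA, whose 16-byte form needs SSE4.1,
    // 32-byte form needs AVX2, and 64-byte form needs AVX-512; 4- and 8-byte
    // loads have no non-temporal variant and fall through to ordinary loads.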

    bool foldLoadStoreIntoMemOperand(SDNode *Node);
    MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
    bool matchBitExtract(SDNode *Node);
    bool shrinkAndImmediate(SDNode *N);
    bool isMaskZeroExtended(SDNode *N) const;
    bool tryShiftAmountMod(SDNode *N);
    bool tryShrinkShlLogicImm(SDNode *N);
    bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);

    MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                                const SDLoc &dl, MVT VT, SDNode *Node);
    MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                                const SDLoc &dl, MVT VT, SDNode *Node,
                                SDValue &InFlag);

    bool tryOptimizeRem8Extend(SDNode *N);

    bool onlyUsesZeroFlag(SDValue Flags) const;
    bool hasNoSignFlagUses(SDValue Flags) const;
    bool hasNoCarryFlagUses(SDValue Flags) const;
  };
}


// Returns true if this masked compare can be implemented legally with this
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
  unsigned Opcode = N->getOpcode();
  if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
      Opcode == X86ISD::CMPM_SAE || Opcode == X86ISD::VFPCLASS) {
    // We can get 256-bit 8 element types here without VLX being enabled. When
    // this happens we will use 512-bit operations and the mask will not be
    // zero extended.
    EVT OpVT = N->getOperand(0).getValueType();
    if (OpVT.is256BitVector() || OpVT.is128BitVector())
      return Subtarget->hasVLX();

    return true;
  }
  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
      Opcode == X86ISD::FSETCCM_SAE)
    return true;

  return false;
}

// Returns true if we can assume the writer of the mask has zero extended it
// for us.
bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
  // If this is an AND, check if we have a compare on either side. As long as
  // one side guarantees the mask is zero extended, the AND will preserve those
  // zeros.
  if (N->getOpcode() == ISD::AND)
    return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
           isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);

  return isLegalMaskCompare(N, Subtarget);
}

bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (!N.hasOneUse())
    return false;

  if (N.getOpcode() != ISD::LOAD)
    return true;

  // Don't fold non-temporal loads if we have an instruction for them.
  if (useNonTemporalLoad(cast<LoadSDNode>(N)))
    return false;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::ADC:
    case X86ISD::SUB:
    case X86ISD::SBB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDCARRY:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is an 8-bit immediate we should fold the
      // immediate instead. This reduces code size.
      // e.g.
      // movl 4(%esp), %eax
      // addl $4, %eax
      // vs.
      // movl $4, %eax
      // addl 4(%esp), %eax
      // The former is 2 bytes shorter. In the case where the increment is 1,
      // the saving can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

        // If this is a 64-bit AND with an immediate that fits in 32-bits,
        // prefer using the smaller and over folding the load. This is needed
        // to make sure immediates created by shrinkAndImmediate are always
        // folded. Ideally we would narrow the load during DAG combine and get
        // the best of both worlds.
        if (U->getOpcode() == ISD::AND &&
            Imm->getAPIntValue().getBitWidth() == 64 &&
            Imm->getAPIntValue().isIntN(32))
          return false;

        // If this is really a zext_inreg that can be represented with a movzx
        // instruction, prefer that.
        // TODO: We could shrink the load and fold if it is non-volatile.
        if (U->getOpcode() == ISD::AND &&
            (Imm->getAPIntValue() == UINT8_MAX ||
             Imm->getAPIntValue() == UINT16_MAX ||
             Imm->getAPIntValue() == UINT32_MAX))
          return false;

        // ADD/SUB can negate the immediate and use the opposite operation
        // to fit 128 into a sign extended 8 bit immediate.
        if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
            (-Imm->getAPIntValue()).isSignedIntN(8))
          return false;
      }

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      // movl %gs:0, %eax
      // leal i@NTPOFF(%eax), %eax
      // instead of
      // movl $i@NTPOFF, %eax
      // addl %gs:0, %eax
      // if the block also has an access to a second TLS address this will save
      // a load.
      // FIXME: This is probably also true for non-TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }

      // Don't fold load if this matches the BTS/BTR/BTC patterns.
      // BTS: (or X, (shl 1, n))
      // BTR: (and X, (rotl -2, n))
      // BTC: (xor X, (shl 1, n))
      if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
        if (U->getOperand(0).getOpcode() == ISD::SHL &&
            isOneConstant(U->getOperand(0).getOperand(0)))
          return false;

        if (U->getOperand(1).getOpcode() == ISD::SHL &&
            isOneConstant(U->getOperand(1).getOperand(0)))
          return false;
      }
      if (U->getOpcode() == ISD::AND) {
        SDValue U0 = U->getOperand(0);
        SDValue U1 = U->getOperand(1);
        if (U0.getOpcode() == ISD::ROTL) {
          auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
          if (C && C->getSExtValue() == -2)
            return false;
        }

        if (U1.getOpcode() == ISD::ROTL) {
          auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
          if (C && C->getSExtValue() == -2)
            return false;
        }
      }

      break;
    }
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      // Don't fold a load into a shift by immediate. The BMI2 instructions
      // support folding a load, but not an immediate. The legacy instructions
      // support folding an immediate, but can't fold a load. Folding an
      // immediate is preferable to folding a load.
      if (isa<ConstantSDNode>(U->getOperand(1)))
        return false;

      break;
    }
  }

  // Prevent folding a load if this can be implemented with an insert_subreg or
  // a move that implicitly zeroes.
  if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
      isNullConstant(Root->getOperand(2)) &&
      (Root->getOperand(0).isUndef() ||
       ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
    return false;

  return true;
}

/// Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
    Ops.clear();
    Ops.push_back(NewChain);
  }
  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  Ops.append(Call->op_begin() + 1, Call->op_end());
  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}

/// Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  // The transformation is somewhat dangerous if the call's chain was glued to
  // the call. After MoveBelowOrigChain the load is moved between the call and
  // the chain, this can create a cycle if the load is not folded. So it is
  // *really* important that we are sure the load will be folded.
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (!Chain.getNumOperands())
    return false;
  // Since we are not checking for AA here, conservatively abort if the chain
  // writes to memory. It's not safe to move the callee (a load) across a store.
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
    return false;
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}
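
// Illustrative end result (not from the upstream file): when the callee
// address load can be moved next to the call and folded, an indirect call
// through a function pointer in memory shrinks from
//   movq fptr(%rip), %rax
//   callq *%rax
// to the single instruction
//   callq *fptr(%rip)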

void X86DAGToDAGISel::PreprocessISelDAG() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    // If this is a target specific AND node with no flag usages, turn it back
    // into ISD::AND to enable test instruction matching.
    if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
      SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
                                    N->getOperand(0), N->getOperand(1));
      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
      ++I;
      CurDAG->DeleteNode(N);
      continue;
    }

    switch (N->getOpcode()) {
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT: {
      // Replace vector fp_to_s/uint with their X86 specific equivalent so we
      // don't need 2 sets of patterns.
      if (!N->getSimpleValueType(0).isVector())
        break;

      unsigned NewOpc;
      switch (N->getOpcode()) {
      default: llvm_unreachable("Unexpected opcode!");
      case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
      case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
      }
      SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
                                    N->getOperand(0));
      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
      ++I;
      CurDAG->DeleteNode(N);
      continue;
    }
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL: {
      // Replace vector shifts with their X86 specific equivalent so we don't
      // need 2 sets of patterns.
      if (!N->getValueType(0).isVector())
        break;

      unsigned NewOpc;
      switch (N->getOpcode()) {
      default: llvm_unreachable("Unexpected opcode!");
      case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
      case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
      case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
      }
      SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
                                    N->getOperand(0), N->getOperand(1));
      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
      ++I;
      CurDAG->DeleteNode(N);
      continue;
    }
    case ISD::ANY_EXTEND:
    case ISD::ANY_EXTEND_VECTOR_INREG: {
      // Replace vector any extend with the zero extend equivalents so we don't
      // need 2 sets of patterns. Ignore vXi1 extensions.
      if (!N->getValueType(0).isVector() ||
          N->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
        break;

      unsigned NewOpc = N->getOpcode() == ISD::ANY_EXTEND
                            ? ISD::ZERO_EXTEND
                            : ISD::ZERO_EXTEND_VECTOR_INREG;

      SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
                                    N->getOperand(0));
      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
      ++I;
      CurDAG->DeleteNode(N);
      continue;
    }
    case ISD::FCEIL:
    case ISD::FFLOOR:
    case ISD::FTRUNC:
    case ISD::FNEARBYINT:
    case ISD::FRINT: {
      // Replace fp rounding with their X86 specific equivalent so we don't
      // need 2 sets of patterns.
      unsigned Imm;
      switch (N->getOpcode()) {
      default: llvm_unreachable("Unexpected opcode!");
      case ISD::FCEIL: Imm = 0xA; break;
      case ISD::FFLOOR: Imm = 0x9; break;
      case ISD::FTRUNC: Imm = 0xB; break;
      case ISD::FNEARBYINT: Imm = 0xC; break;
      case ISD::FRINT: Imm = 0x4; break;
      }
      SDLoc dl(N);
      SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
                                    N->getValueType(0),
                                    N->getOperand(0),
                                    CurDAG->getConstant(Imm, dl, MVT::i8));
      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
      ++I;
      CurDAG->DeleteNode(N);
      continue;
    }
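    // Note on the rounding immediates used above (illustrative, not from the
    // upstream file): bits 1:0 of the ROUND*/VRNDSCALE immediate select the
    // rounding mode (00 = nearest, 01 = down, 10 = up, 11 = truncate), bit 2
    // selects the current MXCSR rounding mode instead, and bit 3 suppresses
    // precision exceptions. Hence FFLOOR = 0x9, FCEIL = 0xA, FTRUNC = 0xB,
    // while FRINT = 0x4 follows MXCSR and still reports inexact, and
    // FNEARBYINT = 0xC follows MXCSR silently.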
    case X86ISD::FANDN:
    case X86ISD::FAND:
    case X86ISD::FOR:
    case X86ISD::FXOR: {
      // Widen scalar fp logic ops to vector to reduce isel patterns.
      // FIXME: Can we do this during lowering/combine?
      MVT VT = N->getSimpleValueType(0);
      if (VT.isVector() || VT == MVT::f128)
        break;

      MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32;
      SDLoc dl(N);
      SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
                                    N->getOperand(0));
      SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
                                    N->getOperand(1));

      SDValue Res;
      if (Subtarget->hasSSE2()) {
        EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
        Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
        Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
        unsigned Opc;
        switch (N->getOpcode()) {
        default: llvm_unreachable("Unexpected opcode!");
        case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
        case X86ISD::FAND: Opc = ISD::AND; break;
        case X86ISD::FOR: Opc = ISD::OR; break;
        case X86ISD::FXOR: Opc = ISD::XOR; break;
        }
        Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
        Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
      } else {
        Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
      }
      Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
                            CurDAG->getIntPtrConstant(0, dl));
      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
      ++I;
      CurDAG->DeleteNode(N);
      continue;
    }
    }

    if (OptLevel != CodeGenOpt::None &&
        // Only do this when the target can fold the load into the call or
        // jmp.
        !Subtarget->useRetpolineIndirectCalls() &&
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          (Subtarget->is64Bit() ||
           !getTargetMachine().isPositionIndependent())))) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to be store
    // and load to the stack. This is a gross hack. We would like to simply
    // mark these as being illegal, but when we do that, legalize produces
    // these when it expands calls, then expands these in the same legalize
    // pass. We would like dag combine to be able to hack on these between the
    // call expansion and the node legalization. As such this pass basically
    // does "really late" legalization of these inline with the X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
    switch (N->getOpcode()) {
    default: continue;
    case ISD::FP_ROUND:
    case ISD::FP_EXTEND:
    {
      MVT SrcVT = N->getOperand(0).getSimpleValueType();
      MVT DstVT = N->getSimpleValueType(0);

      // If any of the sources are vectors, no fp stack involved.
      if (SrcVT.isVector() || DstVT.isVector())
        continue;

      // If the source and destination are SSE registers, then this is a legal
      // conversion that should not be lowered.
      const X86TargetLowering *X86Lowering =
          static_cast<const X86TargetLowering *>(TLI);
      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
      if (SrcIsSSE && DstIsSSE)
        continue;

      if (!SrcIsSSE && !DstIsSSE) {
        // If this is an FPStack extension, it is a noop.
        if (N->getOpcode() == ISD::FP_EXTEND)
          continue;
        // If this is a value-preserving FPStack truncation, it is a noop.
        if (N->getConstantOperandVal(1))
          continue;
      }

      // Here we could have an FP stack truncation or an FPStack <-> SSE
      // convert. FPStack has extload and truncstore. SSE can fold direct
      // loads into other operations. Based on this, decide what we want to
      // do.
      MVT MemVT;
      if (N->getOpcode() == ISD::FP_ROUND)
        MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
      else
        MemVT = SrcIsSSE ? SrcVT : DstVT;

      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
      SDLoc dl(N);

      // FIXME: optimize the case where the src/dest is a load or store?

      SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                            N->getOperand(0), MemTmp,
                                            MachinePointerInfo(), MemVT);
      SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                          MachinePointerInfo(), MemVT);

      // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
      // extload we created. This will cause general havoc on the dag because
      // anything below the conversion could be folded into other existing
      // nodes. To avoid invalidating 'I', back it up to the convert node.
      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      break;
    }

    // The sequence of events for lowering STRICT_FP versions of these nodes
    // requires dealing with the chain differently, as there is already a
    // preexisting chain.
    case ISD::STRICT_FP_ROUND:
    case ISD::STRICT_FP_EXTEND:
    {
      MVT SrcVT = N->getOperand(1).getSimpleValueType();
      MVT DstVT = N->getSimpleValueType(0);

      // If any of the sources are vectors, no fp stack involved.
      if (SrcVT.isVector() || DstVT.isVector())
        continue;

      // If the source and destination are SSE registers, then this is a legal
      // conversion that should not be lowered.
      const X86TargetLowering *X86Lowering =
          static_cast<const X86TargetLowering *>(TLI);
      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
      if (SrcIsSSE && DstIsSSE)
        continue;

      if (!SrcIsSSE && !DstIsSSE) {
        // If this is an FPStack extension, it is a noop.
        if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
          continue;
        // If this is a value-preserving FPStack truncation, it is a noop.
        if (N->getConstantOperandVal(2))
          continue;
      }

      // Here we could have an FP stack truncation or an FPStack <-> SSE
      // convert. FPStack has extload and truncstore. SSE can fold direct
      // loads into other operations. Based on this, decide what we want to
      // do.
      MVT MemVT;
      if (N->getOpcode() == ISD::STRICT_FP_ROUND)
        MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
      else
        MemVT = SrcIsSSE ? SrcVT : DstVT;

      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
      SDLoc dl(N);

      // FIXME: optimize the case where the src/dest is a load or store?

      // Since the operation is StrictFP, use the preexisting chain.
      SDValue Store = CurDAG->getTruncStore(N->getOperand(0), dl,
                                            N->getOperand(1), MemTmp,
                                            MachinePointerInfo(), MemVT);
      SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                          MachinePointerInfo(), MemVT);

      // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
      // extload we created. This will cause general havoc on the dag because
      // anything below the conversion could be folded into other existing
      // nodes. To avoid invalidating 'I', back it up to the convert node.
      --I;
      CurDAG->ReplaceAllUsesWith(N, Result.getNode());
      break;
    }
    }


    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }

  // The load+call transform above can leave some dead nodes in the graph. Make
  // sure we remove them. It's possible some of the other transforms do too, so
  // just remove dead nodes unconditionally.
  CurDAG->RemoveDeadNodes();
}
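
// Illustrative effect of the FP-stack hack above (not from the upstream
// file): an f80 -> f32 FP_ROUND whose source lives in an x87 register is
// rewritten as a truncating store to a 4-byte stack slot followed by a
// reload, roughly "fstps -4(%rsp); movss -4(%rsp), %xmm0" once selected.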

// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
  unsigned Opc = N->getMachineOpcode();
  if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
      Opc != X86::MOVSX64rr8)
    return false;

  SDValue N0 = N->getOperand(0);

  // We need to be extracting the lower bits of an extend.
  if (!N0.isMachineOpcode() ||
      N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
      N0.getConstantOperandVal(1) != X86::sub_8bit)
    return false;

  // We're looking for either a movsx or movzx to match the original opcode.
  unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
                                                : X86::MOVSX32rr8_NOREX;
  SDValue N00 = N0.getOperand(0);
  if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
    return false;

  if (Opc == X86::MOVSX64rr8) {
    // If we had a sign extend from 8 to 64 bits, we still need to go from 32
    // to 64.
    MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
                                                   MVT::i64, N00);
    ReplaceUses(N, Extend);
  } else {
    // Ok we can drop this extend and just use the original extend.
    ReplaceUses(N, N00.getNode());
  }

  return true;
}
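
// Illustrative sequence (not from the upstream file): after an 8-bit
// division, the remainder is read from %ah and already extended by a
// _NOREX form of movzx/movsx, so a pattern like
//   movzbl %ah, %ecx        ; extend produced by the divrem lowering
//   movzbl %cl, %eax        ; redundant re-extension of the low 8 bits
// collapses to just the first instruction.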

void X86DAGToDAGISel::PostprocessISelDAG() {
  // Skip peepholes at -O0.
  if (TM.getOptLevel() == CodeGenOpt::None)
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (tryOptimizeRem8Extend(N)) {
      MadeChange = true;
      continue;
    }

    // Look for a TESTrr+ANDrr pattern where both operands of the test are
    // the same. Rewrite to remove the AND.
    unsigned Opc = N->getMachineOpcode();
    if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
         Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
        N->getOperand(0) == N->getOperand(1) &&
        N->isOnlyUserOf(N->getOperand(0).getNode()) &&
        N->getOperand(0).isMachineOpcode()) {
      SDValue And = N->getOperand(0);
      unsigned N0Opc = And.getMachineOpcode();
      if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
          N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
        MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
                                                     MVT::i32,
                                                     And.getOperand(0),
                                                     And.getOperand(1));
        ReplaceUses(N, Test);
        MadeChange = true;
        continue;
      }
      if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
          N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
        unsigned NewOpc;
        switch (N0Opc) {
        case X86::AND8rm:  NewOpc = X86::TEST8mr;  break;
        case X86::AND16rm: NewOpc = X86::TEST16mr; break;
        case X86::AND32rm: NewOpc = X86::TEST32mr; break;
        case X86::AND64rm: NewOpc = X86::TEST64mr; break;
        }

        // Need to swap the memory and register operand.
        SDValue Ops[] = { And.getOperand(1),
                          And.getOperand(2),
                          And.getOperand(3),
                          And.getOperand(4),
                          And.getOperand(5),
                          And.getOperand(0),
                          And.getOperand(6)  /* Chain */ };
        MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
                                                     MVT::i32, MVT::Other, Ops);
        ReplaceUses(N, Test);
        MadeChange = true;
        continue;
      }
    }
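
    // Illustrative rewrite (not from the upstream file): the pair
    //   andl %esi, %edi
    //   testl %edi, %edi
    // becomes a single "testl %esi, %edi" when the AND result has no other
    // users, since TEST sets the same flags without writing a register.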

    // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
    // used. We're doing this late so we can prefer to fold the AND into masked
    // comparisons. Doing that can be better for the live range of the mask
    // register.
    if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
         Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
        N->getOperand(0) == N->getOperand(1) &&
        N->isOnlyUserOf(N->getOperand(0).getNode()) &&
        N->getOperand(0).isMachineOpcode() &&
        onlyUsesZeroFlag(SDValue(N, 0))) {
      SDValue And = N->getOperand(0);
      unsigned N0Opc = And.getMachineOpcode();
      // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
      // KAND instructions and KTEST use the same ISA feature.
      if (N0Opc == X86::KANDBrr ||
          (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
          N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
        unsigned NewOpc;
        switch (Opc) {
        default: llvm_unreachable("Unexpected opcode!");
        case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
        case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
        case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
        case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
        }
        MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
                                                      MVT::i32,
                                                      And.getOperand(0),
                                                      And.getOperand(1));
        ReplaceUses(N, KTest);
        MadeChange = true;
        continue;
      }
    }

    // Attempt to remove vector moves that were inserted to zero upper bits.
    if (Opc != TargetOpcode::SUBREG_TO_REG)
      continue;

    unsigned SubRegIdx = N->getConstantOperandVal(2);
    if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
      continue;

    SDValue Move = N->getOperand(1);
    if (!Move.isMachineOpcode())
      continue;

    // Make sure it's one of the move opcodes we recognize.
    switch (Move.getMachineOpcode()) {
    default:
      continue;
    case X86::VMOVAPDrr:       case X86::VMOVUPDrr:
    case X86::VMOVAPSrr:       case X86::VMOVUPSrr:
    case X86::VMOVDQArr:       case X86::VMOVDQUrr:
    case X86::VMOVAPDYrr:      case X86::VMOVUPDYrr:
    case X86::VMOVAPSYrr:      case X86::VMOVUPSYrr:
    case X86::VMOVDQAYrr:      case X86::VMOVDQUYrr:
    case X86::VMOVAPDZ128rr:   case X86::VMOVUPDZ128rr:
    case X86::VMOVAPSZ128rr:   case X86::VMOVUPSZ128rr:
    case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
    case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
    case X86::VMOVAPDZ256rr:   case X86::VMOVUPDZ256rr:
    case X86::VMOVAPSZ256rr:   case X86::VMOVUPSZ256rr:
    case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
    case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
      break;
    }

    SDValue In = Move.getOperand(0);
    if (!In.isMachineOpcode() ||
        In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
      continue;

    // Make sure the instruction has a VEX, XOP, or EVEX prefix. This excludes
    // the SHA instructions, which use a legacy encoding and don't zero the
    // upper bits.
    uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
    if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
        (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
        (TSFlags & X86II::EncodingMask) != X86II::XOP)
      continue;

    // The producing instruction is another vector instruction. We can drop
    // the move.
    CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
    MadeChange = true;
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}


/// Emit any code that needs to be executed only in the main function.
void X86DAGToDAGISel::emitSpecialCodeForMain() {
  if (Subtarget->isTargetCygMing()) {
    TargetLowering::ArgListTy Args;
    auto &DL = CurDAG->getDataLayout();

    TargetLowering::CallLoweringInfo CLI(*CurDAG);
    CLI.setChain(CurDAG->getRoot())
        .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
                   CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
                   std::move(Args));
    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
    CurDAG->setRoot(Result.second);
  }
}

void X86DAGToDAGISel::EmitFunctionEntryCode() {
  // If this is main, emit special code for main.
  const Function &F = MF->getFunction();
  if (F.hasExternalLinkage() && F.getName() == "main")
    emitSpecialCodeForMain();
}

static bool isDispSafeForFrameIndex(int64_t Val) {
  // On 64-bit platforms, we can run into an issue where a frame index
  // includes a displacement that, when added to the explicit displacement,
  // will overflow the displacement field. Assuming that the frame index
  // displacement fits into a 31-bit integer (which is only slightly more
  // aggressive than the current fundamental assumption that it fits into
  // a 32-bit integer), a 31-bit disp should always be safe.
  return isInt<31>(Val);
}

bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {
  // If there's no offset to fold, we don't need to do any work.
  if (Offset == 0)
    return false;

  // Cannot combine ExternalSymbol displacements with integer offsets.
  if (AM.ES || AM.MCSym)
    return true;

  int64_t Val = AM.Disp + Offset;
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit()) {
    if (!X86::isOffsetSuitableForCodeModel(Val, M,
                                           AM.hasSymbolicDisplacement()))
      return true;
    // In addition to the checks required for a register base, check that
    // we do not try to use an unsafe Disp with a frame index.
    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
        !isDispSafeForFrameIndex(Val))
      return true;
  }
  AM.Disp = Val;
  return false;
}
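
// Worked example (illustrative, not from the upstream file): with
// AM.Disp = 0x7fffff00 already matched, folding a further +0x200 offset
// yields 0x7fffff00 + 0x200 = 0x80000100, which no longer fits the signed
// 32-bit displacement field, so the fold is rejected and the offset stays
// in a separate add.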

bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
  SDValue Address = N->getOperand(1);

  // load gs:0 -> GS segment register.
  // load fs:0 -> FS segment register.
  //
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
        !IndirectTlsSegRefs &&
        (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
         Subtarget->isTargetFuchsia()))
      switch (N->getPointerInfo().getAddrSpace()) {
      case 256:
        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
        return false;
      case 257:
        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
        return false;
      // Address space 258 is not handled here, because it is not used to
      // address TLS areas.
      }

  return true;
}
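
// For reference (illustrative note, not from the upstream file): address
// spaces 256, 257, and 258 are how clang/LLVM spell the x86 GS, FS, and SS
// segment overrides on pointers, which is why 256 maps to GS and 257 to FS
// above.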

/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
/// mode. These wrap things that will resolve down into a symbol reference.
/// If no match is possible, this returns true, otherwise it returns false.
bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  bool IsRIPRelTLS = false;
  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
  if (IsRIPRel) {
    SDValue Val = N.getOperand(0);
    if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
      IsRIPRelTLS = true;
  }

  // We can't use an addressing mode in the 64-bit large code model.
  // Global TLS addressing is an exception. In the medium code model,
  // we can use such a mode when RIP wrappers are present.
  // That signifies access to globals that are known to be "near",
  // such as the GOT itself.
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit() &&
      ((M == CodeModel::Large && !IsRIPRelTLS) ||
       (M == CodeModel::Medium && !IsRIPRel)))
    return true;

  // Base and index reg must be 0 in order to use %rip as base.
  if (IsRIPRel && AM.hasBaseOrIndexReg())
    return true;

  // Make a local copy in case we can't do this fold.
  X86ISelAddressMode Backup = AM;

  int64_t Offset = 0;
  SDValue N0 = N.getOperand(0);
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
    AM.GV = G->getGlobal();
    AM.SymbolFlags = G->getTargetFlags();
    Offset = G->getOffset();
  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
    AM.CP = CP->getConstVal();
    AM.Align = CP->getAlignment();
    AM.SymbolFlags = CP->getTargetFlags();
    Offset = CP->getOffset();
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
    AM.ES = S->getSymbol();
    AM.SymbolFlags = S->getTargetFlags();
  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
    AM.MCSym = S->getMCSymbol();
  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
    AM.JT = J->getIndex();
    AM.SymbolFlags = J->getTargetFlags();
  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
    AM.BlockAddr = BA->getBlockAddress();
    AM.SymbolFlags = BA->getTargetFlags();
    Offset = BA->getOffset();
  } else
    llvm_unreachable("Unhandled symbol reference node.");

  if (foldOffsetIntoAddress(Offset, AM)) {
    AM = Backup;
    return true;
  }

  if (IsRIPRel)
    AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));

  // Commit the changes now that we know this fold is safe.
  return false;
}

/// Add the specified node to the specified addressing mode, returning true if
/// it cannot be done. This just pattern matches for the addressing mode.
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (matchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  switch (TM.getCodeModel()) {
  default: break;
  case CodeModel::Small:
  case CodeModel::Kernel:
    if (Subtarget->is64Bit() &&
        AM.Scale == 1 &&
        AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr &&
        AM.SymbolFlags == X86II::MO_NO_FLAG &&
        AM.hasSymbolicDisplacement())
      AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
    break;
  }

  return false;
}
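
// Illustrative effect of the post-processing above (not from the upstream
// file): "leal (,%eax,2), %ecx" becomes "leal (%eax,%eax), %ecx", and a bare
// symbol reference "movl foo, %eax" becomes the shorter RIP-relative
// "movl foo(%rip), %eax" in the small and kernel code models.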

bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
                               unsigned Depth) {
  // Add an artificial use to this node so that we can keep track of
  // it if it gets CSE'd with a different node.
  HandleSDNode Handle(N);

  X86ISelAddressMode Backup = AM;
  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
    return false;
  AM = Backup;

  // Try again after commuting the operands.
  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
    return false;
  AM = Backup;

  // If we couldn't fold both operands into the address at the same time,
  // see if we can just put each operand into a register and fold at least
  // the add.
  if (AM.BaseType == X86ISelAddressMode::RegBase &&
      !AM.Base_Reg.getNode() &&
      !AM.IndexReg.getNode()) {
    N = Handle.getValue();
    AM.Base_Reg = N.getOperand(0);
    AM.IndexReg = N.getOperand(1);
    AM.Scale = 1;
    return false;
  }
  N = Handle.getValue();
  return true;
}

// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  if (N->getNodeId() == -1 ||
      (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
       SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
    DAG.RepositionNode(Pos->getIterator(), N.getNode());
    // Mark Node as invalid for pruning as after this it may be a successor to a
    // selected node but otherwise be in the same position of Pos.
    // Conservatively mark it with the same -abs(Id) to assure node id
    // invariant is preserved.
    N->setNodeId(Pos->getNodeId());
    SelectionDAGISel::InvalidateNodeId(N.getNode());
  }
}
1550 
1551 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1552 // safe. This allows us to convert the shift and and into an h-register
1553 // extract and a scaled index. Returns false if the simplification is
1554 // performed.
1556  uint64_t Mask,
1557  SDValue Shift, SDValue X,
1558  X86ISelAddressMode &AM) {
1559  if (Shift.getOpcode() != ISD::SRL ||
1560  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1561  !Shift.hasOneUse())
1562  return true;
1563 
1564  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1565  if (ScaleLog <= 0 || ScaleLog >= 4 ||
1566  Mask != (0xffu << ScaleLog))
1567  return true;
1568 
1569  MVT VT = N.getSimpleValueType();
1570  SDLoc DL(N);
1571  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1572  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1573  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1574  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1575  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1576  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1577 
1578  // Insert the new nodes into the topological ordering. We must do this in
1579  // a valid topological ordering as nothing is going to go back and re-sort
1580  // these nodes. We continually insert before 'N' in sequence as this is
1581  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1582  // hierarchy left to express.
1583  insertDAGNode(DAG, N, Eight);
1584  insertDAGNode(DAG, N, Srl);
1585  insertDAGNode(DAG, N, NewMask);
1586  insertDAGNode(DAG, N, And);
1587  insertDAGNode(DAG, N, ShlCount);
1588  insertDAGNode(DAG, N, Shl);
1589  DAG.ReplaceAllUsesWith(N, Shl);
1590  DAG.RemoveDeadNode(N.getNode());
1591  AM.IndexReg = And;
1592  AM.Scale = (1 << ScaleLog);
1593  return false;
1594 }
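
// Editor's sketch (not part of the original file): a compile-time check of
// the rewrite performed above, in plain uint64_t arithmetic, with
// hypothetical helper names. For C1 in [1,3] both forms select bits 8..15
// of X and place them at bit C1, which is exactly an h-register extract
// plus a scaled index (scale = 1 << C1).
constexpr uint64_t hExtractOrig(uint64_t X, unsigned C1) {
  return (X >> (8 - C1)) & (0xffull << C1);  // (X >> (8-C1)) & (0xff << C1)
}
constexpr uint64_t hExtractNew(uint64_t X, unsigned C1) {
  return ((X >> 8) & 0xffull) << C1;         // ((X >> 8) & 0xff) << C1
}
static_assert(hExtractOrig(0x1234, 2) == hExtractNew(0x1234, 2),
              "sample: C1 == 2, scale 4");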
1595 
1596 // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1597 // allows us to fold the shift into this addressing mode. Returns false if the
1598 // transform succeeded.
1599 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
1600  X86ISelAddressMode &AM) {
1601  SDValue Shift = N.getOperand(0);
1602 
1603  // Use a signed mask so that shifting right will insert sign bits. These
1604  // bits will be removed when we shift the result left so it doesn't matter
1605  // what we use. This might allow a smaller immediate encoding.
1606  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
1607 
1608  // If we have an any_extend feeding the AND, look through it to see if there
1609  // is a shift behind it. But only if the AND doesn't use the extended bits.
1610  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
1611  bool FoundAnyExtend = false;
1612  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
1613  Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
1614  isUInt<32>(Mask)) {
1615  FoundAnyExtend = true;
1616  Shift = Shift.getOperand(0);
1617  }
1618 
1619  if (Shift.getOpcode() != ISD::SHL ||
1620  !isa<ConstantSDNode>(Shift.getOperand(1)))
1621  return true;
1622 
1623  SDValue X = Shift.getOperand(0);
1624 
1625  // Not likely to be profitable if either the AND or SHIFT node has more
1626  // than one use (unless all uses are for address computation). Besides,
1627  // isel mechanism requires their node ids to be reused.
1628  if (!N.hasOneUse() || !Shift.hasOneUse())
1629  return true;
1630 
1631  // Verify that the shift amount is something we can fold.
1632  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1633  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1634  return true;
1635 
1636  MVT VT = N.getSimpleValueType();
1637  SDLoc DL(N);
1638  if (FoundAnyExtend) {
1639  SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
1640  insertDAGNode(DAG, N, NewX);
1641  X = NewX;
1642  }
1643 
1644  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1645  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1646  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1647 
1648  // Insert the new nodes into the topological ordering. We must do this in
1649  // a valid topological ordering as nothing is going to go back and re-sort
1650  // these nodes. We continually insert before 'N' in sequence as this is
1651  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1652  // hierarchy left to express.
1653  insertDAGNode(DAG, N, NewMask);
1654  insertDAGNode(DAG, N, NewAnd);
1655  insertDAGNode(DAG, N, NewShift);
1656  DAG.ReplaceAllUsesWith(N, NewShift);
1657  DAG.RemoveDeadNode(N.getNode());
1658 
1659  AM.Scale = 1 << ShiftAmt;
1660  AM.IndexReg = NewAnd;
1661  return false;
1662 }
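
// Editor's sketch (not part of the original file): the masked-shift swap
// above as a value identity, using hypothetical helper names. With C1 == 3
// and C2 == 0xF8 the shift moves outside the AND and becomes scale 8:
constexpr uint64_t scaledMaskOrig(uint64_t X) { return (X << 3) & 0xF8; }
constexpr uint64_t scaledMaskNew(uint64_t X) { return (X & (0xF8 >> 3)) << 3; }
static_assert(scaledMaskOrig(0xFF) == scaledMaskNew(0xFF), "sample check");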
1663 
1664 // Implement some heroics to detect shifts of masked values where the mask can
1665 // be replaced by extending the shift and undoing that in the addressing mode
1666 // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1667 // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1668 // the addressing mode. This results in code such as:
1669 //
1670 // int f(short *y, int *lookup_table) {
1671 // ...
1672 // return *y + lookup_table[*y >> 11];
1673 // }
1674 //
1675 // Turning into:
1676 // movzwl (%rdi), %eax
1677 // movl %eax, %ecx
1678 // shrl $11, %ecx
1679 // addl (%rsi,%rcx,4), %eax
1680 //
1681 // Instead of:
1682 // movzwl (%rdi), %eax
1683 // movl %eax, %ecx
1684 // shrl $9, %ecx
1685 // andl $124, %ecx
1686 // addl (%rsi,%rcx), %eax
1687 //
1688 // Note that this function assumes the mask is provided as a mask *after* the
1689 // value is shifted. The input chain may or may not match that, but computing
1690 // such a mask is trivial.
1691 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
1692  uint64_t Mask,
1693  SDValue Shift, SDValue X,
1694  X86ISelAddressMode &AM) {
1695  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1696  !isa<ConstantSDNode>(Shift.getOperand(1)))
1697  return true;
1698 
1699  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1700  unsigned MaskLZ = countLeadingZeros(Mask);
1701  unsigned MaskTZ = countTrailingZeros(Mask);
1702 
1703  // The amount of shift we're trying to fit into the addressing mode is taken
1704  // from the trailing zeros of the mask.
1705  unsigned AMShiftAmt = MaskTZ;
1706 
1707  // There is nothing we can do here unless the mask is removing some bits.
1708  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1709  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1710 
1711  // We also need to ensure that mask is a continuous run of bits.
1712  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1713 
1714  // Scale the leading zero count down based on the actual size of the value.
1715  // Also scale it down based on the size of the shift.
1716  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1717  if (MaskLZ < ScaleDown)
1718  return true;
1719  MaskLZ -= ScaleDown;
1720 
1721  // The final check is to ensure that any masked out high bits of X are
1722  // already known to be zero. Otherwise, the mask has a semantic impact
1723  // other than masking out a couple of low bits. Unfortunately, because of
1724  // the mask, zero extensions will be removed from operands in some cases.
1725  // This code works extra hard to look through extensions because we can
1726  // replace them with zero extensions cheaply if necessary.
1727  bool ReplacingAnyExtend = false;
1728  if (X.getOpcode() == ISD::ANY_EXTEND) {
1729  unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
1730  X.getOperand(0).getSimpleValueType().getSizeInBits();
1731  // Assume that we'll replace the any-extend with a zero-extend, and
1732  // narrow the search to the extended value.
1733  X = X.getOperand(0);
1734  MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1735  ReplacingAnyExtend = true;
1736  }
1737  APInt MaskedHighBits =
1738  APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
1739  KnownBits Known = DAG.computeKnownBits(X);
1740  if (MaskedHighBits != Known.Zero) return true;
1741 
1742  // We've identified a pattern that can be transformed into a single shift
1743  // and an addressing mode. Make it so.
1744  MVT VT = N.getSimpleValueType();
1745  if (ReplacingAnyExtend) {
1746  assert(X.getValueType() != VT);
1747  // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1748  SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1749  insertDAGNode(DAG, N, NewX);
1750  X = NewX;
1751  }
1752  SDLoc DL(N);
1753  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1754  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1755  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1756  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1757 
1758  // Insert the new nodes into the topological ordering. We must do this in
1759  // a valid topological ordering as nothing is going to go back and re-sort
1760  // these nodes. We continually insert before 'N' in sequence as this is
1761  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1762  // hierarchy left to express.
1763  insertDAGNode(DAG, N, NewSRLAmt);
1764  insertDAGNode(DAG, N, NewSRL);
1765  insertDAGNode(DAG, N, NewSHLAmt);
1766  insertDAGNode(DAG, N, NewSHL);
1767  DAG.ReplaceAllUsesWith(N, NewSHL);
1768  DAG.RemoveDeadNode(N.getNode());
1769 
1770  AM.Scale = 1 << AMShiftAmt;
1771  AM.IndexReg = NewSRL;
1772  return false;
1773 }
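
// Editor's sketch (not part of the original file): the lookup_table example
// from the comment above, checked with hypothetical helpers for a 16-bit
// (zero-extended) input. The precondition is the KnownBits test: the high
// bits the mask would clear must already be known zero, which holds here
// because x fits in 16 bits:
constexpr uint64_t maskScaleOrig(uint64_t x16) { return (x16 >> 9) & 0x7C; }
constexpr uint64_t maskScaleNew(uint64_t x16) { return (x16 >> 11) << 2; }
static_assert(maskScaleOrig(0xFFFF) == maskScaleNew(0xFFFF), "x = 0xffff");
static_assert(maskScaleOrig(0x1234) == maskScaleNew(0x1234), "x = 0x1234");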
1774 
1775 // Transform "(X >> SHIFT) & (MASK << C1)" to
1776 // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
1777 // matched to a BEXTR later. Returns false if the simplification is performed.
1778 static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
1779  uint64_t Mask,
1780  SDValue Shift, SDValue X,
1781  X86ISelAddressMode &AM,
1782  const X86Subtarget &Subtarget) {
1783  if (Shift.getOpcode() != ISD::SRL ||
1784  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1785  !Shift.hasOneUse() || !N.hasOneUse())
1786  return true;
1787 
1788  // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
1789  if (!Subtarget.hasTBM() &&
1790  !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
1791  return true;
1792 
1793  // We need to ensure that mask is a continuous run of bits.
1794  if (!isShiftedMask_64(Mask)) return true;
1795 
1796  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1797 
1798  // The amount of shift we're trying to fit into the addressing mode is taken
1799  // from the trailing zeros of the mask.
1800  unsigned AMShiftAmt = countTrailingZeros(Mask);
1801 
1802  // There is nothing we can do here unless the mask is removing some bits.
1803  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1804  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1805 
1806  MVT VT = N.getSimpleValueType();
1807  SDLoc DL(N);
1808  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1809  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1810  SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
1811  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
1812  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1813  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
1814 
1815  // Insert the new nodes into the topological ordering. We must do this in
1816  // a valid topological ordering as nothing is going to go back and re-sort
1817  // these nodes. We continually insert before 'N' in sequence as this is
1818  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1819  // hierarchy left to express.
1820  insertDAGNode(DAG, N, NewSRLAmt);
1821  insertDAGNode(DAG, N, NewSRL);
1822  insertDAGNode(DAG, N, NewMask);
1823  insertDAGNode(DAG, N, NewAnd);
1824  insertDAGNode(DAG, N, NewSHLAmt);
1825  insertDAGNode(DAG, N, NewSHL);
1826  DAG.ReplaceAllUsesWith(N, NewSHL);
1827  DAG.RemoveDeadNode(N.getNode());
1828 
1829  AM.Scale = 1 << AMShiftAmt;
1830  AM.IndexReg = NewAnd;
1831  return false;
1832 }
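
// Editor's sketch (not part of the original file): the BEXTR-friendly
// rewrite above as a value identity, with hypothetical helper names. With
// SHIFT == 4, C1 == 2, MASK == 0xff, the SRL+AND pair is BEXTR-shaped
// (start 6, length 8) and the trailing SHL becomes scale 4:
constexpr uint64_t bextrOrig(uint64_t x) { return (x >> 4) & (0xffull << 2); }
constexpr uint64_t bextrNew(uint64_t x) { return ((x >> 6) & 0xffull) << 2; }
static_assert(bextrOrig(0xABCD) == bextrNew(0xABCD), "sample check");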
1833 
1834 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1835  unsigned Depth) {
1836  SDLoc dl(N);
1837  LLVM_DEBUG({
1838  dbgs() << "MatchAddress: ";
1839  AM.dump(CurDAG);
1840  });
1841  // Limit recursion.
1842  if (Depth > 5)
1843  return matchAddressBase(N, AM);
1844 
1845  // If this is already a %rip relative address, we can only merge immediates
1846  // into it. Instead of handling this in every case, we handle it here.
1847  // RIP relative addressing: %rip + 32-bit displacement!
1848  if (AM.isRIPRelative()) {
1849  // FIXME: JumpTable and ExternalSymbol address currently don't like
1850  // displacements. It isn't very important, but this should be fixed for
1851  // consistency.
1852  if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1853  return true;
1854 
1855  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1856  if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1857  return false;
1858  return true;
1859  }
1860 
1861  switch (N.getOpcode()) {
1862  default: break;
1863  case ISD::LOCAL_RECOVER: {
1864  if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1865  if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1866  // Use the symbol and don't prefix it.
1867  AM.MCSym = ESNode->getMCSymbol();
1868  return false;
1869  }
1870  break;
1871  }
1872  case ISD::Constant: {
1873  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1874  if (!foldOffsetIntoAddress(Val, AM))
1875  return false;
1876  break;
1877  }
1878 
1879  case X86ISD::Wrapper:
1880  case X86ISD::WrapperRIP:
1881  if (!matchWrapper(N, AM))
1882  return false;
1883  break;
1884 
1885  case ISD::LOAD:
1886  if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1887  return false;
1888  break;
1889 
1890  case ISD::FrameIndex:
1891  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1892  AM.Base_Reg.getNode() == nullptr &&
1893  (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1894  AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1895  AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1896  return false;
1897  }
1898  break;
1899 
1900  case ISD::SHL:
1901  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1902  break;
1903 
1904  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1905  unsigned Val = CN->getZExtValue();
1906  // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1907  // that the base operand remains free for further matching. If
1908  // the base doesn't end up getting used, a post-processing step
1909  // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1910  if (Val == 1 || Val == 2 || Val == 3) {
1911  AM.Scale = 1 << Val;
1912  SDValue ShVal = N.getOperand(0);
1913 
1914  // Okay, we know that we have a scale by now. However, if the scaled
1915  // value is an add of something and a constant, we can fold the
1916  // constant into the disp field here.
1917  if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1918  AM.IndexReg = ShVal.getOperand(0);
1919  ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1920  uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1921  if (!foldOffsetIntoAddress(Disp, AM))
1922  return false;
1923  }
1924 
1925  AM.IndexReg = ShVal;
1926  return false;
1927  }
1928  }
1929  break;
1930 
1931  case ISD::SRL: {
1932  // Scale must not be used already.
1933  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1934 
1935  // We only handle up to 64-bit values here as those are what matter for
1936  // addressing mode optimizations.
1937  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
1938  "Unexpected value size!");
1939 
1940  SDValue And = N.getOperand(0);
1941  if (And.getOpcode() != ISD::AND) break;
1942  SDValue X = And.getOperand(0);
1943 
1944  // The mask used for the transform is expected to be post-shift, but we
1945  // found the shift first so just apply the shift to the mask before passing
1946  // it down.
1947  if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1948  !isa<ConstantSDNode>(And.getOperand(1)))
1949  break;
1950  uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1951 
1952  // Try to fold the mask and shift into the scale, and return false if we
1953  // succeed.
1954  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1955  return false;
1956  break;
1957  }
1958 
1959  case ISD::SMUL_LOHI:
1960  case ISD::UMUL_LOHI:
1961  // A mul_lohi where we need the low part can be folded as a plain multiply.
1962  if (N.getResNo() != 0) break;
1963  LLVM_FALLTHROUGH;
1964  case ISD::MUL:
1965  case X86ISD::MUL_IMM:
1966  // X*[3,5,9] -> X+X*[2,4,8]
1967  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1968  AM.Base_Reg.getNode() == nullptr &&
1969  AM.IndexReg.getNode() == nullptr) {
1970  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1971  if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1972  CN->getZExtValue() == 9) {
1973  AM.Scale = unsigned(CN->getZExtValue())-1;
1974 
1975  SDValue MulVal = N.getOperand(0);
1976  SDValue Reg;
1977 
1978  // Okay, we know that we have a scale by now. However, if the scaled
1979  // value is an add of something and a constant, we can fold the
1980  // constant into the disp field here.
1981  if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1982  isa<ConstantSDNode>(MulVal.getOperand(1))) {
1983  Reg = MulVal.getOperand(0);
1984  ConstantSDNode *AddVal =
1985  cast<ConstantSDNode>(MulVal.getOperand(1));
1986  uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1987  if (foldOffsetIntoAddress(Disp, AM))
1988  Reg = N.getOperand(0);
1989  } else {
1990  Reg = N.getOperand(0);
1991  }
1992 
1993  AM.IndexReg = AM.Base_Reg = Reg;
1994  return false;
1995  }
1996  }
1997  break;
1998 
1999  case ISD::SUB: {
2000  // Given A-B, if A can be completely folded into the address while
2001  // leaving the index field unused, use -B as the index.
2002  // This is a win if A has multiple parts that can be folded into
2003  // the address. Also, this saves a mov if the base register has
2004  // other uses, since it avoids a two-address sub instruction; however,
2005  // it costs an additional mov if the index register has other uses.
2006 
2007  // Add an artificial use to this node so that we can keep track of
2008  // it if it gets CSE'd with a different node.
2009  HandleSDNode Handle(N);
2010 
2011  // Test if the LHS of the sub can be folded.
2012  X86ISelAddressMode Backup = AM;
2013  if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
2014  N = Handle.getValue();
2015  AM = Backup;
2016  break;
2017  }
2018  N = Handle.getValue();
2019  // Test if the index field is free for use.
2020  if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
2021  AM = Backup;
2022  break;
2023  }
2024 
2025  int Cost = 0;
2026  SDValue RHS = N.getOperand(1);
2027  // If the RHS involves a register with multiple uses, this
2028  // transformation incurs an extra mov, due to the neg instruction
2029  // clobbering its operand.
2030  if (!RHS.getNode()->hasOneUse() ||
2031  RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
2032  RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
2033  RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
2034  (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
2035  RHS.getOperand(0).getValueType() == MVT::i32))
2036  ++Cost;
2037  // If the base is a register with multiple uses, this
2038  // transformation may save a mov.
2039  if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
2040  !AM.Base_Reg.getNode()->hasOneUse()) ||
2041  AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2042  --Cost;
2043  // If the folded LHS was interesting, this transformation saves
2044  // address arithmetic.
2045  if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
2046  ((AM.Disp != 0) && (Backup.Disp == 0)) +
2047  (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
2048  --Cost;
2049  // If it doesn't look like it may be an overall win, don't do it.
2050  if (Cost >= 0) {
2051  AM = Backup;
2052  break;
2053  }
2054 
2055  // Ok, the transformation is legal and appears profitable. Go for it.
2056  // Negation will be emitted later to avoid creating dangling nodes if this
2057  // was an unprofitable LEA.
2058  AM.IndexReg = RHS;
2059  AM.NegateIndex = true;
2060  AM.Scale = 1;
2061  return false;
2062  }
2063 
2064  case ISD::ADD:
2065  if (!matchAdd(N, AM, Depth))
2066  return false;
2067  break;
2068 
2069  case ISD::OR:
2070  // We want to look through a transform in InstCombine and DAGCombiner that
2071  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
2072  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
2073  // An 'lea' can then be used to match the shift (multiply) and add:
2074  // and $1, %esi
2075  // lea (%rsi, %rdi, 8), %rax
2076  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
2077  !matchAdd(N, AM, Depth))
2078  return false;
2079  break;
2080 
2081  case ISD::AND: {
2082  // Perform some heroic transforms on an and of a constant-count shift
2083  // with a constant to enable use of the scaled offset field.
2084 
2085  // Scale must not be used already.
2086  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
2087 
2088  // We only handle up to 64-bit values here as those are what matter for
2089  // addressing mode optimizations.
2090  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
2091  "Unexpected value size!");
2092 
2093  if (!isa<ConstantSDNode>(N.getOperand(1)))
2094  break;
2095 
2096  if (N.getOperand(0).getOpcode() == ISD::SRL) {
2097  SDValue Shift = N.getOperand(0);
2098  SDValue X = Shift.getOperand(0);
2099 
2100  uint64_t Mask = N.getConstantOperandVal(1);
2101 
2102  // Try to fold the mask and shift into an extract and scale.
2103  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
2104  return false;
2105 
2106  // Try to fold the mask and shift directly into the scale.
2107  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
2108  return false;
2109 
2110  // Try to fold the mask and shift into BEXTR and scale.
2111  if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
2112  return false;
2113  }
2114 
2115  // Try to swap the mask and shift to place shifts which can be done as
2116  // a scale on the outside of the mask.
2117  if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
2118  return false;
2119 
2120  break;
2121  }
2122  case ISD::ZERO_EXTEND: {
2123  // Try to widen a zexted shift left to the same size as its use, so we can
2124  // match the shift as a scale factor.
2125  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
2126  break;
2127  if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
2128  break;
2129 
2130  // Give up if the shift is not a valid scale factor [1,2,3].
2131  SDValue Shl = N.getOperand(0);
2132  auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
2133  if (!ShAmtC || ShAmtC->getZExtValue() > 3)
2134  break;
2135 
2136  // The narrow shift must only shift out zero bits (it must be 'nuw').
2137  // That makes it safe to widen to the destination type.
2138  APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
2139  ShAmtC->getZExtValue());
2140  if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
2141  break;
2142 
2143  // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
2144  MVT VT = N.getSimpleValueType();
2145  SDLoc DL(N);
2146  SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
2147  SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));
2148 
2149  // Convert the shift to scale factor.
2150  AM.Scale = 1 << ShAmtC->getZExtValue();
2151  AM.IndexReg = Zext;
2152 
2153  insertDAGNode(*CurDAG, N, Zext);
2154  insertDAGNode(*CurDAG, N, NewShl);
2155  CurDAG->ReplaceAllUsesWith(N, NewShl);
2156  CurDAG->RemoveDeadNode(N.getNode());
2157  return false;
2158  }
2159  }
2160 
2161  return matchAddressBase(N, AM);
2162 }
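
// Editor's illustration (not part of the original file): how the X*[3,5,9]
// decomposition above surfaces in the final code. LEA's base+index*scale
// form computes x + x*{2,4,8} in a single instruction:
//   x * 5  =>  leal (%rax,%rax,4), %eax
//   x * 9  =>  leaq (%rdi,%rdi,8), %rax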
2163 
2164 /// Helper for MatchAddress. Add the specified node to the
2165 /// specified addressing mode without any further recursion.
2166 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
2167  // Is the base register already occupied?
2168  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
2169  // If so, check to see if the scale index register is set.
2170  if (!AM.IndexReg.getNode()) {
2171  AM.IndexReg = N;
2172  AM.Scale = 1;
2173  return false;
2174  }
2175 
2176  // Otherwise, we cannot select it.
2177  return true;
2178  }
2179 
2180  // Default, generate it as a register.
2181  AM.BaseType = X86ISelAddressMode::RegBase;
2182  AM.Base_Reg = N;
2183  return false;
2184 }
2185 
2186 /// Helper for selectVectorAddr. Handles things that can be folded into a
2187 /// gather/scatter address. The index register and scale should have already
2188 /// been handled.
2189 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
2190  // TODO: Support other operations.
2191  switch (N.getOpcode()) {
2192  case ISD::Constant: {
2193  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2194  if (!foldOffsetIntoAddress(Val, AM))
2195  return false;
2196  break;
2197  }
2198  case X86ISD::Wrapper:
2199  if (!matchWrapper(N, AM))
2200  return false;
2201  break;
2202  }
2203 
2204  return matchAddressBase(N, AM);
2205 }
2206 
2207 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
2208  SDValue &Scale, SDValue &Index,
2209  SDValue &Disp, SDValue &Segment) {
2210  X86ISelAddressMode AM;
2211  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
2212  AM.IndexReg = Mgs->getIndex();
2213  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
2214 
2215  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2216  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2217  if (AddrSpace == 256)
2218  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2219  if (AddrSpace == 257)
2220  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2221  if (AddrSpace == 258)
2222  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2223 
2224  SDLoc DL(N);
2225  MVT VT = N.getSimpleValueType();
2226 
2227  // Try to match into the base and displacement fields.
2228  if (matchVectorAddress(N, AM))
2229  return false;
2230 
2231  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2232  return true;
2233 }
2234 
2235 /// Returns true if it is able to pattern match an addressing mode.
2236 /// The operands that make up the maximal addressing mode it can match are
2237 /// returned by reference.
2238 ///
2239 /// Parent is the parent node of the addr operand that is being matched. It
2240 /// is always a load, store, atomic node, or null. It is only null when
2241 /// checking memory operands for inline asm nodes.
2242 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
2243  SDValue &Scale, SDValue &Index,
2244  SDValue &Disp, SDValue &Segment) {
2245  X86ISelAddressMode AM;
2246 
2247  if (Parent &&
2248  // This list of opcodes are all the nodes that have an "addr:$ptr" operand
2249  // that are not a MemSDNode, and thus don't have proper addrspace info.
2250  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
2251  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
2252  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
2253  Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
2254  Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
2255  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
2256  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
2257  unsigned AddrSpace =
2258  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2259  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2260  if (AddrSpace == 256)
2261  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2262  if (AddrSpace == 257)
2263  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2264  if (AddrSpace == 258)
2265  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2266  }
2267 
2268  // Save the DL and VT before calling matchAddress, it can invalidate N.
2269  SDLoc DL(N);
2270  MVT VT = N.getSimpleValueType();
2271 
2272  if (matchAddress(N, AM))
2273  return false;
2274 
2275  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2276  return true;
2277 }
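
// Editor's illustration (not part of the original file): the address-space
// to segment-register mapping above, sketched at the IR level:
//   load i32, i32 addrspace(256)* %p   ; lowered with a %gs segment override
//   load i32, i32 addrspace(257)* %p   ; lowered with a %fs segment override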
2278 
2279 // We can only fold a load if all nodes between it and the root node have a
2280 // single use. If there are additional uses, we could end up duplicating the
2281 // load.
2282 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
2283  while (User != Root) {
2284  if (!User->hasOneUse())
2285  return false;
2286  User = *User->use_begin();
2287  }
2288 
2289  return true;
2290 }
2291 
2292 /// Match a scalar SSE load. In particular, we want to match a load whose top
2293 /// elements are either undef or zeros. The load flavor is derived from the
2294 /// type of N, which is either v4f32 or v2f64.
2295 ///
2296 /// We also return:
2297 /// PatternChainNode: this is the matched node that has a chain input and
2298 /// output.
2299 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
2300  SDValue N, SDValue &Base,
2301  SDValue &Scale, SDValue &Index,
2302  SDValue &Disp, SDValue &Segment,
2303  SDValue &PatternNodeWithChain) {
2304  if (!hasSingleUsesFromRoot(Root, Parent))
2305  return false;
2306 
2307  // We can allow a full vector load here since narrowing a load is ok unless
2308  // it's volatile.
2309  if (ISD::isNON_EXTLoad(N.getNode())) {
2310  LoadSDNode *LD = cast<LoadSDNode>(N);
2311  if (!LD->isVolatile() &&
2312  IsProfitableToFold(N, LD, Root) &&
2313  IsLegalToFold(N, Parent, Root, OptLevel)) {
2314  PatternNodeWithChain = N;
2315  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2316  Segment);
2317  }
2318  }
2319 
2320  // We can also match the special zero extended load opcode.
2321  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
2322  PatternNodeWithChain = N;
2323  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2324  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
2325  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
2326  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
2327  Segment);
2328  }
2329  }
2330 
2331  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
2332  // once. Otherwise the load might get duplicated and the chain output of the
2333  // duplicate load will not be observed by all dependencies.
2334  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
2335  PatternNodeWithChain = N.getOperand(0);
2336  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2337  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2338  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2339  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2340  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2341  Segment);
2342  }
2343  }
2344 
2345  return false;
2346 }
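
// Editor's illustration (not part of the original file): the payoff of the
// scalar SSE load match above. When the load is used only by the scalar op,
// it can be folded into the instruction's memory operand:
//   movss (%rdi), %xmm1
//   addss %xmm1, %xmm0     =>   addss (%rdi), %xmm0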
2347 
2348 
2349 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
2350  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2351  uint64_t ImmVal = CN->getZExtValue();
2352  if (!isUInt<32>(ImmVal))
2353  return false;
2354 
2355  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
2356  return true;
2357  }
2358 
2359  // In static codegen with small code model, we can get the address of a label
2360  // into a register with 'movl'.
2361  if (N->getOpcode() != X86ISD::Wrapper)
2362  return false;
2363 
2364  N = N.getOperand(0);
2365 
2366  // At least GNU as does not accept 'movl' for TPOFF relocations.
2367  // FIXME: We could use 'movl' when we know we are targeting MC.
2368  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
2369  return false;
2370 
2371  Imm = N;
2372  if (N->getOpcode() != ISD::TargetGlobalAddress)
2373  return TM.getCodeModel() == CodeModel::Small;
2374 
2375  Optional<ConstantRange> CR =
2376  cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
2377  if (!CR)
2378  return TM.getCodeModel() == CodeModel::Small;
2379 
2380  return CR->getUnsignedMax().ult(1ull << 32);
2381 }
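
// Editor's illustration (not part of the original file): the size win this
// predicate enables. A 32-bit immediate move is implicitly zero-extended to
// 64 bits, so a 10-byte 'movabsq' shrinks to a 5-byte 'movl':
//   movabsq $imm, %rax   =>   movl $imm, %eax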
2382 
2383 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
2384  SDValue &Scale, SDValue &Index,
2385  SDValue &Disp, SDValue &Segment) {
2386  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
2387  SDLoc DL(N);
2388 
2389  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
2390  return false;
2391 
2392  auto *RN = dyn_cast<RegisterSDNode>(Base);
2393  if (RN && RN->getReg() == 0)
2394  Base = CurDAG->getRegister(0, MVT::i64);
2395  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
2396  // Base could already be %rip, particularly in the x32 ABI.
2397  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2398  MVT::i64), 0);
2399  Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2400  Base);
2401  }
2402 
2403  RN = dyn_cast<RegisterSDNode>(Index);
2404  if (RN && RN->getReg() == 0)
2405  Index = CurDAG->getRegister(0, MVT::i64);
2406  else {
2407  assert(Index.getValueType() == MVT::i32 &&
2408  "Expect to be extending 32-bit registers for use in LEA");
2409  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2410  MVT::i64), 0);
2411  Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2412  Index);
2413  }
2414 
2415  return true;
2416 }
2417 
2418 /// Calls SelectAddr and determines if the maximal addressing
2419 /// mode it matches can be cost effectively emitted as an LEA instruction.
2420 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
2421  SDValue &Base, SDValue &Scale,
2422  SDValue &Index, SDValue &Disp,
2423  SDValue &Segment) {
2424  X86ISelAddressMode AM;
2425 
2426  // Save the DL and VT before calling matchAddress, it can invalidate N.
2427  SDLoc DL(N);
2428  MVT VT = N.getSimpleValueType();
2429 
2430  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
2431  // segments.
2432  SDValue Copy = AM.Segment;
2433  SDValue T = CurDAG->getRegister(0, MVT::i32);
2434  AM.Segment = T;
2435  if (matchAddress(N, AM))
2436  return false;
2437  assert (T == AM.Segment);
2438  AM.Segment = Copy;
2439 
2440  unsigned Complexity = 0;
2441  if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
2442  Complexity = 1;
2443  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2444  Complexity = 4;
2445 
2446  if (AM.IndexReg.getNode())
2447  Complexity++;
2448 
2449  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg or
2450  // a simple shift.
2451  if (AM.Scale > 1)
2452  Complexity++;
2453 
2454  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
2455  // to a LEA. This is determined with some experimentation but is by no means
2456  // optimal (especially for code size consideration). LEA is nice because of
2457  // its three-address nature. Tweak the cost function again when we can run
2458  // convertToThreeAddress() at register allocation time.
2459  if (AM.hasSymbolicDisplacement()) {
2460  // For X86-64, always use LEA to materialize RIP-relative addresses.
2461  if (Subtarget->is64Bit())
2462  Complexity = 4;
2463  else
2464  Complexity += 2;
2465  }
2466 
2467  // Heuristic: try harder to form an LEA from ADD if the operands set flags.
2468  // Unlike ADD, LEA does not affect flags, so we will be less likely to require
2469  // duplicating flag-producing instructions later in the pipeline.
2470  if (N.getOpcode() == ISD::ADD) {
2471  auto isMathWithFlags = [](SDValue V) {
2472  switch (V.getOpcode()) {
2473  case X86ISD::ADD:
2474  case X86ISD::SUB:
2475  case X86ISD::ADC:
2476  case X86ISD::SBB:
2477  /* TODO: These opcodes can be added safely, but we may want to justify
2478  their inclusion for different reasons (better for reg-alloc).
2479  case X86ISD::SMUL:
2480  case X86ISD::UMUL:
2481  case X86ISD::OR:
2482  case X86ISD::XOR:
2483  case X86ISD::AND:
2484  */
2485  // Value 1 is the flag output of the node - verify it's not dead.
2486  return !SDValue(V.getNode(), 1).use_empty();
2487  default:
2488  return false;
2489  }
2490  };
2491  // TODO: This could be an 'or' rather than 'and' to make the transform more
2492  // likely to happen. We might want to factor in whether there's a
2493  // load folding opportunity for the math op that disappears with LEA.
2494  if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
2495  Complexity++;
2496  }
2497 
2498  if (AM.Disp)
2499  Complexity++;
2500 
2501  // If it isn't worth using an LEA, reject it.
2502  if (Complexity <= 2)
2503  return false;
2504 
2505  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2506  return true;
2507 }
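
// Editor's illustration (not part of the original file): an address that
// clears the Complexity > 2 bar above. Base, scaled index, and displacement
// fold into one three-address LEA instead of a shift and two adds:
//   leaq 16(%rdi,%rsi,4), %rax   # rax = rdi + rsi*4 + 16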
2508 
2509 /// This is only run on TargetGlobalTLSAddress nodes.
2510 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
2511  SDValue &Scale, SDValue &Index,
2512  SDValue &Disp, SDValue &Segment) {
2513  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
2514  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
2515 
2516  X86ISelAddressMode AM;
2517  AM.GV = GA->getGlobal();
2518  AM.Disp += GA->getOffset();
2519  AM.SymbolFlags = GA->getTargetFlags();
2520 
2521  MVT VT = N.getSimpleValueType();
2522  if (VT == MVT::i32) {
2523  AM.Scale = 1;
2524  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
2525  }
2526 
2527  getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
2528  return true;
2529 }
2530 
2531 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
2532  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2533  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
2534  N.getValueType());
2535  return true;
2536  }
2537 
2538  // Keep track of the original value type and whether this value was
2539  // truncated. If we see a truncation from pointer type to VT that truncates
2540  // bits that are known to be zero, we can use a narrow reference.
2541  EVT VT = N.getValueType();
2542  bool WasTruncated = false;
2543  if (N.getOpcode() == ISD::TRUNCATE) {
2544  WasTruncated = true;
2545  N = N.getOperand(0);
2546  }
2547 
2548  if (N.getOpcode() != X86ISD::Wrapper)
2549  return false;
2550 
2551  // We can only use non-GlobalValues as immediates if they were not truncated,
2552  // as we do not have any range information. If we have a GlobalValue and the
2553  // address was not truncated, we can select it as an operand directly.
2554  unsigned Opc = N.getOperand(0)->getOpcode();
2555  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2556  Op = N.getOperand(0);
2557  // We can only select the operand directly if we didn't have to look past a
2558  // truncate.
2559  return !WasTruncated;
2560  }
2561 
2562  // Check that the global's range fits into VT.
2563  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2564  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2565  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2566  return false;
2567 
2568  // Okay, we can use a narrow reference.
2569  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2570  GA->getOffset(), GA->getTargetFlags());
2571  return true;
2572 }
2573 
2574 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2575  SDValue &Base, SDValue &Scale,
2576  SDValue &Index, SDValue &Disp,
2577  SDValue &Segment) {
2578  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2579  !IsProfitableToFold(N, P, Root) ||
2580  !IsLegalToFold(N, P, Root, OptLevel))
2581  return false;
2582 
2583  return selectAddr(N.getNode(),
2584  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2585 }
2586 
2587 /// Return an SDNode that returns the value of the global base register.
2588 /// Output instructions required to initialize the global base register,
2589 /// if necessary.
2590 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2591  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2592  auto &DL = MF->getDataLayout();
2593  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2594 }
2595 
2596 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2597  if (N->getOpcode() == ISD::TRUNCATE)
2598  N = N->getOperand(0).getNode();
2599  if (N->getOpcode() != X86ISD::Wrapper)
2600  return false;
2601 
2602  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2603  if (!GA)
2604  return false;
2605 
2606  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2607  return CR && CR->getSignedMin().sge(-1ull << Width) &&
2608  CR->getSignedMax().slt(1ull << Width);
2609 }
2610 
2611 static X86::CondCode getCondFromNode(SDNode *N) {
2612  assert(N->isMachineOpcode() && "Unexpected node");
2613  X86::CondCode CC = X86::COND_INVALID;
2614  unsigned Opc = N->getMachineOpcode();
2615  if (Opc == X86::JCC_1)
2616  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1));
2617  else if (Opc == X86::SETCCr)
2618  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0));
2619  else if (Opc == X86::SETCCm)
2620  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5));
2621  else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr ||
2622  Opc == X86::CMOV64rr)
2623  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2));
2624  else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm ||
2625  Opc == X86::CMOV64rm)
2626  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6));
2627 
2628  return CC;
2629 }
2630 
2631 /// Return true if the given X86ISD::CMP node has no users that use any
2632 /// flag other than ZF.
2633 bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
2634  // Examine each user of the node.
2635  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2636  UI != UE; ++UI) {
2637  // Only check things that use the flags.
2638  if (UI.getUse().getResNo() != Flags.getResNo())
2639  continue;
2640  // Only examine CopyToReg uses that copy to EFLAGS.
2641  if (UI->getOpcode() != ISD::CopyToReg ||
2642  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2643  return false;
2644  // Examine each user of the CopyToReg use.
2645  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2646  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2647  // Only examine the Flag result.
2648  if (FlagUI.getUse().getResNo() != 1) continue;
2649  // Anything unusual: assume conservatively.
2650  if (!FlagUI->isMachineOpcode()) return false;
2651  // Examine the condition code of the user.
2652  X86::CondCode CC = getCondFromNode(*FlagUI);
2653 
2654  switch (CC) {
2655  // Comparisons which only use the zero flag.
2656  case X86::COND_E: case X86::COND_NE:
2657  continue;
2658  // Anything else: assume conservatively.
2659  default:
2660  return false;
2661  }
2662  }
2663  }
2664  return true;
2665 }
2666 
2667 /// Return true if the given X86ISD::CMP node has no uses which require the SF
2668 /// flag to be accurate.
2669 bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
2670  // Examine each user of the node.
2671  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2672  UI != UE; ++UI) {
2673  // Only check things that use the flags.
2674  if (UI.getUse().getResNo() != Flags.getResNo())
2675  continue;
2676  // Only examine CopyToReg uses that copy to EFLAGS.
2677  if (UI->getOpcode() != ISD::CopyToReg ||
2678  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2679  return false;
2680  // Examine each user of the CopyToReg use.
2681  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2682  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2683  // Only examine the Flag result.
2684  if (FlagUI.getUse().getResNo() != 1) continue;
2685  // Anything unusual: assume conservatively.
2686  if (!FlagUI->isMachineOpcode()) return false;
2687  // Examine the condition code of the user.
2688  X86::CondCode CC = getCondFromNode(*FlagUI);
2689 
2690  switch (CC) {
2691  // Comparisons which don't examine the SF flag.
2692  case X86::COND_A: case X86::COND_AE:
2693  case X86::COND_B: case X86::COND_BE:
2694  case X86::COND_E: case X86::COND_NE:
2695  case X86::COND_O: case X86::COND_NO:
2696  case X86::COND_P: case X86::COND_NP:
2697  continue;
2698  // Anything else: assume conservatively.
2699  default:
2700  return false;
2701  }
2702  }
2703  }
2704  return true;
2705 }
2706 
2707 static bool mayUseCarryFlag(X86::CondCode CC) {
2708  switch (CC) {
2709  // Comparisons which don't examine the CF flag.
2710  case X86::COND_O: case X86::COND_NO:
2711  case X86::COND_E: case X86::COND_NE:
2712  case X86::COND_S: case X86::COND_NS:
2713  case X86::COND_P: case X86::COND_NP:
2714  case X86::COND_L: case X86::COND_GE:
2715  case X86::COND_G: case X86::COND_LE:
2716  return false;
2717  // Anything else: assume conservatively.
2718  default:
2719  return true;
2720  }
2721 }
2722 
2723 /// Return true if the given flag-setting node has no uses which require the
2724 /// CF flag to be accurate.
2725 bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
2726  // Examine each user of the node.
2727  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2728  UI != UE; ++UI) {
2729  // Only check things that use the flags.
2730  if (UI.getUse().getResNo() != Flags.getResNo())
2731  continue;
2732 
2733  unsigned UIOpc = UI->getOpcode();
2734 
2735  if (UIOpc == ISD::CopyToReg) {
2736  // Only examine CopyToReg uses that copy to EFLAGS.
2737  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2738  return false;
2739  // Examine each user of the CopyToReg use.
2740  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2741  FlagUI != FlagUE; ++FlagUI) {
2742  // Only examine the Flag result.
2743  if (FlagUI.getUse().getResNo() != 1)
2744  continue;
2745  // Anything unusual: assume conservatively.
2746  if (!FlagUI->isMachineOpcode())
2747  return false;
2748  // Examine the condition code of the user.
2749  X86::CondCode CC = getCondFromNode(*FlagUI);
2750 
2751  if (mayUseCarryFlag(CC))
2752  return false;
2753  }
2754 
2755  // This CopyToReg is ok. Move on to the next user.
2756  continue;
2757  }
2758 
2759  // This might be an unselected node. So look for the pre-isel opcodes that
2760  // use flags.
2761  unsigned CCOpNo;
2762  switch (UIOpc) {
2763  default:
2764  // Something unusual. Be conservative.
2765  return false;
2766  case X86ISD::SETCC: CCOpNo = 0; break;
2767  case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
2768  case X86ISD::CMOV: CCOpNo = 2; break;
2769  case X86ISD::BRCOND: CCOpNo = 2; break;
2770  }
2771 
2772  X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
2773  if (mayUseCarryFlag(CC))
2774  return false;
2775  }
2776  return true;
2777 }
2778 
2779 /// Check whether or not the chain ending in StoreNode is suitable for doing
2780 /// the {load; op; store} to modify transformation.
2781 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2782  SDValue StoredVal, SelectionDAG *CurDAG,
2783  unsigned LoadOpNo,
2784  LoadSDNode *&LoadNode,
2785  SDValue &InputChain) {
2786  // Is the stored value result 0 of the operation?
2787  if (StoredVal.getResNo() != 0) return false;
2788 
2789  // Are there other uses of the operation other than the store?
2790  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2791 
2792  // Is the store non-extending and non-indexed?
2793  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2794  return false;
2795 
2796  SDValue Load = StoredVal->getOperand(LoadOpNo);
2797  // Is the stored value a non-extending and non-indexed load?
2798  if (!ISD::isNormalLoad(Load.getNode())) return false;
2799 
2800  // Return LoadNode by reference.
2801  LoadNode = cast<LoadSDNode>(Load);
2802 
2803  // Is store the only read of the loaded value?
2804  if (!Load.hasOneUse())
2805  return false;
2806 
2807  // Is the address of the store the same as the load?
2808  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2809  LoadNode->getOffset() != StoreNode->getOffset())
2810  return false;
2811 
2812  bool FoundLoad = false;
2813  SmallVector<SDValue, 4> ChainOps;
2814  SmallVector<const SDNode *, 4> LoopWorklist;
2815  SmallPtrSet<const SDNode *, 32> Visited;
2816  const unsigned int Max = 1024;
2817 
2818  // Visualization of Load-Op-Store fusion:
2819  // -------------------------
2820  // Legend:
2821  // *-lines = Chain operand dependencies.
2822  // |-lines = Normal operand dependencies.
2823  // Dependencies flow down and right. n-suffix references multiple nodes.
2824  //
2825  // C Xn C
2826  // * * *
2827  // * * *
2828  // Xn A-LD Yn TF Yn
2829  // * * \ | * |
2830  // * * \ | * |
2831  // * * \ | => A--LD_OP_ST
2832  // * * \| \
2833  // TF OP \
2834  // * | \ Zn
2835  // * | \
2836  // A-ST Zn
2837  //
2838 
2839  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
2840  // #2: Yn -> LD
2841  // #3: ST -> Zn
2842 
2843  // Ensure the transform is safe by checking for the dual
2844  // dependencies to make sure we do not induce a loop.
2845 
2846  // As LD is a predecessor to both OP and ST we can do this by checking:
2847  // a). if LD is a predecessor to a member of Xn or Yn.
2848  // b). if a Zn is a predecessor to ST.
2849 
2850  // However, (b) can only occur through being a chain predecessor to
2851  // ST, which is the same as Zn being a member or predecessor of Xn,
2852  // which is a subset of LD being a predecessor of Xn. So it's
2853  // subsumed by check (a).
2854 
2855  SDValue Chain = StoreNode->getChain();
2856 
2857  // Gather X elements in ChainOps.
2858  if (Chain == Load.getValue(1)) {
2859  FoundLoad = true;
2860  ChainOps.push_back(Load.getOperand(0));
2861  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2862  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2863  SDValue Op = Chain.getOperand(i);
2864  if (Op == Load.getValue(1)) {
2865  FoundLoad = true;
2866  // Drop Load, but keep its chain. No cycle check necessary.
2867  ChainOps.push_back(Load.getOperand(0));
2868  continue;
2869  }
2870  LoopWorklist.push_back(Op.getNode());
2871  ChainOps.push_back(Op);
2872  }
2873  }
2874 
2875  if (!FoundLoad)
2876  return false;
2877 
2878  // Worklist is currently Xn. Add Yn to worklist.
2879  for (SDValue Op : StoredVal->ops())
2880  if (Op.getNode() != LoadNode)
2881  LoopWorklist.push_back(Op.getNode());
2882 
2883  // Check (a) if Load is a predecessor to Xn + Yn
2884  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2885  true))
2886  return false;
2887 
2888  InputChain =
2889  CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2890  return true;
2891 }
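
// Editor's illustration (not part of the original file): the load-op-store
// shape this predicate accepts, and what foldLoadStoreIntoMemOperand below
// turns it into:
//   movl (%rdi), %eax
//   addl $5, %eax          =>   addl $5, (%rdi)
//   movl %eax, (%rdi)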
2892 
2893 // Change a chain of {load; op; store} of the same value into a simple op
2894 // through memory of that value, if the uses of the modified value and its
2895 // address are suitable.
2896 //
2897 // The tablegen memory-operand pattern is currently not able to match
2898 // the case where the EFLAGS of the original operation are used.
2899 //
2900 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2901 // be transferred from a node in the pattern to the result node, probably with
2902 // a new keyword. For example, we have this
2903 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2904 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2905 // (implicit EFLAGS)]>;
2906 // but maybe need something like this
2907 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2908 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2909 // (transferrable EFLAGS)]>;
2910 //
2911 // Until then, we manually fold these and instruction select the operation
2912 // here.
2913 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2914  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2915  SDValue StoredVal = StoreNode->getOperand(1);
2916  unsigned Opc = StoredVal->getOpcode();
2917 
2918  // Before we try to select anything, make sure this is memory operand size
2919  // and opcode we can handle. Note that this must match the code below that
2920  // actually lowers the opcodes.
2921  EVT MemVT = StoreNode->getMemoryVT();
2922  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2923  MemVT != MVT::i8)
2924  return false;
2925 
2926  bool IsCommutable = false;
2927  bool IsNegate = false;
2928  switch (Opc) {
2929  default:
2930  return false;
2931  case X86ISD::SUB:
2932  IsNegate = isNullConstant(StoredVal.getOperand(0));
2933  break;
2934  case X86ISD::SBB:
2935  break;
2936  case X86ISD::ADD:
2937  case X86ISD::ADC:
2938  case X86ISD::AND:
2939  case X86ISD::OR:
2940  case X86ISD::XOR:
2941  IsCommutable = true;
2942  break;
2943  }
2944 
2945  unsigned LoadOpNo = IsNegate ? 1 : 0;
2946  LoadSDNode *LoadNode = nullptr;
2947  SDValue InputChain;
2948  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2949  LoadNode, InputChain)) {
2950  if (!IsCommutable)
2951  return false;
2952 
2953  // This operation is commutable, try the other operand.
2954  LoadOpNo = 1;
2955  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2956  LoadNode, InputChain))
2957  return false;
2958  }
2959 
2960  SDValue Base, Scale, Index, Disp, Segment;
2961  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2962  Segment))
2963  return false;
2964 
2965  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2966  unsigned Opc8) {
2967  switch (MemVT.getSimpleVT().SimpleTy) {
2968  case MVT::i64:
2969  return Opc64;
2970  case MVT::i32:
2971  return Opc32;
2972  case MVT::i16:
2973  return Opc16;
2974  case MVT::i8:
2975  return Opc8;
2976  default:
2977  llvm_unreachable("Invalid size!");
2978  }
2979  };
2980 
2981  MachineSDNode *Result;
2982  switch (Opc) {
2983  case X86ISD::SUB:
2984  // Handle negate.
2985  if (IsNegate) {
2986  unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
2987  X86::NEG8m);
2988  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2989  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
2990  MVT::Other, Ops);
2991  break;
2992  }
2993  LLVM_FALLTHROUGH;
2994  case X86ISD::ADD:
2995  // Try to match inc/dec.
2996  if (!Subtarget->slowIncDec() || OptForSize) {
2997  bool IsOne = isOneConstant(StoredVal.getOperand(1));
2998  bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
2999  // ADD/SUB with 1/-1 can use inc/dec when the carry flag isn't used.
3000  if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
3001  unsigned NewOpc =
3002  ((Opc == X86ISD::ADD) == IsOne)
3003  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
3004  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
3005  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
3006  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3007  MVT::Other, Ops);
3008  break;
3009  }
3010  }
3011  LLVM_FALLTHROUGH;
3012  case X86ISD::ADC:
3013  case X86ISD::SBB:
3014  case X86ISD::AND:
3015  case X86ISD::OR:
3016  case X86ISD::XOR: {
3017  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
3018  switch (Opc) {
3019  case X86ISD::ADD:
3020  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
3021  X86::ADD8mr);
3022  case X86ISD::ADC:
3023  return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
3024  X86::ADC8mr);
3025  case X86ISD::SUB:
3026  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
3027  X86::SUB8mr);
3028  case X86ISD::SBB:
3029  return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
3030  X86::SBB8mr);
3031  case X86ISD::AND:
3032  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
3033  X86::AND8mr);
3034  case X86ISD::OR:
3035  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
3036  case X86ISD::XOR:
3037  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
3038  X86::XOR8mr);
3039  default:
3040  llvm_unreachable("Invalid opcode!");
3041  }
3042  };
3043  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
3044  switch (Opc) {
3045  case X86ISD::ADD:
3046  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
3047  case X86ISD::ADC:
3048  return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
3049  case X86ISD::SUB:
3050  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
3051  case X86ISD::SBB:
3052  return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
3053  case X86ISD::AND:
3054  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
3055  case X86ISD::OR:
3056  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
3057  case X86ISD::XOR:
3058  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
3059  default:
3060  llvm_unreachable("Invalid opcode!");
3061  }
3062  };
3063  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
3064  switch (Opc) {
3065  case X86ISD::ADD:
3066  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
3067  X86::ADD8mi);
3068  case X86ISD::ADC:
3069  return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
3070  X86::ADC8mi);
3071  case X86ISD::SUB:
3072  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
3073  X86::SUB8mi);
3074  case X86ISD::SBB:
3075  return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
3076  X86::SBB8mi);
3077  case X86ISD::AND:
3078  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
3079  X86::AND8mi);
3080  case X86ISD::OR:
3081  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
3082  X86::OR8mi);
3083  case X86ISD::XOR:
3084  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
3085  X86::XOR8mi);
3086  default:
3087  llvm_unreachable("Invalid opcode!");
3088  }
3089  };
3090 
3091  unsigned NewOpc = SelectRegOpcode(Opc);
3092  SDValue Operand = StoredVal->getOperand(1 - LoadOpNo);
3093 
3094  // See if the operand is a constant that we can fold into an immediate
3095  // operand.
3096  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
3097  int64_t OperandV = OperandC->getSExtValue();
3098 
3099  // Check if we can shrink the operand enough to fit in an immediate (or
3100  // fit into a smaller immediate) by negating it and switching the
3101  // operation.
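      // For example, 'add rax, 128' requires an imm32, but the equivalent
      // 'sub rax, -128' fits in a sign-extended imm8.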
3102  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
3103  ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
3104  (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
3105  isInt<32>(-OperandV))) &&
3106  hasNoCarryFlagUses(StoredVal.getValue(1))) {
3107  OperandV = -OperandV;
3108  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
3109  }
3110 
3111  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
3112  // the larger immediate operand.
3113  if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
3114  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3115  NewOpc = SelectImm8Opcode(Opc);
3116  } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
3117  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3118  NewOpc = SelectImmOpcode(Opc);
3119  }
3120  }
3121 
3122  if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
3123  SDValue CopyTo =
3124  CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
3125  StoredVal.getOperand(2), SDValue());
3126 
3127  const SDValue Ops[] = {Base, Scale, Index, Disp,
3128  Segment, Operand, CopyTo, CopyTo.getValue(1)};
3129  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3130  Ops);
3131  } else {
3132  const SDValue Ops[] = {Base, Scale, Index, Disp,
3133  Segment, Operand, InputChain};
3134  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3135  Ops);
3136  }
3137  break;
3138  }
3139  default:
3140  llvm_unreachable("Invalid opcode!");
3141  }
3142 
3143  MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
3144  LoadNode->getMemOperand()};
3145  CurDAG->setNodeMemRefs(Result, MemOps);
3146 
3147  // Update Load Chain uses as well.
3148  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
3149  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
3150  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
3151  CurDAG->RemoveDeadNode(Node);
3152  return true;
3153 }
3154 
3155 // See if this is an X & Mask that we can match to BEXTR/BZHI.
3156 // Where Mask is one of the following patterns:
3157 // a) x & (1 << nbits) - 1
3158 // b) x & ~(-1 << nbits)
3159 // c) x & (-1 >> (32 - y))
3160 // d) x << (32 - y) >> (32 - y)
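// For example, with nbits == 8 pattern (a) computes x & ((1 << 8) - 1),
// i.e. x & 0xFF, which extracts the low 8 bits of x.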
3161 bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
3162  assert(
3163  (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
3164  "Should be either an and-mask, or right-shift after clearing high bits.");
3165 
3166  // BEXTR is a BMI instruction, BZHI is a BMI2 instruction. We need at least one.
3167  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
3168  return false;
3169 
3170  MVT NVT = Node->getSimpleValueType(0);
3171 
3172  // Only supported for 32 and 64 bits.
3173  if (NVT != MVT::i32 && NVT != MVT::i64)
3174  return false;
3175 
3176  SDValue NBits;
3177 
3178  // If we have BMI2's BZHI, we are ok with multi-use patterns.
3179  // Else, if we only have BMI1's BEXTR, we require one-use.
3180  const bool CanHaveExtraUses = Subtarget->hasBMI2();
3181  auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
3182  return CanHaveExtraUses ||
3183  Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
3184  };
3185  auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
3186  auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
3187 
3188  auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
3189  if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
3190  assert(V.getSimpleValueType() == MVT::i32 &&
3191  V.getOperand(0).getSimpleValueType() == MVT::i64 &&
3192  "Expected i64 -> i32 truncation");
3193  V = V.getOperand(0);
3194  }
3195  return V;
3196  };
3197 
3198  // a) x & ((1 << nbits) + (-1))
3199  auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation,
3200  &NBits](SDValue Mask) -> bool {
3201  // Match `add`. Must only have one use!
3202  if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
3203  return false;
3204  // We should be adding an all-ones constant (i.e. subtracting one).
3205  if (!isAllOnesConstant(Mask->getOperand(1)))
3206  return false;
3207  // Match `1 << nbits`. Might be truncated. Must only have one use!
3208  SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3209  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3210  return false;
3211  if (!isOneConstant(M0->getOperand(0)))
3212  return false;
3213  NBits = M0->getOperand(1);
3214  return true;
3215  };
3216 
3217  auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
3218  V = peekThroughOneUseTruncation(V);
3219  return CurDAG->MaskedValueIsAllOnes(
3220  V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
3221  NVT.getSizeInBits()));
3222  };
3223 
3224  // b) x & ~(-1 << nbits)
3225  auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
3226  &NBits](SDValue Mask) -> bool {
3227  // Match `~()`. Must only have one use!
3228  if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
3229  return false;
3230  // The -1 only has to be all-ones for the final Node's NVT.
3231  if (!isAllOnes(Mask->getOperand(1)))
3232  return false;
3233  // Match `-1 << nbits`. Might be truncated. Must only have one use!
3234  SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3235  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3236  return false;
3237  // The -1 only has to be all-ones for the final Node's NVT.
3238  if (!isAllOnes(M0->getOperand(0)))
3239  return false;
3240  NBits = M0->getOperand(1);
3241  return true;
3242  };
3243 
3244  // Match potentially-truncated (bitwidth - y)
3245  auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt,
3246  unsigned Bitwidth) {
3247  // Skip over a truncate of the shift amount.
3248  if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
3249  ShiftAmt = ShiftAmt.getOperand(0);
3250  // The trunc should have been the only user of the real shift amount.
3251  if (!checkOneUse(ShiftAmt))
3252  return false;
3253  }
3254  // Match the shift amount as: (bitwidth - y). It should go away, too.
3255  if (ShiftAmt.getOpcode() != ISD::SUB)
3256  return false;
3257  auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
3258  if (!V0 || V0->getZExtValue() != Bitwidth)
3259  return false;
3260  NBits = ShiftAmt.getOperand(1);
3261  return true;
3262  };
3263 
3264  // c) x & (-1 >> (32 - y))
3265  auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation,
3266  matchShiftAmt](SDValue Mask) -> bool {
3267  // The mask itself may be truncated.
3268  Mask = peekThroughOneUseTruncation(Mask);
3269  unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
3270  // Match `l>>`. Must only have one use!
3271  if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
3272  return false;
3273  // We should be shifting a truly all-ones constant.
3274  if (!isAllOnesConstant(Mask.getOperand(0)))
3275  return false;
3276  SDValue M1 = Mask.getOperand(1);
3277  // The shift amount should not be used externally.
3278  if (!checkOneUse(M1))
3279  return false;
3280  return matchShiftAmt(M1, Bitwidth);
3281  };
3282 
3283  SDValue X;
3284 
3285  // d) x << (32 - y) >> (32 - y)
3286  auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt,
3287  &X](SDNode *Node) -> bool {
3288  if (Node->getOpcode() != ISD::SRL)
3289  return false;
3290  SDValue N0 = Node->getOperand(0);
3291  if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
3292  return false;
3293  unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
3294  SDValue N1 = Node->getOperand(1);
3295  SDValue N01 = N0->getOperand(1);
3296  // Both of the shifts must be by the exact same value.
3297  // There should not be any uses of the shift amount outside of the pattern.
3298  if (N1 != N01 || !checkTwoUse(N1))
3299  return false;
3300  if (!matchShiftAmt(N1, Bitwidth))
3301  return false;
3302  X = N0->getOperand(0);
3303  return true;
3304  };
3305 
3306  auto matchLowBitMask = [matchPatternA, matchPatternB,
3307  matchPatternC](SDValue Mask) -> bool {
3308  return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
3309  };
3310 
3311  if (Node->getOpcode() == ISD::AND) {
3312  X = Node->getOperand(0);
3313  SDValue Mask = Node->getOperand(1);
3314 
3315  if (matchLowBitMask(Mask)) {
3316  // Great.
3317  } else {
3318  std::swap(X, Mask);
3319  if (!matchLowBitMask(Mask))
3320  return false;
3321  }
3322  } else if (!matchPatternD(Node))
3323  return false;
3324 
3325  SDLoc DL(Node);
3326 
3327  // Truncate the shift amount.
3328  NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
3329  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3330 
3331  // Insert the 8-bit NBits into the lowest 8 bits of a 32-bit register.
3332  // All the other bits are undefined; we do not care about them.
3333  SDValue ImplDef = SDValue(
3334  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
3335  insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
3336 
3337  SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
3338  insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
3339  NBits = SDValue(
3340  CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef,
3341  NBits, SRIdxVal), 0);
3342  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3343 
3344  if (Subtarget->hasBMI2()) {
3345  // Great, just emit the BZHI.
3346  if (NVT != MVT::i32) {
3347  // But we have to place the bit count into a wide-enough register first.
3348  NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
3349  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3350  }
3351 
3352  SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
3353  ReplaceNode(Node, Extract.getNode());
3354  SelectCode(Extract.getNode());
3355  return true;
3356  }
3357 
3358  // Else, if we do *NOT* have BMI2, check whether 'X' is *logically*
3359  // shifted (potentially with a one-use trunc in between),
3360  // and whether the truncation was the only use of the shift,
3361  // and if so look past that one-use truncation.
3362  {
3363  SDValue RealX = peekThroughOneUseTruncation(X);
3364  // FIXME: only if the shift is one-use?
3365  if (RealX != X && RealX.getOpcode() == ISD::SRL)
3366  X = RealX;
3367  }
3368 
3369  MVT XVT = X.getSimpleValueType();
3370 
3371  // Else, emitting BEXTR requires one more step.
3372  // The 'control' of BEXTR has the pattern of:
3373  //  [15...8 bit][ 7...0 bit] location
3374  //  [ bit count][     shift] name
3375  //  I.e. 0b00000010'00000001 means  (x >> 0b1) & 0b11
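      //  For example, extracting 4 bits starting at bit 2, i.e. (x >> 2) & 0xF,
      //  uses control (4 << 8) | 2 == 0b00000100'00000010 == 0x0402.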
3376 
3377  // Shift NBits left by 8 bits, thus producing 'control'.
3378  // This leaves the low 8 bits zero.
3379  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
3380  SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
3381  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3382 
3383  // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
3384  // FIXME: only if the shift is one-use?
3385  if (X.getOpcode() == ISD::SRL) {
3386  SDValue ShiftAmt = X.getOperand(1);
3387  X = X.getOperand(0);
3388 
3389  assert(ShiftAmt.getValueType() == MVT::i8 &&
3390  "Expected shift amount to be i8");
3391 
3392  // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
3393  // We could zext to i16 in some form, but we intentionally don't do that.
3394  SDValue OrigShiftAmt = ShiftAmt;
3395  ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
3396  insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
3397 
3398  // And now 'or' these low 8 bits of shift amount into the 'control'.
3399  Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
3400  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3401  }
3402 
3403  // But we have to place the 'control' into a wide-enough register first.
3404  if (XVT != MVT::i32) {
3405  Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
3406  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3407  }
3408 
3409  // And finally, form the BEXTR itself.
3410  SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
3411 
3412  // The 'X' was originally truncated. Truncate the result back down now.
3413  if (XVT != NVT) {
3414  insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
3415  Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
3416  }
3417 
3418  ReplaceNode(Node, Extract.getNode());
3419  SelectCode(Extract.getNode());
3420 
3421  return true;
3422 }
3423 
3424 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
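// For example, (x >> 4) & 0xFFF (shift 4, 12-bit mask) becomes BEXTR with
// control (12 << 8) | 4 == 0xC04, matching 'Shift | (MaskSize << 8)' below.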
3425 MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
3426  MVT NVT = Node->getSimpleValueType(0);
3427  SDLoc dl(Node);
3428 
3429  SDValue N0 = Node->getOperand(0);
3430  SDValue N1 = Node->getOperand(1);
3431 
3432  // If we have TBM we can use an immediate for the control. If we have BMI
3433  // we should only do this if the BEXTR instruction is implemented well.
3434  // Otherwise moving the control into a register makes this more costly.
3435  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
3436  // hoisting the move immediate would make it worthwhile with a less optimal
3437  // BEXTR?
3438  if (!Subtarget->hasTBM() &&
3439  !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
3440  return nullptr;
3441 
3442  // Must have a shift right.
3443  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
3444  return nullptr;
3445 
3446  // Shift can't have additional users.
3447  if (!N0->hasOneUse())
3448  return nullptr;
3449 
3450  // Only supported for 32 and 64 bits.
3451  if (NVT != MVT::i32 && NVT != MVT::i64)
3452  return nullptr;
3453 
3454  // The shift amount and the RHS of the 'and' must be constants.
3455  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
3456  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3457  if (!MaskCst || !ShiftCst)
3458  return nullptr;
3459 
3460  // And RHS must be a mask.
3461  uint64_t Mask = MaskCst->getZExtValue();
3462  if (!isMask_64(Mask))
3463  return nullptr;
3464 
3465  uint64_t Shift = ShiftCst->getZExtValue();
3466  uint64_t MaskSize = countPopulation(Mask);
3467 
3468  // Don't interfere with something that can be handled by extracting AH.
3469  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
3470  if (Shift == 8 && MaskSize == 8)
3471  return nullptr;
3472 
3473  // Make sure we are only using bits that were in the original value, not
3474  // shifted in.
3475  if (Shift + MaskSize > NVT.getSizeInBits())
3476  return nullptr;
3477 
3478  SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
3479  unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
3480  unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
3481 
3482  // BMI requires the immediate to be placed in a register.
3483  if (!Subtarget->hasTBM()) {
3484  ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
3485  MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
3486  unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3487  New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
3488  }
3489 
3490  MachineSDNode *NewNode;
3491  SDValue Input = N0->getOperand(0);
3492  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3493  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3494  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
3495  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
3496  NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3497  // Update the chain.
3498  ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
3499  // Record the mem-refs
3500  CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
3501  } else {
3502  NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New);
3503  }
3504 
3505  return NewNode;
3506 }
3507 
3508 // Emit a PCMPISTR(I/M) instruction.
3509 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
3510  bool MayFoldLoad, const SDLoc &dl,
3511  MVT VT, SDNode *Node) {
3512  SDValue N0 = Node->getOperand(0);
3513  SDValue N1 = Node->getOperand(1);
3514  SDValue Imm = Node->getOperand(2);
3515  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3516  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3517 
3518  // Try to fold a load. No need to check alignment.
3519  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3520  if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3521  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3522  N1.getOperand(0) };
3523  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
3524  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3525  // Update the chain.
3526  ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
3527  // Record the mem-refs
3528  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3529  return CNode;
3530  }
3531 
3532  SDValue Ops[] = { N0, N1, Imm };
3533  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
3534  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3535  return CNode;
3536 }
3537 
3538 // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
3539 // to emit a second instruction after this one. This is needed since we have two
3540 // copyToReg nodes glued before this and we need to continue that glue through.
3541 MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
3542  bool MayFoldLoad, const SDLoc &dl,
3543  MVT VT, SDNode *Node,
3544  SDValue &InFlag) {
3545  SDValue N0 = Node->getOperand(0);
3546  SDValue N2 = Node->getOperand(2);
3547  SDValue Imm = Node->getOperand(4);
3548  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3549  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3550 
3551  // Try to fold a load. No need to check alignment.
3552  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3553  if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3554  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3555  N2.getOperand(0), InFlag };
3556  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
3557  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3558  InFlag = SDValue(CNode, 3);
3559  // Update the chain.
3560  ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
3561  // Record the mem-refs
3562  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
3563  return CNode;
3564  }
3565 
3566  SDValue Ops[] = { N0, N2, Imm, InFlag };
3567  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
3568  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3569  InFlag = SDValue(CNode, 2);
3570  return CNode;
3571 }
3572 
3573 bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3574  EVT VT = N->getValueType(0);
3575 
3576  // Only handle scalar shifts.
3577  if (VT.isVector())
3578  return false;
3579 
3580  // Narrower shifts only mask to 5 bits in hardware.
3581  unsigned Size = VT == MVT::i64 ? 64 : 32;
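  // For example, an i32 shift executes as 'x >> (amt & 31)', so adding or
  // subtracting any multiple of 32 to/from the shift amount is a no-op.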
3582 
3583  SDValue OrigShiftAmt = N->getOperand(1);
3584  SDValue ShiftAmt = OrigShiftAmt;
3585  SDLoc DL(N);
3586 
3587  // Skip over a truncate of the shift amount.
3588  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
3589  ShiftAmt = ShiftAmt->getOperand(0);
3590 
3591  // This function is called after X86DAGToDAGISel::matchBitExtract(),
3592  // so we are not afraid that we might mess up BZHI/BEXTR pattern.
3593 
3594  SDValue NewShiftAmt;
3595  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3596  SDValue Add0 = ShiftAmt->getOperand(0);
3597  SDValue Add1 = ShiftAmt->getOperand(1);
3598  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3599  // to avoid the ADD/SUB.
3600  if (isa<ConstantSDNode>(Add1) &&
3601  cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
3602  NewShiftAmt = Add0;
3603  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3604  // generate a NEG instead of a SUB of a constant.
3605  } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3606  isa<ConstantSDNode>(Add0) &&
3607  cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
3608  cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
3609  // Insert a negate op.
3610  // TODO: This isn't guaranteed to replace the sub if there is a logic cone
3611  // that uses it that's not a shift.
3612  EVT SubVT = ShiftAmt.getValueType();
3613  SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
3614  SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
3615  NewShiftAmt = Neg;
3616 
3617  // Insert these operands into a valid topological order so they can
3618  // get selected independently.
3619  insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
3620  insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
3621  } else
3622  return false;
3623  } else
3624  return false;
3625 
3626  if (NewShiftAmt.getValueType() != MVT::i8) {
3627  // Need to truncate the shift amount.
3628  NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
3629  // Add to a correct topological ordering.
3630  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3631  }
3632 
3633  // Insert a new mask to keep the shift amount legal. This should be removed
3634  // by isel patterns.
3635  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
3636  CurDAG->getConstant(Size - 1, DL, MVT::i8));
3637  // Place in a correct topological ordering.
3638  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3639 
3640  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
3641  NewShiftAmt);
3642  if (UpdatedNode != N) {
3643  // If we found an existing node, we should replace ourselves with that node
3644  // and wait for it to be selected after its other users.
3645  ReplaceNode(N, UpdatedNode);
3646  return true;
3647  }
3648 
3649  // If the original shift amount is now dead, delete it so that we don't run
3650  // it through isel.
3651  if (OrigShiftAmt.getNode()->use_empty())
3652  CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
3653 
3654  // Now that we've optimized the shift amount, defer to normal isel to get
3655  // load folding and legacy vs BMI2 selection without repeating it here.
3656  SelectCode(N);
3657  return true;
3658 }
3659 
3660 bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
3661  MVT NVT = N->getSimpleValueType(0);
3662  unsigned Opcode = N->getOpcode();
3663  SDLoc dl(N);
3664 
3665  // For operations of the form (x << C1) op C2, check if we can use a smaller
3666  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
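  // For example, (x << 8) | 0x4400 becomes ((x | 0x44) << 8): 0x44 fits in a
  // sign-extended imm8 while 0x4400 does not.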
3667  SDValue Shift = N->getOperand(0);
3668  SDValue N1 = N->getOperand(1);
3669 
3670  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
3671  if (!Cst)
3672  return false;
3673 
3674  int64_t Val = Cst->getSExtValue();
3675 
3676  // If we have an any_extend feeding the AND, look through it to see if there
3677  // is a shift behind it. But only if the AND doesn't use the extended bits.
3678  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
3679  bool FoundAnyExtend = false;
3680  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
3681  Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
3682  isUInt<32>(Val)) {
3683  FoundAnyExtend = true;
3684  Shift = Shift.getOperand(0);
3685  }
3686 
3687  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
3688  return false;
3689 
3690  // i8 is unshrinkable, i16 should be promoted to i32.
3691  if (NVT != MVT::i32 && NVT != MVT::i64)
3692  return false;
3693 
3694  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
3695  if (!ShlCst)
3696  return false;
3697 
3698  uint64_t ShAmt = ShlCst->getZExtValue();
3699 
3700  // Make sure that we don't change the operation by removing bits.
3701  // This only matters for OR and XOR; AND is unaffected.
3702  uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
3703  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
3704  return false;
3705 
3706  // Check the minimum bitwidth for the new constant.
3707  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
3708  auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
3709  if (Opcode == ISD::AND) {
3710  // AND32ri is the same as AND64ri32 with zext imm.
3711  // Try this before sign extended immediates below.
3712  ShiftedVal = (uint64_t)Val >> ShAmt;
3713  if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
3714  return true;
3715  // Also swap order when the AND can become MOVZX.
3716  if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
3717  return true;
3718  }
3719  ShiftedVal = Val >> ShAmt;
3720  if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
3721  (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
3722  return true;
3723  if (Opcode != ISD::AND) {
3724  // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
3725  ShiftedVal = (uint64_t)Val >> ShAmt;
3726  if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
3727  return true;
3728  }
3729  return false;
3730  };
3731 
3732  int64_t ShiftedVal;
3733  if (!CanShrinkImmediate(ShiftedVal))
3734  return false;
3735 
3736  // Ok, we can reorder to get a smaller immediate.
3737 
3738  // But it's possible the original immediate allowed an AND to become MOVZX.
3739  // This check is done late so as to delay the MaskedValueIsZero call as long
3740  // as possible.
3741  if (Opcode == ISD::AND) {
3742  // Find the smallest zext this could possibly be.
3743  unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
3744  ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));
3745 
3746  // Figure out which bits need to be zero to achieve that mask.
3747  APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
3748  ZExtWidth);
3749  NeededMask &= ~Cst->getAPIntValue();
3750 
3751  if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
3752  return false;
3753  }
3754 
3755  SDValue X = Shift.getOperand(0);
3756  if (FoundAnyExtend) {
3757  SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
3758  insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
3759  X = NewX;
3760  }
3761 
3762  SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
3763  insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
3764  SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
3765  insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
3766  SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
3767  Shift.getOperand(1));
3768  ReplaceNode(N, NewSHL.getNode());
3769  SelectCode(NewSHL.getNode());
3770  return true;
3771 }
3772 
3773 /// If the high bits of an 'and' operand are known zero, try setting the
3774 /// high bits of an 'and' constant operand to produce a smaller encoding by
3775 /// creating a small, sign-extended negative immediate rather than a large
3776 /// positive one. This reverses a transform in SimplifyDemandedBits that
3777 /// shrinks mask constants by clearing bits. There is also a possibility that
3778 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
3779 /// case, just replace the 'and'. Return 'true' if the node is replaced.
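/// For example, if the upper four bits of the other operand are known zero,
/// 'and rax, 0x0FFFFFFFFFFFFFF0' can become 'and rax, -16' (0xFF...F0), which
/// encodes as a sign-extended imm8, whereas the original mask does not even
/// fit in an imm32.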
3780 bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
3781  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
3782  // have immediate operands.
3783  MVT VT = And->getSimpleValueType(0);
3784  if (VT != MVT::i32 && VT != MVT::i64)
3785  return false;
3786 
3787  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
3788  if (!And1C)
3789  return false;
3790 
3791  // Bail out if the mask constant is already negative. It can't shrink further.
3792  // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
3793  // patterns to use a 32-bit and instead of a 64-bit and by relying on the
3794  // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
3795  // are negative too.
3796  APInt MaskVal = And1C->getAPIntValue();
3797  unsigned MaskLZ = MaskVal.countLeadingZeros();
3798  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
3799  return false;
3800 
3801  // Don't extend into the upper 32 bits of a 64 bit mask.
3802  if (VT == MVT::i64 && MaskLZ >= 32) {
3803  MaskLZ -= 32;
3804  MaskVal = MaskVal.trunc(32);
3805  }
3806 
3807  SDValue And0 = And->getOperand(0);
3808  APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
3809  APInt NegMaskVal = MaskVal | HighZeros;
3810 
3811  // If a negative constant would not allow a smaller encoding, there's no need
3812  // to continue. Only change the constant when we know it's a win.
3813  unsigned MinWidth = NegMaskVal.getMinSignedBits();
3814  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
3815  return false;
3816 
3817  // Extend masks if we truncated above.
3818  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
3819  NegMaskVal = NegMaskVal.zext(64);
3820  HighZeros = HighZeros.zext(64);
3821  }
3822 
3823  // The variable operand must be all zeros in the top bits to allow using the
3824  // new, negative constant as the mask.
3825  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
3826  return false;
3827 
3828  // Check if the mask is -1. In that case, this is an unnecessary instruction
3829  // that escaped earlier analysis.
3830  if (NegMaskVal.isAllOnesValue()) {
3831  ReplaceNode(And, And0.getNode());
3832  return true;
3833  }
3834 
3835  // A negative mask allows a smaller encoding. Create a new 'and' node.
3836  SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
3837  SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
3838  ReplaceNode(And, NewAnd.getNode());
3839  SelectCode(NewAnd.getNode());
3840  return true;
3841 }
3842 
3843 static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
3844  bool FoldedBCast, bool Masked) {
3845  if (Masked) {
3846  if (FoldedLoad) {
3847  switch (TestVT.SimpleTy) {
3848  default: llvm_unreachable("Unexpected VT!");
3849  case MVT::v16i8:
3850  return IsTestN ? X86::VPTESTNMBZ128rmk : X86::VPTESTMBZ128rmk;
3851  case MVT::v8i16:
3852  return IsTestN ? X86::VPTESTNMWZ128rmk : X86::VPTESTMWZ128rmk;
3853  case MVT::v4i32:
3854  return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk;
3855  case MVT::v2i64:
3856  return IsTestN ? X86::VPTESTNMQZ128rmk : X86::VPTESTMQZ128rmk;
3857  case MVT::v32i8:
3858  return IsTestN ? X86::VPTESTNMBZ256rmk : X86::VPTESTMBZ256rmk;
3859  case MVT::v16i16:
3860  return IsTestN ? X86::VPTESTNMWZ256rmk : X86::VPTESTMWZ256rmk;
3861  case MVT::v8i32:
3862  return IsTestN ? X86::VPTESTNMDZ256rmk : X86::VPTESTMDZ256rmk;
3863  case MVT::v4i64:
3864  return IsTestN ? X86::VPTESTNMQZ256rmk : X86::VPTESTMQZ256rmk;
3865  case MVT::v64i8:
3866  return IsTestN ? X86::VPTESTNMBZrmk : X86::VPTESTMBZrmk;
3867  case MVT::v32i16:
3868  return IsTestN ? X86::VPTESTNMWZrmk : X86::VPTESTMWZrmk;
3869  case MVT::v16i32:
3870  return IsTestN ? X86::VPTESTNMDZrmk : X86::VPTESTMDZrmk;
3871  case MVT::v8i64:
3872  return IsTestN ? X86::VPTESTNMQZrmk : X86::VPTESTMQZrmk;
3873  }
3874  }
3875 
3876  if (FoldedBCast) {
3877  switch (TestVT.SimpleTy) {
3878  default: llvm_unreachable("Unexpected VT!");
3879  case MVT::v4i32:
3880  return IsTestN ? X86::VPTESTNMDZ128rmbk : X86::VPTESTMDZ128rmbk;
3881  case MVT::v2i64:
3882  return IsTestN ? X86::VPTESTNMQZ128rmbk : X86::VPTESTMQZ128rmbk;
3883  case MVT::v8i32:
3884  return IsTestN ? X86::VPTESTNMDZ256rmbk : X86::VPTESTMDZ256rmbk;
3885  case MVT::v4i64:
3886  return IsTestN ? X86::VPTESTNMQZ256rmbk : X86::VPTESTMQZ256rmbk;
3887  case MVT::v16i32:
3888  return IsTestN ? X86::VPTESTNMDZrmbk : X86::VPTESTMDZrmbk;
3889  case MVT::v8i64:
3890  return IsTestN ? X86::VPTESTNMQZrmbk : X86::VPTESTMQZrmbk;
3891  }
3892  }
3893 
3894  switch (TestVT.SimpleTy) {
3895  default: llvm_unreachable("Unexpected VT!");
3896  case MVT::v16i8:
3897  return IsTestN ? X86::VPTESTNMBZ128rrk : X86::VPTESTMBZ128rrk;
3898  case MVT::v8i16:
3899  return IsTestN ? X86::VPTESTNMWZ128rrk : X86::VPTESTMWZ128rrk;
3900  case MVT::v4i32:
3901  return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk;
3902  case MVT::v2i64:
3903  return IsTestN ? X86::VPTESTNMQZ128rrk : X86::VPTESTMQZ128rrk;
3904  case MVT::v32i8:
3905  return IsTestN ? X86::VPTESTNMBZ256rrk : X86::VPTESTMBZ256rrk;
3906  case MVT::v16i16:
3907  return IsTestN ? X86::VPTESTNMWZ256rrk : X86::VPTESTMWZ256rrk;
3908  case MVT::v8i32:
3909  return IsTestN ? X86::VPTESTNMDZ256rrk : X86::VPTESTMDZ256rrk;
3910  case MVT::v4i64:
3911  return IsTestN ? X86::VPTESTNMQZ256rrk : X86::VPTESTMQZ256rrk;
3912  case MVT::v64i8:
3913  return IsTestN ? X86::VPTESTNMBZrrk : X86::VPTESTMBZrrk;
3914  case MVT::v32i16:
3915  return IsTestN ? X86::VPTESTNMWZrrk : X86::VPTESTMWZrrk;
3916  case MVT::v16i32:
3917  return IsTestN ? X86::VPTESTNMDZrrk : X86::VPTESTMDZrrk;
3918  case MVT::v8i64:
3919  return IsTestN ? X86::VPTESTNMQZrrk : X86::VPTESTMQZrrk;
3920  }
3921  }
3922 
3923  if (FoldedLoad) {
3924  switch (TestVT.SimpleTy) {
3925  default: llvm_unreachable("Unexpected VT!");
3926  case MVT::v16i8:
3927  return IsTestN ? X86::VPTESTNMBZ128rm : X86::VPTESTMBZ128rm;
3928  case MVT::v8i16:
3929  return IsTestN ? X86::VPTESTNMWZ128rm : X86::VPTESTMWZ128rm;
3930  case MVT::v4i32:
3931  return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm;
3932  case MVT::v2i64:
3933  return IsTestN ? X86::VPTESTNMQZ128rm : X86::VPTESTMQZ128rm;
3934  case MVT::v32i8:
3935  return IsTestN ? X86::VPTESTNMBZ256rm : X86::VPTESTMBZ256rm;
3936  case MVT::v16i16:
3937  return IsTestN ? X86::VPTESTNMWZ256rm : X86::VPTESTMWZ256rm;
3938  case MVT::v8i32:
3939  return IsTestN ? X86::VPTESTNMDZ256rm : X86::VPTESTMDZ256rm;
3940  case MVT::v4i64:
3941  return IsTestN ? X86::VPTESTNMQZ256rm : X86::VPTESTMQZ256rm;
3942  case MVT::v64i8:
3943  return IsTestN ? X86::VPTESTNMBZrm : X86::VPTESTMBZrm;
3944  case MVT::v32i16:
3945  return IsTestN ? X86::VPTESTNMWZrm : X86::VPTESTMWZrm;
3946  case MVT::v16i32:
3947  return IsTestN ? X86::VPTESTNMDZrm : X86::VPTESTMDZrm;
3948  case MVT::v8i64:
3949  return IsTestN ? X86::VPTESTNMQZrm : X86::VPTESTMQZrm;
3950  }
3951  }
3952 
3953  if (FoldedBCast) {
3954  switch (TestVT.SimpleTy) {
3955  default: llvm_unreachable("Unexpected VT!");
3956  case MVT::v4i32:
3957  return IsTestN ? X86::VPTESTNMDZ128rmb : X86::VPTESTMDZ128rmb;
3958  case MVT::v2i64:
3959  return IsTestN ? X86::VPTESTNMQZ128rmb : X86::VPTESTMQZ128rmb;
3960  case MVT::v8i32:
3961  return IsTestN ? X86::VPTESTNMDZ256rmb : X86::VPTESTMDZ256rmb;
3962  case MVT::v4i64:
3963  return IsTestN ? X86::VPTESTNMQZ256rmb : X86::VPTESTMQZ256rmb;
3964  case MVT::v16i32:
3965  return IsTestN ? X86::VPTESTNMDZrmb : X86::VPTESTMDZrmb;
3966  case MVT::v8i64:
3967  return IsTestN ? X86::VPTESTNMQZrmb : X86::VPTESTMQZrmb;
3968  }
3969  }
3970 
3971  switch (TestVT.SimpleTy) {
3972  default: llvm_unreachable("Unexpected VT!");
3973  case MVT::v16i8:
3974  return IsTestN ? X86::VPTESTNMBZ128rr : X86::VPTESTMBZ128rr;
3975  case MVT::v8i16:
3976  return IsTestN ? X86::VPTESTNMWZ128rr : X86::VPTESTMWZ128rr;
3977  case MVT::v4i32:
3978  return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr;
3979  case MVT::v2i64:
3980  return IsTestN ? X86::VPTESTNMQZ128rr : X86::VPTESTMQZ128rr;
3981  case MVT::v32i8:
3982  return IsTestN ? X86::VPTESTNMBZ256rr : X86::VPTESTMBZ256rr;
3983  case MVT::v16i16:
3984  return IsTestN ? X86::VPTESTNMWZ256rr : X86::VPTESTMWZ256rr;
3985  case MVT::v8i32:
3986  return IsTestN ? X86::VPTESTNMDZ256rr : X86::VPTESTMDZ256rr;
3987  case MVT::v4i64:
3988  return IsTestN ? X86::VPTESTNMQZ256rr : X86::VPTESTMQZ256rr;
3989  case MVT::v64i8:
3990  return IsTestN ? X86::VPTESTNMBZrr : X86::VPTESTMBZrr;
3991  case MVT::v32i16:
3992  return IsTestN ? X86::VPTESTNMWZrr : X86::VPTESTMWZrr;
3993  case MVT::v16i32:
3994  return IsTestN ? X86::VPTESTNMDZrr : X86::VPTESTMDZrr;
3995  case MVT::v8i64:
3996  return IsTestN ? X86::VPTESTNMQZrr : X86::VPTESTMQZrr;
3997  }
3998 }
3999 
4000 // Try to create a VPTESTM instruction. If InMask is not null, it will be used
4001 // to form a masked operation.
4002 bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
4003  SDValue InMask) {
4004  assert(Subtarget->hasAVX512() && "Expected AVX512!");
4005  assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
4006  "Unexpected VT!");
4007 
4008  // Look for equal and not equal compares.
4009  ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
4010  if (CC != ISD::SETEQ && CC != ISD::SETNE)
4011  return false;
4012 
4013  // See if we're comparing against zero. This should have been canonicalized
4014  // to RHS during lowering.
4015  if (!ISD::isBuildVectorAllZeros(Setcc.getOperand(1).getNode()))
4016  return false;
4017 
4018  SDValue N0 = Setcc.getOperand(0);
4019 
4020  MVT CmpVT = N0.getSimpleValueType();
4021  MVT CmpSVT = CmpVT.getVectorElementType();
4022 
4023  // Start with both operands the same. We'll try to refine this.
4024  SDValue Src0 = N0;
4025  SDValue Src1 = N0;
4026 
4027  {
4028  // Look through single use bitcasts.
4029  SDValue N0Temp = N0;
4030  if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
4031  N0Temp = N0.getOperand(0);
4032 
4033  // Look for single use AND.
4034  if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
4035  Src0 = N0Temp.getOperand(0);
4036  Src1 = N0Temp.getOperand(1);
4037  }
4038  }
4039 
4040  // Without VLX we need to widen the load.
4041  bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
4042 
4043  // We can only fold loads if the sources are unique.
4044  bool CanFoldLoads = Src0 != Src1;
4045 
4046  // Try to fold loads unless we need to widen.
4047  bool FoldedLoad = false;
4048  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load;
4049  if (!Widen && CanFoldLoads) {
4050  Load = Src1;
4051  FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3,
4052  Tmp4);
4053  if (!FoldedLoad) {
4054  // AND is commutative.
4055  Load = Src0;
4056  FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2,
4057  Tmp3, Tmp4);
4058  if (FoldedLoad)
4059  std::swap(Src0, Src1);
4060  }
4061  }
4062 
4063  auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
4064  // Look through single use bitcasts.
4065  if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse())
4066  Src = Src.getOperand(0);
4067 
4068  if (Src.getOpcode() == X86ISD::VBROADCAST && Src.hasOneUse()) {
4069  Parent = Src.getNode();
4070  Src = Src.getOperand(0);
4071  if (Src.getSimpleValueType() == CmpSVT)
4072  return Src;
4073  }
4074 
4075  return SDValue();
4076  };
4077 
4078  // If we didn't fold a load, try to match a broadcast. There is no widening
4079  // limitation for this, but only 32 and 64 bit types are supported.
4080  bool FoldedBCast = false;
4081  if (!FoldedLoad && CanFoldLoads &&
4082  (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
4083  SDNode *ParentNode = nullptr;
4084  if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
4085  FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
4086  Tmp1, Tmp2, Tmp3, Tmp4);
4087  }
4088 
4089  // Try the other operand.
4090  if (!FoldedBCast) {
4091  if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
4092  FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
4093  Tmp1, Tmp2, Tmp3, Tmp4);
4094  if (FoldedBCast)
4095  std::swap(Src0, Src1);
4096  }
4097  }
4098  }
4099 
4100  auto getMaskRC = [](MVT MaskVT) {
4101  switch (MaskVT.SimpleTy) {
4102  default: llvm_unreachable("Unexpected VT!");
4103  case MVT::v2i1: return X86::VK2RegClassID;
4104  case MVT::v4i1: return X86::VK4RegClassID;
4105  case MVT::v8i1: return X86::VK8RegClassID;
4106  case MVT::v16i1: return X86::VK16RegClassID;
4107  case MVT::v32i1: return X86::VK32RegClassID;
4108  case MVT::v64i1: return X86::VK64RegClassID;
4109  }
4110  };
4111 
4112  bool IsMasked = InMask.getNode() != nullptr;
4113 
4114  SDLoc dl(Root);
4115 
4116  MVT ResVT = Setcc.getSimpleValueType();
4117  MVT MaskVT = ResVT;
4118  if (Widen) {
4119  // Widen the inputs using insert_subreg or copy_to_regclass.
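    // Without AVX512VL the VPTESTM/VPTESTNM instructions only exist in
    // 512-bit form, so a 128/256-bit compare is done at 512 bits with the
    // extra elements left undefined, and the result mask is shrunk afterwards.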
4120  unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
4121  unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
4122  unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
4123  CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
4124  MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4125  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
4126  CmpVT), 0);
4127  Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
4128 
4129  assert(!FoldedLoad && "Shouldn't have folded the load");
4130  if (!FoldedBCast)
4131  Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
4132 
4133  if (IsMasked) {
4134  // Widen the mask.
4135  unsigned RegClass = getMaskRC(MaskVT);
4136  SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4137  InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4138  dl, MaskVT, InMask, RC), 0);
4139  }
4140  }
4141 
4142  bool IsTestN = CC == ISD::SETEQ;
4143  unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
4144  IsMasked);
4145 
4146  MachineSDNode *CNode;
4147  if (FoldedLoad || FoldedBCast) {
4148  SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
4149 
4150  if (IsMasked) {
4151  SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4152  Load.getOperand(0) };
4153  CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4154  } else {
4155  SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
4156  Load.getOperand(0) };
4157  CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4158  }
4159 
4160  // Update the chain.
4161  ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
4162  // Record the mem-refs
4163  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
4164  } else {
4165  if (IsMasked)
4166  CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
4167  else
4168  CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
4169  }
4170 
4171  // If we widened, we need to shrink the mask VT.
4172  if (Widen) {
4173  unsigned RegClass = getMaskRC(ResVT);
4174  SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4175  CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4176  dl, ResVT, SDValue(CNode, 0), RC);
4177  }
4178 
4179  ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
4180  CurDAG->RemoveDeadNode(Root);
4181  return true;
4182 }
4183 
4184 void X86DAGToDAGISel::Select(SDNode *Node) {
4185  MVT NVT = Node->getSimpleValueType(0);
4186  unsigned Opcode = Node->getOpcode();
4187  SDLoc dl(Node);
4188 
4189  if (Node->isMachineOpcode()) {
4190  LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
4191  Node->setNodeId(-1);
4192  return; // Already selected.
4193  }
4194 
4195  switch (Opcode) {
4196  default: break;
4197  case ISD::INTRINSIC_VOID: {
4198  unsigned IntNo = Node->getConstantOperandVal(1);
4199  switch (IntNo) {
4200  default: break;
4201  case Intrinsic::x86_sse3_monitor:
4202  case Intrinsic::x86_monitorx:
4203  case Intrinsic::x86_clzero: {
4204  bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
4205 
4206  unsigned Opc = 0;
4207  switch (IntNo) {
4208  default: llvm_unreachable("Unexpected intrinsic!");
4209  case Intrinsic::x86_sse3_monitor:
4210  if (!Subtarget->hasSSE3())
4211  break;
4212  Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
4213  break;
4214  case Intrinsic::x86_monitorx:
4215  if (!Subtarget->hasMWAITX())
4216  break;
4217  Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
4218  break;
4219  case Intrinsic::x86_clzero:
4220  if (!Subtarget->hasCLZERO())
4221  break;
4222  Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
4223  break;
4224  }
4225 
4226  if (Opc) {
4227  unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
4228  SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
4229  Node->getOperand(2), SDValue());
4230  SDValue InFlag = Chain.getValue(1);
4231 
4232  if (IntNo == Intrinsic::x86_sse3_monitor ||
4233  IntNo == Intrinsic::x86_monitorx) {
4234  // Copy the other two operands to ECX and EDX.
4235  Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
4236  InFlag);
4237  InFlag = Chain.getValue(1);
4238  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
4239  InFlag);
4240  InFlag = Chain.getValue(1);
4241  }
4242 
4243  MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4244  { Chain, InFlag});
4245  ReplaceNode(Node, CNode);
4246  return;
4247  }
4248  }
4249  }
4250 
4251  break;
4252  }
4253  case ISD::BRIND: {
4254  if (Subtarget->isTargetNaCl())
4255  // NaCl has its own pass where jmp %r32 instructions are converted to
4256  // jmp %r64. We leave the instruction alone.
4257  break;
4258  if (Subtarget->isTarget64BitILP32()) {
4259  // Converts a 32-bit register to a 64-bit, zero-extended version of
4260  // it. This is needed because x86-64 can do many things, but jmp %r32
4261  // ain't one of them.
4262  const SDValue &Target = Node->getOperand(1);
4263  assert(Target.getSimpleValueType() == llvm::MVT::i32);
4264  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
4265  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
4266  Node->getOperand(0), ZextTarget);
4267  ReplaceNode(Node, Brind.getNode());
4268  SelectCode(ZextTarget.getNode());
4269  SelectCode(Brind.getNode());
4270  return;
4271  }
4272  break;
4273  }
4274  case X86ISD::GlobalBaseReg:
4275  ReplaceNode(Node, getGlobalBaseReg());
4276  return;
4277 
4278  case ISD::BITCAST:
4279  // Just drop all 128/256/512-bit bitcasts.
4280  if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
4281  NVT == MVT::f128) {
4282  ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
4283  CurDAG->RemoveDeadNode(Node);
4284  return;
4285  }
4286  break;
4287 
4288  case ISD::VSELECT: {
4289  // Replace a VSELECT with non-mask conditions with BLENDV.
4290  if (Node->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
4291  break;
4292 
4293  assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
4294  SDValue Blendv = CurDAG->getNode(
4295  X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
4296  Node->getOperand(1), Node->getOperand(2));
4297  ReplaceNode(Node, Blendv.getNode());
4298  SelectCode(Blendv.getNode());
4299  // We already called ReplaceUses.
4300  return;
4301  }
4302 
4303  case ISD::SRL:
4304  if (matchBitExtract(Node))
4305  return;
4306  LLVM_FALLTHROUGH;
4307  case ISD::SRA:
4308  case ISD::SHL:
4309  if (tryShiftAmountMod(Node))
4310  return;
4311  break;
4312 
4313  case ISD::AND:
4314  if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
4315  // Try to form a masked VPTESTM. Operands can be in either order.
4316  SDValue N0 = Node->getOperand(0);
4317  SDValue N1 = Node->getOperand(1);
4318  if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
4319  tryVPTESTM(Node, N0, N1))
4320  return;
4321  if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
4322  tryVPTESTM(Node, N1, N0))
4323  return;
4324  }
4325 
4326  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
4327  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4328  CurDAG->RemoveDeadNode(Node);
4329  return;
4330  }
4331  if (matchBitExtract(Node))
4332  return;
4333  if (AndImmShrink && shrinkAndImmediate(Node))
4334  return;
4335 
4336  LLVM_FALLTHROUGH;
4337  case ISD::OR:
4338  case ISD::XOR:
4339  if (tryShrinkShlLogicImm(Node))
4340  return;
4341 
4342  LLVM_FALLTHROUGH;
4343  case ISD::ADD:
4344  case ISD::SUB: {
4345  // Try to avoid folding immediates with multiple uses for optsize.
4346  // This code tries to select to register form directly to avoid going
4347  // through the isel table which might fold the immediate. We can't change
4348  // the add/sub/and/or/xor with immediate patterns in the
4349  // tablegen files to check the immediate use count without making the
4350  // patterns unavailable to the fast-isel table.
4351  if (!OptForSize)
4352  break;
4353 
4354  // Only handle i8/i16/i32/i64.
4355  if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
4356  break;
4357 
4358  SDValue N0 = Node->getOperand(0);
4359  SDValue N1 = Node->getOperand(1);
4360 
4361  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
4362  if (!Cst)
4363  break;
4364 
4365  int64_t Val = Cst->getSExtValue();
4366 
4367  // Make sure it's an immediate that is considered foldable.
4368  // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
4369  if (!isInt<8>(Val) && !isInt<32>(Val))
4370  break;
4371 
4372  // Check if we should avoid folding this immediate.
4373  if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
4374  break;
4375 
4376  // We should not fold the immediate. So we need a register form instead.
4377  unsigned ROpc, MOpc;
4378  switch (NVT.SimpleTy) {
4379  default: llvm_unreachable("Unexpected VT!");
4380  case MVT::i8:
4381  switch (Opcode) {
4382  default: llvm_unreachable("Unexpected opcode!");
4383  case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
4384  case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
4385  case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
4386  case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break;
4387  case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
4388  }
4389  break;
4390  case MVT::i16:
4391  switch (Opcode) {
4392  default: llvm_unreachable("Unexpected opcode!");
4393  case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
4394  case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
4395  case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
4396  case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break;
4397  case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
4398  }
4399  break;
4400  case MVT::i32:
4401  switch (Opcode) {
4402  default: llvm_unreachable("Unexpected opcode!");
4403  case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
4404  case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
4405  case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
4406  case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break;
4407  case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
4408  }
4409  break;
4410  case MVT::i64:
4411  switch (Opcode) {
4412  default: llvm_unreachable("Unexpected opcode!");
4413  case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
4414  case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
4415  case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
4416  case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break;
4417  case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
4418  }
4419  break;
4420  }
4421 
4422  // OK, this is an AND/OR/XOR/ADD/SUB with a constant.
4423 
4424  // If this is not a subtract, we can still try to fold a load.
4425  if (Opcode != ISD::SUB) {
4426  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4427  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4428  SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4429  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4430  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4431  // Update the chain.
4432  ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
4433  // Record the mem-refs
4434  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
4435  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4436  CurDAG->RemoveDeadNode(Node);
4437  return;
4438  }
4439  }
4440 
4441  CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
4442  return;
4443  }
4444 
4445  case X86ISD::SMUL:
4446  // i16/i32/i64 are handled with isel patterns.
4447  if (NVT != MVT::i8)
4448  break;
4449  LLVM_FALLTHROUGH;
4450  case X86ISD::UMUL: {
4451  SDValue N0 = Node->getOperand(0);
4452  SDValue N1 = Node->getOperand(1);
4453 
4454  unsigned LoReg, ROpc, MOpc;
4455  switch (NVT.SimpleTy) {
4456  default: llvm_unreachable("Unsupported VT!");
4457  case MVT::i8:
4458  LoReg = X86::AL;
4459  ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
4460  MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
4461  break;
4462  case MVT::i16:
4463  LoReg = X86::AX;
4464  ROpc = X86::MUL16r;
4465  MOpc = X86::MUL16m;
4466  break;
4467  case MVT::i32:
4468  LoReg = X86::EAX;
4469  ROpc = X86::MUL32r;
4470  MOpc = X86::MUL32m;
4471  break;
4472  case MVT::i64:
4473  LoReg = X86::RAX;
4474  ROpc = X86::MUL64r;
4475  MOpc = X86::MUL64m;
4476  break;
4477  }
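    // The one-operand MUL/IMUL forms implicitly use AL/AX/EAX/RAX as the
    // first source operand, which is why N0 is copied into LoReg below.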
4478 
4479  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4480  bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4481  // Multiply is commutative.
4482  if (!FoldedLoad) {
4483  FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4484  if (FoldedLoad)
4485  std::swap(N0, N1);
4486  }
4487 
4488  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
4489  N0, SDValue()).getValue(1);
4490 
4491  MachineSDNode *CNode;
4492  if (FoldedLoad) {
4493  // i16/i32/i64 use an instruction that produces a low and high result even
4494  // though only the low result is used.
4495  SDVTList VTs;
4496  if (NVT == MVT::i8)
4497  VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4498  else
4499  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
4500 
4501  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4502  InFlag };
4503  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4504 
4505  // Update the chain.
4506  ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
4507  // Record the mem-refs
4508  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4509  } else {
4510  // i16/i32/i64 use an instruction that produces a low and high result even
4511  // though only the low result is used.
4512  SDVTList VTs;
4513  if (NVT == MVT::i8)
4514  VTs = CurDAG->getVTList(NVT, MVT::i32);
4515  else
4516  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
4517 
4518  CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
4519  }
4520 
4521  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4522  ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
4523  CurDAG->RemoveDeadNode(Node);
4524  return;
4525  }
4526 
4527  case ISD::SMUL_LOHI:
4528  case ISD::UMUL_LOHI: {
4529  SDValue N0 = Node->getOperand(0);
4530  SDValue N1 = Node->getOperand(1);
4531 
4532  unsigned Opc, MOpc;
4533  bool isSigned = Opcode == ISD::SMUL_LOHI;
4534  if (!isSigned) {
4535  switch (NVT.SimpleTy) {
4536  default: llvm_unreachable("Unsupported VT!");
4537  case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
4538  case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
4539  }
4540  } else {
4541  switch (NVT.SimpleTy) {
4542  default: llvm_unreachable("Unsupported VT!");
4543  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
4544  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
4545  }
4546  }
4547 
4548  unsigned SrcReg, LoReg, HiReg;
4549  switch (Opc) {
4550  default: llvm_unreachable("Unknown MUL opcode!");
4551  case X86::IMUL32r:
4552  case X86::MUL32r:
4553  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
4554  break;
4555  case X86::IMUL64r:
4556  case X86::MUL64r:
4557  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
4558  break;
4559  }
4560 
4561  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4562  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4563  // Multiply is commutative.
4564  if (!foldedLoad) {
4565  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4566  if (foldedLoad)
4567  std::swap(N0, N1);
4568  }
4569 
4570  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
4571  N0, SDValue()).getValue(1);
4572  if (foldedLoad) {
4573  SDValue Chain;
4574  MachineSDNode *CNode = nullptr;
4575  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4576  InFlag };
4577  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
4578  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4579  Chain = SDValue(CNode, 0);
4580  InFlag = SDValue(CNode, 1);
4581 
4582  // Update the chain.
4583  ReplaceUses(N1.getValue(1), Chain);
4584  // Record the mem-refs
4585  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4586  } else {
4587  SDValue Ops[] = { N1, InFlag };
4588  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
4589  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4590  InFlag = SDValue(CNode, 0);
4591  }
4592 
4593  // Copy the low half of the result, if it is needed.
4594  if (!SDValue(Node, 0).use_empty()) {
4595  assert(LoReg && "Register for low half is not defined!");
4596  SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
4597  NVT, InFlag);
4598  InFlag = ResLo.getValue(2);
4599  ReplaceUses(SDValue(Node, 0), ResLo);
4600  LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
4601  dbgs() << '\n');
4602  }
4603  // Copy the high half of the result, if it is needed.
4604  if (!SDValue(Node, 1).use_empty()) {
4605  assert(HiReg && "Register for high half is not defined!");
4606  SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
4607  NVT, InFlag);
4608  InFlag = ResHi.getValue(2);
4609  ReplaceUses(SDValue(Node, 1), ResHi);
4610  LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
4611  dbgs() << '\n');
4612  }
4613 
4614  CurDAG->RemoveDeadNode(Node);
4615  return;
4616  }
4617 
4618  case ISD::SDIVREM:
4619  case ISD::UDIVREM: {
4620  SDValue N0 = Node->getOperand(0);
4621  SDValue N1 = Node->getOperand(1);
4622 
4623  unsigned Opc, MOpc;
4624  bool isSigned = Opcode == ISD::SDIVREM;
4625  if (!isSigned) {
4626  switch (NVT.SimpleTy) {
4627  default: llvm_unreachable("Unsupported VT!");
4628  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
4629  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
4630  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
4631  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
4632  }
4633  } else {
4634  switch (NVT.SimpleTy) {
4635  default: llvm_unreachable("Unsupported VT!");
4636  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
4637  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
4638  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
4639  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
4640  }
4641  }
4642 
4643  unsigned LoReg, HiReg, ClrReg;
4644  unsigned SExtOpcode;
4645  switch (NVT.SimpleTy) {
4646  default: llvm_unreachable("Unsupported VT!");
4647  case MVT::i8:
4648  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
4649  SExtOpcode = X86::CBW;
4650  break;
4651  case MVT::i16:
4652  LoReg = X86::AX; HiReg = X86::DX;
4653  ClrReg = X86::DX;
4654  SExtOpcode = X86::CWD;
4655  break;
4656  case MVT::i32:
4657  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
4658  SExtOpcode = X86::CDQ;
4659  break;
4660  case MVT::i64:
4661  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
4662  SExtOpcode = X86::CQO;
4663  break;
4664  }
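  // DIV/IDIV divide the double-width value in HiReg:LoReg (AH:AL for i8) by
  // the operand; the quotient lands in LoReg and the remainder in HiReg.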
4665 
4666  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4667  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4668  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
4669 
4670  SDValue InFlag;
4671  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
4672  // Special case for div8, just use a move with zero extension to AX to
4673  // clear the upper 8 bits (AH).
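  // e.g. "movzbl %cl, %eax; divb %bl" (registers illustrative): the MOVZX
  // leaves AH == 0, so AX already holds the zero-extended 8-bit dividend.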
4674  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
4675  MachineSDNode *Move;
4676  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4677  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4678  Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
4679  MVT::Other, Ops);
4680  Chain = SDValue(Move, 1);
4681  ReplaceUses(N0.getValue(1), Chain);
4682  // Record the mem-refs
4683  CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
4684  } else {
4685  Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0);
4686  Chain = CurDAG->getEntryNode();
4687  }
4688  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0),
4689  SDValue());
4690  InFlag = Chain.getValue(1);
4691  } else {
4692  InFlag =
4693  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
4694  LoReg, N0, SDValue()).getValue(1);
4695  if (isSigned && !signBitIsZero) {
4696  // Sign extend the low part into the high part.
4697  InFlag =
4698  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
4699  } else {
4700  // Zero out the high part, effectively zero extending the input.
4701  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32), 0);
4702  switch (NVT.SimpleTy) {
4703  case MVT::i16:
4704  ClrNode =
4705  SDValue(CurDAG->getMachineNode(
4706  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
4707  CurDAG->getTargetConstant(X86::sub_16bit, dl,
4708  MVT::i32)),
4709  0);
4710  break;
4711  case MVT::i32:
4712  break;
4713  case MVT::i64:
4714  ClrNode =
4715  SDValue(CurDAG->getMachineNode(
4716  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
4717  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
4718  CurDAG->getTargetConstant(X86::sub_32bit, dl,
4719  MVT::i32)),
4720  0);
4721  break;
4722  default:
4723  llvm_unreachable("Unexpected division source");
4724  }
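  // Note: MOV32r0 (an "xor r32, r32" idiom) is used for every width: an i16
  // zero is taken as a subregister of it, and an i64 zero relies on writes
  // to a 32-bit register implicitly clearing bits 63:32.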
4725 
4726  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
4727  ClrNode, InFlag).getValue(1);
4728  }
4729  }
4730 
4731  if (foldedLoad) {
4732  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4733  InFlag };
4734  MachineSDNode *CNode =
4735  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
4736  InFlag = SDValue(CNode, 1);
4737  // Update the chain.
4738  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
4739  // Record the mem-refs
4740  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4741  } else {
4742  InFlag =
4743  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
4744  }
4745 
4746  // Prevent use of AH in a REX instruction by explicitly copying it to
4747  // an ABCD_L register.
4748  //
4749  // The current assumption of the register allocator is that isel
4750  // won't generate explicit references to the GR8_ABCD_H registers. If
4751  // the allocator and/or the backend get enhanced to be more robust in
4752  // that regard, this can be, and should be, removed.
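  // Under a REX prefix the encodings that normally name AH/BH/CH/DH address
  // SPL/BPL/SIL/DIL instead, so AH is unencodable in any REX instruction;
  // the _NOREX opcodes below are guaranteed never to receive a REX prefix.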
4753  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
4754  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
4755  unsigned AHExtOpcode =
4756  isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
4757 
4758  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
4759  MVT::Glue, AHCopy, InFlag);
4760  SDValue Result(RNode, 0);
4761  InFlag = SDValue(RNode, 1);
4762 
4763  Result =
4764  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
4765 
4766  ReplaceUses(SDValue(Node, 1), Result);
4767  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4768  dbgs() << '\n');
4769  }
4770  // Copy the division (low) result, if it is needed.
4771  if (!SDValue(Node, 0).use_empty()) {
4772  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4773  LoReg, NVT, InFlag);
4774  InFlag = Result.getValue(2);
4775  ReplaceUses(SDValue(Node, 0), Result);
4776  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4777  dbgs() << '\n');
4778  }
4779  // Copy the remainder (high) result, if it is needed.
4780  if (!SDValue(Node, 1).use_empty()) {
4781  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4782  HiReg, NVT, InFlag);
4783  InFlag = Result.getValue(2);
4784  ReplaceUses(SDValue(Node, 1), Result);
4785  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4786  dbgs() << '\n');
4787  }
4788  CurDAG->RemoveDeadNode(Node);
4789  return;
4790  }
4791 
4792  case X86ISD::CMP: {
4793  SDValue N0 = Node->getOperand(0);
4794  SDValue N1 = Node->getOperand(1);
4795 
4796  // Optimizations for TEST compares.
4797  if (!isNullConstant(N1))
4798  break;
4799 
4800  // Save the original VT of the compare.
4801  MVT CmpVT = N0.getSimpleValueType();
4802 
4803  // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
4804  // by a test instruction. The test should be removed later by
4805  // analyzeCompare if we are using only the zero flag.
4806  // TODO: Should we check the users and use the BEXTR flags directly?
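  // e.g. "(X >> 4) & 0xFFF" extracts 12 bits starting at bit 4, which BEXTR
  // encodes as the control value (12 << 8) | 4 == 0xC04 (illustrative; the
  // exact node emitted is up to matchBEXTRFromAndImm).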
4807  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
4808  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
4809  unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
4810  : X86::TEST32rr;
4811  SDValue BEXTR = SDValue(NewNode, 0);
4812  NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
4813  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4814  CurDAG->RemoveDeadNode(Node);
4815  return;
4816  }
4817  }
4818 
4819  // We can peek through truncates, but we need to be careful below.
4820  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
4821  N0 = N0.getOperand(0);
4822 
4823  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
4824  // use a smaller encoding.
4825  // Look past the truncate if CMP is the only use of it.
4826  if (N0.getOpcode() == ISD::AND &&
4827  N0.getNode()->hasOneUse() &&
4828  N0.getValueType() != MVT::i8) {
4829  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
4830  if (!C) break;
4831  uint64_t Mask = C->getZExtValue();
4832 
4833  // Check if we can replace AND+IMM64 with a shift. This is possible for
4834  // masks like 0xFF000000 or 0x00FFFFFF, and only when we care solely about
4835  // the zero flag.
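  // e.g. Mask == 0xFFFFFFFF00000000: ~Mask is a low-bit mask, so the AND+TEST
  // becomes "shrq $32" plus "testq", avoiding a movabsq of the immediate.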
4836  if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
4837  onlyUsesZeroFlag(SDValue(Node, 0))) {
4838  if (isMask_64(~Mask)) {
4839  unsigned TrailingZeros = countTrailingZeros(Mask);
4840  SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
4841  SDValue Shift =
4842  SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
4843  N0.getOperand(0), Imm), 0);
4844  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
4845  MVT::i32, Shift, Shift);
4846  ReplaceNode(Node, Test);
4847  return;
4848  }
4849  if (isMask_64(Mask)) {
4850  unsigned LeadingZeros = countLeadingZeros(Mask);
4851  SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
4852  SDValue Shift =
4853  SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
4854  N0.getOperand(0), Imm), 0);
4855  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
4856  MVT::i32, Shift, Shift);
4857  ReplaceNode(Node, Test);
4858  return;
4859  }
4860  }
4861 
4862  MVT VT;
4863  int SubRegOp;
4864  unsigned ROpc, MOpc;
4865 
4866  // For each of these checks we need to be careful if the sign flag is
4867  // being used. It is only safe to use the sign flag in two conditions,
4868  // either the sign bit in the shrunken mask is zero or the final test
4869  // size is equal to the original compare size.
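  // e.g. with Mask == 0x80 and an i32 compare, "testb $0x80, %al" derives SF
  // from bit 7 while the original test derives it from bit 31, so shrinking
  // is only safe when no user reads the sign flag.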
4870 
4871  if (isUInt<8>(Mask) &&
4872  (!(Mask & 0x80) || CmpVT == MVT::i8 ||
4873  hasNoSignFlagUses(SDValue(Node, 0)))) {
4874  // For example, convert "testl %eax, $8" to "testb %al, $8"
4875  VT = MVT::i8;
4876  SubRegOp = X86::sub_8bit;
4877  ROpc = X86::TEST8ri;
4878  MOpc = X86::TEST8mi;
4879  } else if (OptForMinSize && isUInt<16>(Mask) &&
4880  (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
4881  hasNoSignFlagUses(SDValue(Node, 0)))) {
4882  // For example, "testl %eax, $32776" to "testw %ax, $32776".
4883  // NOTE: We only want to form TESTW instructions if optimizing for
4884  // min size. Otherwise we only save one byte and possibly get a length
4885  // changing prefix penalty in the decoders.
4886  VT = MVT::i16;
4887  SubRegOp = X86::sub_16bit;
4888  ROpc = X86::TEST16ri;
4889  MOpc = X86::TEST16mi;
4890  } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
4891  ((!(Mask & 0x80000000) &&
4892  // Without minsize 16-bit Cmps can get here so we need to
4893  // be sure we calculate the correct sign flag if needed.
4894  (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
4895  CmpVT == MVT::i32 ||
4896  hasNoSignFlagUses(SDValue(Node, 0)))) {
4897  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
4898  // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
4899  // Otherwise, we find ourselves in a position where we have to do
4900  // promotion. If previous passes did not promote the and, we assume
4901  // they had a good reason not to and do not promote here.
4902  VT = MVT::i32;
4903  SubRegOp = X86::sub_32bit;
4904  ROpc = X86::TEST32ri;
4905  MOpc = X86::TEST32mi;
4906  } else {
4907  // No eligible transformation was found.
4908  break;
4909  }
4910 
4911  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
4912  SDValue Reg = N0.getOperand(0);
4913 
4914  // Emit a testl or testw.
4915  MachineSDNode *NewNode;
4916  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4917  if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4918  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
4919  Reg.getOperand(0) };
4920  NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
4921  // Update the chain.
4922  ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
4923  // Record the mem-refs
4924  CurDAG->setNodeMemRefs(NewNode,
4925  {cast<LoadSDNode>(Reg)->getMemOperand()});
4926  } else {
4927  // Extract the subregister if necessary.
4928  if (N0.getValueType() != VT)
4929  Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
4930 
4931  NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
4932  }
4933  // Replace CMP with TEST.
4934  ReplaceNode(Node, NewNode);
4935  return;
4936  }
4937  break;
4938  }
4939  case X86ISD::PCMPISTR: {
4940  if (!Subtarget->hasSSE42())
4941  break;
4942 
4943  bool NeedIndex = !SDValue(Node, 0).use_empty();
4944  bool NeedMask = !SDValue(Node, 1).use_empty();
4945  // We can't fold a load if we are going to make two instructions.
4946  bool MayFoldLoad = !NeedIndex || !NeedMask;
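  // PCMPISTRI returns its index in ECX and PCMPISTRM its mask in XMM0, so
  // needing both results forces two instructions over the same inputs (the
  // copies out of those registers are presumably handled in emitPCMPISTR).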
4947 
4948  MachineSDNode *CNode;
4949  if (NeedMask) {
4950  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
4951  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
4952  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
4953  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4954  }
4955  if (NeedIndex || !NeedMask) {
4956  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
4957  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
4958  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
4959  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4960  }
4961 
4962  // Connect the flag usage to the last instruction created.
4963  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4964  CurDAG->RemoveDeadNode(Node);
4965  return;
4966  }
4967  case X86ISD::PCMPESTR: {
4968  if (!Subtarget->hasSSE42())
4969  break;
4970 
4971  // Copy the two implicit register inputs.
4972  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
4973  Node->getOperand(1),
4974  SDValue()).getValue(1);
4975  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
4976  Node->getOperand(3), InFlag).getValue(1);
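  // PCMPESTRI/PCMPESTRM take the explicit string lengths in EAX and EDX, so
  // both length operands must be pinned to those physical registers and
  // glued to the compare node emitted below.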
4977 
4978  bool NeedIndex = !SDValue(Node, 0).use_empty();
4979  bool NeedMask = !SDValue(Node, 1).use_empty();
4980  // We can't fold a load if we are going to make two instructions.
4981  bool MayFoldLoad = !NeedIndex || !NeedMask;
4982 
4983  MachineSDNode *CNode;
4984  if (NeedMask) {
4985  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
4986  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
4987  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
4988  InFlag);
4989  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4990  }
4991  if (NeedIndex || !NeedMask) {
4992  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
4993  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
4994  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
4995  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4996  }
4997  // Connect the flag usage to the last instruction created.
4998  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4999  CurDAG->RemoveDeadNode(Node);
5000  return;
5001  }
5002 
5003  case ISD::SETCC: {
5004  if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
5005  return;
5006 
5007  break;
5008  }
5009 
5010  case ISD::STORE:
5011  if (foldLoadStoreIntoMemOperand(Node))
5012  return;
5013  break;
5014  case ISD::FCEIL:
5015  case ISD::FFLOOR:
5016  case ISD::FTRUNC:
5017  case ISD::FNEARBYINT:
5018  case ISD::FRINT: {
5019  // Replace fp rounding ops with their X86-specific equivalents so we don't
5020  // need two sets of patterns.
5021  // FIXME: This can only happen when the nodes started as STRICT_* and have
5022  // been mutated into their non-STRICT equivalents. Eventually this
5023  // mutation will be removed and we should switch the STRICT_ nodes to a
5024  // strict version of RNDSCALE in PreProcessISelDAG.
5025  unsigned Imm;
5026  switch (Node->getOpcode()) {
5027  default: llvm_unreachable("Unexpected opcode!");
5028  case ISD::FCEIL: Imm = 0xA; break;
5029  case ISD::FFLOOR: Imm = 0x9; break;
5030  case ISD::FTRUNC: Imm = 0xB; break;
5031  case ISD::FNEARBYINT: Imm = 0xC; break;
5032  case ISD::FRINT: Imm = 0x4; break;
5033  }
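  // These immediates follow the SSE4.1 ROUND*/RNDSCALE encoding: bits 1:0
  // select the rounding mode (bit 2 instead selects MXCSR's current mode)
  // and bit 3 suppresses the precision exception, which is why FRINT (0x4)
  // still raises inexact while FNEARBYINT (0xC) does not.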
5034  SDLoc dl(Node);
5035  SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
5036  Node->getValueType(0),
5037  Node->getOperand(0),
5038  CurDAG->getConstant(Imm, dl, MVT::i8));
5039  ReplaceNode(Node, Res.getNode());
5040  SelectCode(Res.getNode());
5041  return;
5042  }
5043  }
5044 
5045  SelectCode(Node);
5046 }
5047 
5048 bool X86DAGToDAGISel::
5049 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
5050  std::vector<SDValue> &OutOps) {
5051  SDValue Op0, Op1, Op2, Op3, Op4;
5052  switch (ConstraintID) {
5053  default:
5054  llvm_unreachable("Unexpected asm memory constraint");
5056  // FIXME: It seems strange that 'i' is needed here since it's supposed to
5057  // be an immediate and not a memory constraint.
5059  case InlineAsm::Constraint_o: // offsetable ??
5060  case InlineAsm::Constraint_v: // not offsetable ??
5061  case InlineAsm::Constraint_m: // memory
5063  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
5064  return true;
5065  break;
5066  }
5067 
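  // selectAddr produced the standard X86 memory-operand 5-tuple:
  // base, scale, index, displacement, segment.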
5068  OutOps.push_back(Op0);
5069  OutOps.push_back(Op1);
5070  OutOps.push_back(Op2);
5071  OutOps.push_back(Op3);
5072  OutOps.push_back(Op4);
5073  return false;
5074 }
5075 
5076 /// This pass converts a legalized DAG into a X86-specific DAG,
5077 /// ready for instruction scheduling.
5078 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
5079  CodeGenOpt::Level OptLevel) {
5080  return new X86DAGToDAGISel(TM, OptLevel);
5081 }