LLVM 9.0.0svn
X86ISelDAGToDAG.cpp
1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a DAG pattern matching instruction selector for X86,
10 // converting from a legalized dag to a X86 dag.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86.h"
15 #include "X86MachineFunctionInfo.h"
16 #include "X86RegisterInfo.h"
17 #include "X86Subtarget.h"
18 #include "X86TargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/SelectionDAGISel.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/IR/ConstantRange.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/Type.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/KnownBits.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/Support/raw_ostream.h"
36 #include <stdint.h>
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "x86-isel"
40 
41 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
42 
43 static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
44  cl::desc("Enable setting constant bits to reduce size of mask immediates"),
45  cl::Hidden);
46 
47 //===----------------------------------------------------------------------===//
48 // Pattern Matcher Implementation
49 //===----------------------------------------------------------------------===//
50 
51 namespace {
52  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
53  /// numbers for the leaves of the matched tree.
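 /// The matched tree describes a standard x86 memory operand of the form
 /// [Base_Reg + Scale*IndexReg + Disp] plus an optional Segment; for example
 /// "movl 8(%rax,%rcx,4), %edx" has Base_Reg = %rax, IndexReg = %rcx,
 /// Scale = 4 and Disp = 8. GV/CP/ES/MCSym/JT/BlockAddr supply a symbolic
 /// displacement instead of (or combined with) the constant Disp.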
54  struct X86ISelAddressMode {
55  enum {
56  RegBase,
57  FrameIndexBase
58  } BaseType;
59 
60  // This is really a union, discriminated by BaseType!
61  SDValue Base_Reg;
62  int Base_FrameIndex;
63 
64  unsigned Scale;
65  SDValue IndexReg;
66  int32_t Disp;
67  SDValue Segment;
68  const GlobalValue *GV;
69  const Constant *CP;
70  const BlockAddress *BlockAddr;
71  const char *ES;
72  MCSymbol *MCSym;
73  int JT;
74  unsigned Align; // CP alignment.
75  unsigned char SymbolFlags; // X86II::MO_*
76  bool NegateIndex = false;
77 
78  X86ISelAddressMode()
79  : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
80  Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
81  MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
82 
83  bool hasSymbolicDisplacement() const {
84  return GV != nullptr || CP != nullptr || ES != nullptr ||
85  MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
86  }
87 
88  bool hasBaseOrIndexReg() const {
89  return BaseType == FrameIndexBase ||
90  IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
91  }
92 
93  /// Return true if this addressing mode is already RIP-relative.
94  bool isRIPRelative() const {
95  if (BaseType != RegBase) return false;
96  if (RegisterSDNode *RegNode =
97  dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
98  return RegNode->getReg() == X86::RIP;
99  return false;
100  }
101 
102  void setBaseReg(SDValue Reg) {
103  BaseType = RegBase;
104  Base_Reg = Reg;
105  }
106 
107 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
108  void dump(SelectionDAG *DAG = nullptr) {
109  dbgs() << "X86ISelAddressMode " << this << '\n';
110  dbgs() << "Base_Reg ";
111  if (Base_Reg.getNode())
112  Base_Reg.getNode()->dump(DAG);
113  else
114  dbgs() << "nul\n";
115  if (BaseType == FrameIndexBase)
116  dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
117  dbgs() << " Scale " << Scale << '\n'
118  << "IndexReg ";
119  if (NegateIndex)
120  dbgs() << "negate ";
121  if (IndexReg.getNode())
122  IndexReg.getNode()->dump(DAG);
123  else
124  dbgs() << "nul\n";
125  dbgs() << " Disp " << Disp << '\n'
126  << "GV ";
127  if (GV)
128  GV->dump();
129  else
130  dbgs() << "nul";
131  dbgs() << " CP ";
132  if (CP)
133  CP->dump();
134  else
135  dbgs() << "nul";
136  dbgs() << '\n'
137  << "ES ";
138  if (ES)
139  dbgs() << ES;
140  else
141  dbgs() << "nul";
142  dbgs() << " MCSym ";
143  if (MCSym)
144  dbgs() << MCSym;
145  else
146  dbgs() << "nul";
147  dbgs() << " JT" << JT << " Align" << Align << '\n';
148  }
149 #endif
150  };
151 }
152 
153 namespace {
154  //===--------------------------------------------------------------------===//
155  /// ISel - X86-specific code to select X86 machine instructions for
156  /// SelectionDAG operations.
157  ///
158  class X86DAGToDAGISel final : public SelectionDAGISel {
159  /// Keep a pointer to the X86Subtarget around so that we can
160  /// make the right decision when generating code for different targets.
161  const X86Subtarget *Subtarget;
162 
163  /// If true, selector should try to optimize for code size instead of
164  /// performance.
165  bool OptForSize;
166 
167  /// If true, selector should try to optimize for minimum code size.
168  bool OptForMinSize;
169 
170  /// Disable direct TLS access through segment registers.
171  bool IndirectTlsSegRefs;
172 
173  public:
174  explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
175  : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false),
176  OptForMinSize(false), IndirectTlsSegRefs(false) {}
177 
178  StringRef getPassName() const override {
179  return "X86 DAG->DAG Instruction Selection";
180  }
181 
182  bool runOnMachineFunction(MachineFunction &MF) override {
183  // Reset the subtarget each time through.
184  Subtarget = &MF.getSubtarget<X86Subtarget>();
185  IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
186  "indirect-tls-seg-refs");
187 
188  // OptFor[Min]Size are used in pattern predicates that isel is matching.
189  OptForSize = MF.getFunction().hasOptSize();
190  OptForMinSize = MF.getFunction().hasMinSize();
191  assert((!OptForMinSize || OptForSize) &&
192  "OptForMinSize implies OptForSize");
193 
194  SelectionDAGISel::runOnMachineFunction(MF);
195  return true;
196  }
197 
198  void EmitFunctionEntryCode() override;
199 
200  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
201 
202  void PreprocessISelDAG() override;
203  void PostprocessISelDAG() override;
204 
205 // Include the pieces autogenerated from the target description.
206 #include "X86GenDAGISel.inc"
207 
208  private:
209  void Select(SDNode *N) override;
210 
211  bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
212  bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
213  bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
214  bool matchAddress(SDValue N, X86ISelAddressMode &AM);
215  bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
216  bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
217  bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
218  unsigned Depth);
219  bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
220  bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
221  SDValue &Scale, SDValue &Index, SDValue &Disp,
222  SDValue &Segment);
223  bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
224  SDValue &Scale, SDValue &Index, SDValue &Disp,
225  SDValue &Segment);
226  bool selectMOV64Imm32(SDValue N, SDValue &Imm);
227  bool selectLEAAddr(SDValue N, SDValue &Base,
228  SDValue &Scale, SDValue &Index, SDValue &Disp,
229  SDValue &Segment);
230  bool selectLEA64_32Addr(SDValue N, SDValue &Base,
231  SDValue &Scale, SDValue &Index, SDValue &Disp,
232  SDValue &Segment);
233  bool selectTLSADDRAddr(SDValue N, SDValue &Base,
234  SDValue &Scale, SDValue &Index, SDValue &Disp,
235  SDValue &Segment);
236  bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
237  SDValue &Base, SDValue &Scale,
238  SDValue &Index, SDValue &Disp,
239  SDValue &Segment,
240  SDValue &NodeWithChain);
241  bool selectRelocImm(SDValue N, SDValue &Op);
242 
243  bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
244  SDValue &Base, SDValue &Scale,
245  SDValue &Index, SDValue &Disp,
246  SDValue &Segment);
247 
248  // Convenience method where P is also root.
249  bool tryFoldLoad(SDNode *P, SDValue N,
250  SDValue &Base, SDValue &Scale,
251  SDValue &Index, SDValue &Disp,
252  SDValue &Segment) {
253  return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
254  }
255 
256  /// Implement addressing mode selection for inline asm expressions.
257  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
258  unsigned ConstraintID,
259  std::vector<SDValue> &OutOps) override;
260 
261  void emitSpecialCodeForMain();
262 
263  inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
264  MVT VT, SDValue &Base, SDValue &Scale,
265  SDValue &Index, SDValue &Disp,
266  SDValue &Segment) {
267  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
268  Base = CurDAG->getTargetFrameIndex(
269  AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
270  else if (AM.Base_Reg.getNode())
271  Base = AM.Base_Reg;
272  else
273  Base = CurDAG->getRegister(0, VT);
274 
275  Scale = getI8Imm(AM.Scale, DL);
276 
277  // Negate the index if needed.
278  if (AM.NegateIndex) {
279  unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
280  SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
281  AM.IndexReg), 0);
282  AM.IndexReg = Neg;
283  }
284 
285  if (AM.IndexReg.getNode())
286  Index = AM.IndexReg;
287  else
288  Index = CurDAG->getRegister(0, VT);
289 
290  // These are 32-bit even in 64-bit mode since RIP-relative offset
291  // is 32-bit.
292  if (AM.GV)
293  Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
294  MVT::i32, AM.Disp,
295  AM.SymbolFlags);
296  else if (AM.CP)
297  Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
298  AM.Align, AM.Disp, AM.SymbolFlags);
299  else if (AM.ES) {
300  assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
301  Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
302  } else if (AM.MCSym) {
303  assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
304  assert(AM.SymbolFlags == 0 && "oo");
305  Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
306  } else if (AM.JT != -1) {
307  assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
308  Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
309  } else if (AM.BlockAddr)
310  Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
311  AM.SymbolFlags);
312  else
313  Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
314 
315  if (AM.Segment.getNode())
316  Segment = AM.Segment;
317  else
318  Segment = CurDAG->getRegister(0, MVT::i16);
319  }
320 
321  // Utility function to determine whether we should avoid selecting
322  // immediate forms of instructions for better code size or not.
323  // At a high level, we'd like to avoid such instructions when
324  // we have similar constants used within the same basic block
325  // that can be kept in a register.
326  //
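 // For example, under -Os a 32-bit constant that is stored to two different
 // addresses in the same block is cheaper to materialize once in a register
 // than to encode as a 4-byte immediate in both stores.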
327  bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
328  uint32_t UseCount = 0;
329 
330  // Do not want to hoist if we're not optimizing for size.
331  // TODO: We'd like to remove this restriction.
332  // See the comment in X86InstrInfo.td for more info.
333  if (!OptForSize)
334  return false;
335 
336  // Walk all the users of the immediate.
337  for (SDNode::use_iterator UI = N->use_begin(),
338  UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
339 
340  SDNode *User = *UI;
341 
342  // This user is already selected. Count it as a legitimate use and
343  // move on.
344  if (User->isMachineOpcode()) {
345  UseCount++;
346  continue;
347  }
348 
349  // We want to count stores of immediates as real uses.
350  if (User->getOpcode() == ISD::STORE &&
351  User->getOperand(1).getNode() == N) {
352  UseCount++;
353  continue;
354  }
355 
356  // We don't currently match users that have > 2 operands (except
357  // for stores, which are handled above).
358  // Those instructions won't match in ISel, for now, and would
359  // be counted incorrectly.
360  // This may change in the future as we add additional instruction
361  // types.
362  if (User->getNumOperands() != 2)
363  continue;
364 
365  // Immediates that are used for offsets as part of stack
366  // manipulation should be left alone. These are typically
367  // used to indicate SP offsets for argument passing and
368  // will get pulled into stores/pushes (implicitly).
369  if (User->getOpcode() == X86ISD::ADD ||
370  User->getOpcode() == ISD::ADD ||
371  User->getOpcode() == X86ISD::SUB ||
372  User->getOpcode() == ISD::SUB) {
373 
374  // Find the other operand of the add/sub.
375  SDValue OtherOp = User->getOperand(0);
376  if (OtherOp.getNode() == N)
377  OtherOp = User->getOperand(1);
378 
379  // Don't count if the other operand is SP.
380  RegisterSDNode *RegNode;
381  if (OtherOp->getOpcode() == ISD::CopyFromReg &&
382  (RegNode = dyn_cast_or_null<RegisterSDNode>(
383  OtherOp->getOperand(1).getNode())))
384  if ((RegNode->getReg() == X86::ESP) ||
385  (RegNode->getReg() == X86::RSP))
386  continue;
387  }
388 
389  // ... otherwise, count this and move on.
390  UseCount++;
391  }
392 
393  // If we have more than 1 use, then recommend for hoisting.
394  return (UseCount > 1);
395  }
396 
397  /// Return a target constant with the specified value of type i8.
398  inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
399  return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
400  }
401 
402  /// Return a target constant with the specified value, of type i32.
403  inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
404  return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
405  }
406 
407  /// Return a target constant with the specified value, of type i64.
408  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
409  return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
410  }
411 
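 // Map the element index of an extract_subvector/insert_subvector to the
 // 128-bit or 256-bit lane number that VEXTRACT*/VINSERT* expect. For
 // example, extracting elements [8..15] of a v16i32 as a 128-bit subvector
 // gives immediate (8 * 32) / 128 = 2.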
412  SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
413  const SDLoc &DL) {
414  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
415  uint64_t Index = N->getConstantOperandVal(1);
416  MVT VecVT = N->getOperand(0).getSimpleValueType();
417  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
418  }
419 
420  SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
421  const SDLoc &DL) {
422  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
423  uint64_t Index = N->getConstantOperandVal(2);
424  MVT VecVT = N->getSimpleValueType(0);
425  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
426  }
427 
428  // Helper to detect unneeded and instructions on shift amounts. Called
429  // from PatFrags in tablegen.
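 // For example, a 32-bit shift only reads the low 5 bits of its amount
 // (Width == 5), so "shl %cl" preceded by "andl $31, %ecx" can drop the AND.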
430  bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
431  assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
432  const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
433 
434  if (Val.countTrailingOnes() >= Width)
435  return true;
436 
437  APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
438  return Mask.countTrailingOnes() >= Width;
439  }
440 
441  /// Return an SDNode that returns the value of the global base register.
442  /// Output instructions required to initialize the global base register,
443  /// if necessary.
444  SDNode *getGlobalBaseReg();
445 
446  /// Return a reference to the TargetMachine, casted to the target-specific
447  /// type.
448  const X86TargetMachine &getTargetMachine() const {
449  return static_cast<const X86TargetMachine &>(TM);
450  }
451 
452  /// Return a reference to the TargetInstrInfo, casted to the target-specific
453  /// type.
454  const X86InstrInfo *getInstrInfo() const {
455  return Subtarget->getInstrInfo();
456  }
457 
458  /// Address-mode matching performs shift-of-and to and-of-shift
459  /// reassociation in order to expose more scaled addressing
460  /// opportunities.
461  bool ComplexPatternFuncMutatesDAG() const override {
462  return true;
463  }
464 
465  bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
466 
467  /// Returns whether this is a relocatable immediate in the range
468  /// [-2^Width .. 2^Width-1].
469  template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
470  if (auto *CN = dyn_cast<ConstantSDNode>(N))
471  return isInt<Width>(CN->getSExtValue());
472  return isSExtAbsoluteSymbolRef(Width, N);
473  }
474 
475  // Indicates we should prefer to use a non-temporal load for this load.
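 // These loads are intended to select (V)MOVNTDQA, which is only available
 // for 16-byte loads with SSE4.1, 32-byte loads with AVX2 and 64-byte loads
 // with AVX-512, and which requires a naturally aligned memory operand.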
476  bool useNonTemporalLoad(LoadSDNode *N) const {
477  if (!N->isNonTemporal())
478  return false;
479 
480  unsigned StoreSize = N->getMemoryVT().getStoreSize();
481 
482  if (N->getAlignment() < StoreSize)
483  return false;
484 
485  switch (StoreSize) {
486  default: llvm_unreachable("Unsupported store size");
487  case 4:
488  case 8:
489  return false;
490  case 16:
491  return Subtarget->hasSSE41();
492  case 32:
493  return Subtarget->hasAVX2();
494  case 64:
495  return Subtarget->hasAVX512();
496  }
497  }
498 
499  bool foldLoadStoreIntoMemOperand(SDNode *Node);
500  MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
501  bool matchBitExtract(SDNode *Node);
502  bool shrinkAndImmediate(SDNode *N);
503  bool isMaskZeroExtended(SDNode *N) const;
504  bool tryShiftAmountMod(SDNode *N);
505  bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
506 
507  MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
508  const SDLoc &dl, MVT VT, SDNode *Node);
509  MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
510  const SDLoc &dl, MVT VT, SDNode *Node,
511  SDValue &InFlag);
512 
513  bool tryOptimizeRem8Extend(SDNode *N);
514 
515  bool onlyUsesZeroFlag(SDValue Flags) const;
516  bool hasNoSignFlagUses(SDValue Flags) const;
517  bool hasNoCarryFlagUses(SDValue Flags) const;
518  };
519 }
520 
521 
522 // Returns true if this masked compare can be implemented legally with this
523 // type.
524 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
525  unsigned Opcode = N->getOpcode();
526  if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
527  Opcode == X86ISD::CMPM_SAE || Opcode == X86ISD::VFPCLASS) {
528  // We can get 256-bit 8 element types here without VLX being enabled. When
529  // this happens we will use 512-bit operations and the mask will not be
530  // zero extended.
531  EVT OpVT = N->getOperand(0).getValueType();
532  if (OpVT.is256BitVector() || OpVT.is128BitVector())
533  return Subtarget->hasVLX();
534 
535  return true;
536  }
537  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
538  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
539  Opcode == X86ISD::FSETCCM_SAE)
540  return true;
541 
542  return false;
543 }
544 
545 // Returns true if we can assume the writer of the mask has zero extended it
546 // for us.
547 bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
548  // If this is an AND, check if we have a compare on either side. As long as
549  // one side guarantees the mask is zero extended, the AND will preserve those
550  // zeros.
551  if (N->getOpcode() == ISD::AND)
552  return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
553  isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
554 
555  return isLegalMaskCompare(N, Subtarget);
556 }
557 
558 bool
559 X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
560  if (OptLevel == CodeGenOpt::None) return false;
561 
562  if (!N.hasOneUse())
563  return false;
564 
565  if (N.getOpcode() != ISD::LOAD)
566  return true;
567 
568  // Don't fold non-temporal loads if we have an instruction for them.
569  if (useNonTemporalLoad(cast<LoadSDNode>(N)))
570  return false;
571 
572  // If N is a load, do additional profitability checks.
573  if (U == Root) {
574  switch (U->getOpcode()) {
575  default: break;
576  case X86ISD::ADD:
577  case X86ISD::ADC:
578  case X86ISD::SUB:
579  case X86ISD::SBB:
580  case X86ISD::AND:
581  case X86ISD::XOR:
582  case X86ISD::OR:
583  case ISD::ADD:
584  case ISD::ADDCARRY:
585  case ISD::AND:
586  case ISD::OR:
587  case ISD::XOR: {
588  SDValue Op1 = U->getOperand(1);
589 
590  // If the other operand is an 8-bit immediate we should fold the immediate
591  // instead. This reduces code size.
592  // e.g.
593  // movl 4(%esp), %eax
594  // addl $4, %eax
595  // vs.
596  // movl $4, %eax
597  // addl 4(%esp), %eax
598  // The former is 2 bytes shorter. In the case where the increment is 1, the
599  // saving can be 4 bytes (by using incl %eax).
600  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
601  if (Imm->getAPIntValue().isSignedIntN(8))
602  return false;
603 
604  // If this is a 64-bit AND with an immediate that fits in 32-bits,
605  // prefer using the smaller and over folding the load. This is needed to
606  // make sure immediates created by shrinkAndImmediate are always folded.
607  // Ideally we would narrow the load during DAG combine and get the
608  // best of both worlds.
609  if (U->getOpcode() == ISD::AND &&
610  Imm->getAPIntValue().getBitWidth() == 64 &&
611  Imm->getAPIntValue().isIntN(32))
612  return false;
613 
614  // If this is really a zext_inreg that can be represented with a movzx
615  // instruction, prefer that.
616  // TODO: We could shrink the load and fold if it is non-volatile.
617  if (U->getOpcode() == ISD::AND &&
618  (Imm->getAPIntValue() == UINT8_MAX ||
619  Imm->getAPIntValue() == UINT16_MAX ||
620  Imm->getAPIntValue() == UINT32_MAX))
621  return false;
622 
623  // For ADD/SUB we can negate the immediate and use the opposite operation
624  // to fit 128 into a sign-extended 8-bit immediate.
625  if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
626  (-Imm->getAPIntValue()).isSignedIntN(8))
627  return false;
628  }
629 
630  // If the other operand is a TLS address, we should fold it instead.
631  // This produces
632  // movl %gs:0, %eax
633  // leal i@NTPOFF(%eax), %eax
634  // instead of
635  // movl $i@NTPOFF, %eax
636  // addl %gs:0, %eax
637  // if the block also has an access to a second TLS address this will save
638  // a load.
639  // FIXME: This is probably also true for non-TLS addresses.
640  if (Op1.getOpcode() == X86ISD::Wrapper) {
641  SDValue Val = Op1.getOperand(0);
642  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
643  return false;
644  }
645 
646  // Don't fold load if this matches the BTS/BTR/BTC patterns.
647  // BTS: (or X, (shl 1, n))
648  // BTR: (and X, (rotl -2, n))
649  // BTC: (xor X, (shl 1, n))
650  if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
651  if (U->getOperand(0).getOpcode() == ISD::SHL &&
652  isOneConstant(U->getOperand(0).getOperand(0)))
653  return false;
654 
655  if (U->getOperand(1).getOpcode() == ISD::SHL &&
656  isOneConstant(U->getOperand(1).getOperand(0)))
657  return false;
658  }
659  if (U->getOpcode() == ISD::AND) {
660  SDValue U0 = U->getOperand(0);
661  SDValue U1 = U->getOperand(1);
662  if (U0.getOpcode() == ISD::ROTL) {
663  auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
664  if (C && C->getSExtValue() == -2)
665  return false;
666  }
667 
668  if (U1.getOpcode() == ISD::ROTL) {
669  auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
670  if (C && C->getSExtValue() == -2)
671  return false;
672  }
673  }
674 
675  break;
676  }
677  case ISD::SHL:
678  case ISD::SRA:
679  case ISD::SRL:
680  // Don't fold a load into a shift by immediate. The BMI2 instructions
681  // support folding a load, but not an immediate. The legacy instructions
682  // support folding an immediate, but can't fold a load. Folding an
683  // immediate is preferable to folding a load.
684  if (isa<ConstantSDNode>(U->getOperand(1)))
685  return false;
686 
687  break;
688  }
689  }
690 
691 // Prevent folding a load if this can be implemented with an insert_subreg or
692  // a move that implicitly zeroes.
693  if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
694  isNullConstant(Root->getOperand(2)) &&
695  (Root->getOperand(0).isUndef() ||
696  ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
697  return false;
698 
699  return true;
700 }
701 
702 /// Replace the original chain operand of the call with
703 /// load's chain operand and move load below the call's chain operand.
704 static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
705  SDValue Call, SDValue OrigChain) {
706  SmallVector<SDValue, 8> Ops;
707  SDValue Chain = OrigChain.getOperand(0);
708  if (Chain.getNode() == Load.getNode())
709  Ops.push_back(Load.getOperand(0));
710  else {
711  assert(Chain.getOpcode() == ISD::TokenFactor &&
712  "Unexpected chain operand");
713  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
714  if (Chain.getOperand(i).getNode() == Load.getNode())
715  Ops.push_back(Load.getOperand(0));
716  else
717  Ops.push_back(Chain.getOperand(i));
718  SDValue NewChain =
719  CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
720  Ops.clear();
721  Ops.push_back(NewChain);
722  }
723  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
724  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
725  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
726  Load.getOperand(1), Load.getOperand(2));
727 
728  Ops.clear();
729  Ops.push_back(SDValue(Load.getNode(), 1));
730  Ops.append(Call->op_begin() + 1, Call->op_end());
731  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
732 }
733 
734 /// Return true if call address is a load and it can be
735 /// moved below CALLSEQ_START and the chains leading up to the call.
736 /// Return the CALLSEQ_START by reference as a second output.
737 /// In the case of a tail call, there isn't a callseq node between the call
738 /// chain and the load.
739 static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
740  // The transformation is somewhat dangerous if the call's chain was glued to
741  // the call. After MoveBelowOrigChain the load is moved between the call and
742  // the chain, this can create a cycle if the load is not folded. So it is
743  // *really* important that we are sure the load will be folded.
744  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
745  return false;
746  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
747  if (!LD ||
748  LD->isVolatile() ||
749  LD->getAddressingMode() != ISD::UNINDEXED ||
750  LD->getExtensionType() != ISD::NON_EXTLOAD)
751  return false;
752 
753  // Now let's find the callseq_start.
754  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
755  if (!Chain.hasOneUse())
756  return false;
757  Chain = Chain.getOperand(0);
758  }
759 
760  if (!Chain.getNumOperands())
761  return false;
762  // Since we are not checking for AA here, conservatively abort if the chain
763  // writes to memory. It's not safe to move the callee (a load) across a store.
764  if (isa<MemSDNode>(Chain.getNode()) &&
765  cast<MemSDNode>(Chain.getNode())->writeMem())
766  return false;
767  if (Chain.getOperand(0).getNode() == Callee.getNode())
768  return true;
769  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
770  Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
771  Callee.getValue(1).hasOneUse())
772  return true;
773  return false;
774 }
775 
776 void X86DAGToDAGISel::PreprocessISelDAG() {
777  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
778  E = CurDAG->allnodes_end(); I != E; ) {
779  SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
780 
781  // If this is a target specific AND node with no flag usages, turn it back
782  // into ISD::AND to enable test instruction matching.
783  if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
784  SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
785  N->getOperand(0), N->getOperand(1));
786  --I;
787  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
788  ++I;
789  CurDAG->DeleteNode(N);
790  continue;
791  }
792 
793  // Replace vector shifts with their X86 specific equivalent so we don't
794  // need 2 sets of patterns.
795  switch (N->getOpcode()) {
796  case ISD::SHL:
797  case ISD::SRA:
798  case ISD::SRL:
799  if (N->getValueType(0).isVector()) {
800  unsigned NewOpc;
801  switch (N->getOpcode()) {
802  default: llvm_unreachable("Unexpected opcode!");
803  case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
804  case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
805  case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
806  }
807  SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
808  N->getOperand(0), N->getOperand(1));
809  --I;
810  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
811  ++I;
812  CurDAG->DeleteNode(N);
813  continue;
814  }
815  }
816 
817  if (OptLevel != CodeGenOpt::None &&
818  // Only do this when the target can fold the load into the call or
819  // jmp.
820  !Subtarget->useRetpolineIndirectCalls() &&
821  ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
822  (N->getOpcode() == X86ISD::TC_RETURN &&
823  (Subtarget->is64Bit() ||
824  !getTargetMachine().isPositionIndependent())))) {
825  /// Also try moving call address load from outside callseq_start to just
826  /// before the call to allow it to be folded.
827  ///
828  /// [Load chain]
829  /// ^
830  /// |
831  /// [Load]
832  /// ^ ^
833  /// | |
834  /// / \--
835  /// / |
836  ///[CALLSEQ_START] |
837  /// ^ |
838  /// | |
839  /// [LOAD/C2Reg] |
840  /// | |
841  /// \ /
842  /// \ /
843  /// [CALL]
844  bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
845  SDValue Chain = N->getOperand(0);
846  SDValue Load = N->getOperand(1);
847  if (!isCalleeLoad(Load, Chain, HasCallSeq))
848  continue;
849  moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
850  ++NumLoadMoved;
851  continue;
852  }
853 
854  // Lower fpround and fpextend nodes that target the FP stack to be store and
855  // load to the stack. This is a gross hack. We would like to simply mark
856  // these as being illegal, but when we do that, legalize produces these when
857  // it expands calls, then expands these in the same legalize pass. We would
858  // like dag combine to be able to hack on these between the call expansion
859  // and the node legalization. As such this pass basically does "really
860  // late" legalization of these inline with the X86 isel pass.
861  // FIXME: This should only happen when not compiled with -O0.
862  if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
863  continue;
864 
865  MVT SrcVT = N->getOperand(0).getSimpleValueType();
866  MVT DstVT = N->getSimpleValueType(0);
867 
868  // If any of the sources are vectors, no fp stack involved.
869  if (SrcVT.isVector() || DstVT.isVector())
870  continue;
871 
872  // If the source and destination are SSE registers, then this is a legal
873  // conversion that should not be lowered.
874  const X86TargetLowering *X86Lowering =
875  static_cast<const X86TargetLowering *>(TLI);
876  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
877  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
878  if (SrcIsSSE && DstIsSSE)
879  continue;
880 
881  if (!SrcIsSSE && !DstIsSSE) {
882  // If this is an FPStack extension, it is a noop.
883  if (N->getOpcode() == ISD::FP_EXTEND)
884  continue;
885  // If this is a value-preserving FPStack truncation, it is a noop.
886  if (N->getConstantOperandVal(1))
887  continue;
888  }
889 
890  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
891  // FPStack has extload and truncstore. SSE can fold direct loads into other
892  // operations. Based on this, decide what we want to do.
893  MVT MemVT;
894  if (N->getOpcode() == ISD::FP_ROUND)
895  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
896  else
897  MemVT = SrcIsSSE ? SrcVT : DstVT;
898 
899  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
900  SDLoc dl(N);
901 
902  // FIXME: optimize the case where the src/dest is a load or store?
903  SDValue Store =
904  CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
905  MemTmp, MachinePointerInfo(), MemVT);
906  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
907  MachinePointerInfo(), MemVT);
908 
909  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
910  // extload we created. This will cause general havok on the dag because
911  // anything below the conversion could be folded into other existing nodes.
912  // To avoid invalidating 'I', back it up to the convert node.
913  --I;
914  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
915 
916  // Now that we did that, the node is dead. Increment the iterator to the
917  // next node to process, then delete N.
918  ++I;
919  CurDAG->DeleteNode(N);
920  }
921 
922  // The load+call transform above can leave some dead nodes in the graph. Make
923 // sure we remove them. It's possible some of the other transforms do too, so
924  // just remove dead nodes unconditionally.
925  CurDAG->RemoveDeadNodes();
926 }
927 
928 // Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
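// The 8-bit divrem result is typically copied out of AH/AL with a
// MOV[SZ]X32rr8_NOREX during selection, so a later movzx/movsx of that
// value's low byte just repeats an extension that has already happened.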
929 bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
930  unsigned Opc = N->getMachineOpcode();
931  if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
932  Opc != X86::MOVSX64rr8)
933  return false;
934 
935  SDValue N0 = N->getOperand(0);
936 
937  // We need to be extracting the low byte of an extend.
938  if (!N0.isMachineOpcode() ||
939  N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
940  N0.getConstantOperandVal(1) != X86::sub_8bit)
941  return false;
942 
943  // We're looking for either a movsx or movzx to match the original opcode.
944  unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
945  : X86::MOVSX32rr8_NOREX;
946  SDValue N00 = N0.getOperand(0);
947  if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
948  return false;
949 
950  if (Opc == X86::MOVSX64rr8) {
951  // If we had a sign extend from 8 to 64 bits, we still need to go from 32
952  // to 64.
953  MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
954  MVT::i64, N00);
955  ReplaceUses(N, Extend);
956  } else {
957  // Ok we can drop this extend and just use the original extend.
958  ReplaceUses(N, N00.getNode());
959  }
960 
961  return true;
962 }
963 
964 void X86DAGToDAGISel::PostprocessISelDAG() {
965  // Skip peepholes at -O0.
966  if (TM.getOptLevel() == CodeGenOpt::None)
967  return;
968 
969  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
970 
971  bool MadeChange = false;
972  while (Position != CurDAG->allnodes_begin()) {
973  SDNode *N = &*--Position;
974  // Skip dead nodes and any non-machine opcodes.
975  if (N->use_empty() || !N->isMachineOpcode())
976  continue;
977 
978  if (tryOptimizeRem8Extend(N)) {
979  MadeChange = true;
980  continue;
981  }
982 
983  // Look for a TESTrr+ANDrr pattern where both operands of the test are
984  // the same. Rewrite to remove the AND.
985  unsigned Opc = N->getMachineOpcode();
986  if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
987  Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
988  N->getOperand(0) == N->getOperand(1) &&
989  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
990  N->getOperand(0).isMachineOpcode()) {
991  SDValue And = N->getOperand(0);
992  unsigned N0Opc = And.getMachineOpcode();
993  if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
994  N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
995  MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
996  MVT::i32,
997  And.getOperand(0),
998  And.getOperand(1));
999  ReplaceUses(N, Test);
1000  MadeChange = true;
1001  continue;
1002  }
1003  if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
1004  N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
1005  unsigned NewOpc;
1006  switch (N0Opc) {
1007  case X86::AND8rm: NewOpc = X86::TEST8mr; break;
1008  case X86::AND16rm: NewOpc = X86::TEST16mr; break;
1009  case X86::AND32rm: NewOpc = X86::TEST32mr; break;
1010  case X86::AND64rm: NewOpc = X86::TEST64mr; break;
1011  }
1012 
1013  // Need to swap the memory and register operand.
1014  SDValue Ops[] = { And.getOperand(1),
1015  And.getOperand(2),
1016  And.getOperand(3),
1017  And.getOperand(4),
1018  And.getOperand(5),
1019  And.getOperand(0),
1020  And.getOperand(6) /* Chain */ };
1021  MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1022  MVT::i32, MVT::Other, Ops);
1023  ReplaceUses(N, Test);
1024  MadeChange = true;
1025  continue;
1026  }
1027  }
1028 
1029  // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
1030  // used. We're doing this late so we can prefer to fold the AND into masked
1031  // comparisons. Doing that can be better for the live range of the mask
1032  // register.
1033  if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
1034  Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
1035  N->getOperand(0) == N->getOperand(1) &&
1036  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
1037  N->getOperand(0).isMachineOpcode() &&
1038  onlyUsesZeroFlag(SDValue(N, 0))) {
1039  SDValue And = N->getOperand(0);
1040  unsigned N0Opc = And.getMachineOpcode();
1041  // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
1042  // KAND instructions and KTEST use the same ISA feature.
1043  if (N0Opc == X86::KANDBrr ||
1044  (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
1045  N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
1046  unsigned NewOpc;
1047  switch (Opc) {
1048  default: llvm_unreachable("Unexpected opcode!");
1049  case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
1050  case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
1051  case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
1052  case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
1053  }
1054  MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
1055  MVT::i32,
1056  And.getOperand(0),
1057  And.getOperand(1));
1058  ReplaceUses(N, KTest);
1059  MadeChange = true;
1060  continue;
1061  }
1062  }
1063 
1064  // Attempt to remove vectors moves that were inserted to zero upper bits.
1065  if (Opc != TargetOpcode::SUBREG_TO_REG)
1066  continue;
1067 
1068  unsigned SubRegIdx = N->getConstantOperandVal(2);
1069  if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
1070  continue;
1071 
1072  SDValue Move = N->getOperand(1);
1073  if (!Move.isMachineOpcode())
1074  continue;
1075 
1076  // Make sure it's one of the move opcodes we recognize.
1077  switch (Move.getMachineOpcode()) {
1078  default:
1079  continue;
1080  case X86::VMOVAPDrr: case X86::VMOVUPDrr:
1081  case X86::VMOVAPSrr: case X86::VMOVUPSrr:
1082  case X86::VMOVDQArr: case X86::VMOVDQUrr:
1083  case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
1084  case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
1085  case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
1086  case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
1087  case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
1088  case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
1089  case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
1090  case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
1091  case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
1092  case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
1093  case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
1094  break;
1095  }
1096 
1097  SDValue In = Move.getOperand(0);
1098  if (!In.isMachineOpcode() ||
1099  In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1100  continue;
1101 
1102  // Make sure the instruction has a VEX, XOP, or EVEX prefix. This excludes
1103  // the SHA instructions, which use a legacy encoding.
1104  uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1105  if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
1106  (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
1107  (TSFlags & X86II::EncodingMask) != X86II::XOP)
1108  continue;
1109 
1110  // The producing instruction is another vector instruction, so we can drop the
1111  // move.
1112  CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1113  MadeChange = true;
1114  }
1115 
1116  if (MadeChange)
1117  CurDAG->RemoveDeadNodes();
1118 }
1119 
1120 
1121 /// Emit any code that needs to be executed only in the main function.
1122 void X86DAGToDAGISel::emitSpecialCodeForMain() {
1123  if (Subtarget->isTargetCygMing()) {
1124  TargetLowering::ArgListTy Args;
1125  auto &DL = CurDAG->getDataLayout();
1126 
1127  TargetLowering::CallLoweringInfo CLI(*CurDAG);
1128  CLI.setChain(CurDAG->getRoot())
1129  .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1130  CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1131  std::move(Args));
1132  const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1133  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
1134  CurDAG->setRoot(Result.second);
1135  }
1136 }
1137 
1138 void X86DAGToDAGISel::EmitFunctionEntryCode() {
1139  // If this is main, emit special code for main.
1140  const Function &F = MF->getFunction();
1141  if (F.hasExternalLinkage() && F.getName() == "main")
1142  emitSpecialCodeForMain();
1143 }
1144 
1145 static bool isDispSafeForFrameIndex(int64_t Val) {
1146  // On 64-bit platforms, we can run into an issue where a frame index
1147  // includes a displacement that, when added to the explicit displacement,
1148  // will overflow the displacement field. Assuming that the frame index
1149  // displacement fits into a 31-bit integer (which is only slightly more
1150  // aggressive than the current fundamental assumption that it fits into
1151  // a 32-bit integer), a 31-bit disp should always be safe.
1152  return isInt<31>(Val);
1153 }
1154 
1155 bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
1156  X86ISelAddressMode &AM) {
1157  // If there's no offset to fold, we don't need to do any work.
1158  if (Offset == 0)
1159  return false;
1160 
1161  // Cannot combine ExternalSymbol displacements with integer offsets.
1162  if (AM.ES || AM.MCSym)
1163  return true;
1164 
1165  int64_t Val = AM.Disp + Offset;
1166  CodeModel::Model M = TM.getCodeModel();
1167  if (Subtarget->is64Bit()) {
1168  if (!X86::isOffsetSuitableForCodeModel(Val, M,
1169  AM.hasSymbolicDisplacement()))
1170  return true;
1171  // In addition to the checks required for a register base, check that
1172  // we do not try to use an unsafe Disp with a frame index.
1173  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1174  !isDispSafeForFrameIndex(Val))
1175  return true;
1176  }
1177  AM.Disp = Val;
1178  return false;
1179 
1180 }
1181 
1182 bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
1183  SDValue Address = N->getOperand(1);
1184 
1185  // load gs:0 -> GS segment register.
1186  // load fs:0 -> FS segment register.
1187  //
1188  // This optimization is valid because the GNU TLS model defines that
1189  // gs:0 (or fs:0 on X86-64) contains its own address.
1190  // For more information see http://people.redhat.com/drepper/tls.pdf
1191  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
1192  if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
1193  !IndirectTlsSegRefs &&
1194  (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1195  Subtarget->isTargetFuchsia()))
1196  switch (N->getPointerInfo().getAddrSpace()) {
1197  case 256:
1198  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1199  return false;
1200  case 257:
1201  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1202  return false;
1203  // Address space 258 is not handled here, because it is not used to
1204  // address TLS areas.
1205  }
1206 
1207  return true;
1208 }
1209 
1210 /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1211 /// mode. These wrap things that will resolve down into a symbol reference.
1212 /// If no match is possible, this returns true, otherwise it returns false.
1213 bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1214  // If the addressing mode already has a symbol as the displacement, we can
1215  // never match another symbol.
1216  if (AM.hasSymbolicDisplacement())
1217  return true;
1218 
1219  bool IsRIPRelTLS = false;
1220  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1221  if (IsRIPRel) {
1222  SDValue Val = N.getOperand(0);
1223  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
1224  IsRIPRelTLS = true;
1225  }
1226 
1227  // We can't use an addressing mode in the 64-bit large code model.
1228  // Global TLS addressing is an exception. In the medium code model,
1229 // we can use a mode when RIP wrappers are present.
1230  // That signifies access to globals that are known to be "near",
1231  // such as the GOT itself.
1232  CodeModel::Model M = TM.getCodeModel();
1233  if (Subtarget->is64Bit() &&
1234  ((M == CodeModel::Large && !IsRIPRelTLS) ||
1235  (M == CodeModel::Medium && !IsRIPRel)))
1236  return true;
1237 
1238  // Base and index reg must be 0 in order to use %rip as base.
1239  if (IsRIPRel && AM.hasBaseOrIndexReg())
1240  return true;
1241 
1242  // Make a local copy in case we can't do this fold.
1243  X86ISelAddressMode Backup = AM;
1244 
1245  int64_t Offset = 0;
1246  SDValue N0 = N.getOperand(0);
1247  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1248  AM.GV = G->getGlobal();
1249  AM.SymbolFlags = G->getTargetFlags();
1250  Offset = G->getOffset();
1251  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1252  AM.CP = CP->getConstVal();
1253  AM.Align = CP->getAlignment();
1254  AM.SymbolFlags = CP->getTargetFlags();
1255  Offset = CP->getOffset();
1256  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1257  AM.ES = S->getSymbol();
1258  AM.SymbolFlags = S->getTargetFlags();
1259  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1260  AM.MCSym = S->getMCSymbol();
1261  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
1262  AM.JT = J->getIndex();
1263  AM.SymbolFlags = J->getTargetFlags();
1264  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1265  AM.BlockAddr = BA->getBlockAddress();
1266  AM.SymbolFlags = BA->getTargetFlags();
1267  Offset = BA->getOffset();
1268  } else
1269  llvm_unreachable("Unhandled symbol reference node.");
1270 
1271  if (foldOffsetIntoAddress(Offset, AM)) {
1272  AM = Backup;
1273  return true;
1274  }
1275 
1276  if (IsRIPRel)
1277  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1278 
1279  // Commit the changes now that we know this fold is safe.
1280  return false;
1281 }
1282 
1283 /// Add the specified node to the specified addressing mode, returning true if
1284 /// it cannot be done. This just pattern matches for the addressing mode.
1285 bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1286  if (matchAddressRecursively(N, AM, 0))
1287  return true;
1288 
1289  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1290  // a smaller encoding and avoids a scaled-index.
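 // For example, "leal (,%rax,2), %ecx" needs a SIB byte with no base and a
 // 4-byte zero displacement, while the equivalent "leal (%rax,%rax), %ecx"
 // avoids the 4-byte displacement.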
1291  if (AM.Scale == 2 &&
1292  AM.BaseType == X86ISelAddressMode::RegBase &&
1293  AM.Base_Reg.getNode() == nullptr) {
1294  AM.Base_Reg = AM.IndexReg;
1295  AM.Scale = 1;
1296  }
1297 
1298  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1299  // because it has a smaller encoding.
1300  // TODO: Which other code models can use this?
1301  switch (TM.getCodeModel()) {
1302  default: break;
1303  case CodeModel::Small:
1304  case CodeModel::Kernel:
1305  if (Subtarget->is64Bit() &&
1306  AM.Scale == 1 &&
1307  AM.BaseType == X86ISelAddressMode::RegBase &&
1308  AM.Base_Reg.getNode() == nullptr &&
1309  AM.IndexReg.getNode() == nullptr &&
1310  AM.SymbolFlags == X86II::MO_NO_FLAG &&
1311  AM.hasSymbolicDisplacement())
1312  AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1313  break;
1314  }
1315 
1316  return false;
1317 }
1318 
1319 bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
1320  unsigned Depth) {
1321  // Add an artificial use to this node so that we can keep track of
1322  // it if it gets CSE'd with a different node.
1323  HandleSDNode Handle(N);
1324 
1325  X86ISelAddressMode Backup = AM;
1326  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1327  !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1328  return false;
1329  AM = Backup;
1330 
1331  // Try again after commuting the operands.
1332  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
1333  !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1334  return false;
1335  AM = Backup;
1336 
1337  // If we couldn't fold both operands into the address at the same time,
1338  // see if we can just put each operand into a register and fold at least
1339  // the add.
1340  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1341  !AM.Base_Reg.getNode() &&
1342  !AM.IndexReg.getNode()) {
1343  N = Handle.getValue();
1344  AM.Base_Reg = N.getOperand(0);
1345  AM.IndexReg = N.getOperand(1);
1346  AM.Scale = 1;
1347  return false;
1348  }
1349  N = Handle.getValue();
1350  return true;
1351 }
1352 
1353 // Insert a node into the DAG at least before the Pos node's position. This
1354 // will reposition the node as needed, and will assign it a node ID that is <=
1355 // the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1356 // IDs! The selection DAG must no longer depend on their uniqueness when this
1357 // is used.
1358 static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
1359  if (N->getNodeId() == -1 ||
1360  (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
1361  SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
1362  DAG.RepositionNode(Pos->getIterator(), N.getNode());
1363  // Mark Node as invalid for pruning as after this it may be a successor to a
1364  // selected node but otherwise be in the same position of Pos.
1365  // Conservatively mark it with the same -abs(Id) to assure node id
1366  // invariant is preserved.
1367  N->setNodeId(Pos->getNodeId());
1368  SelectionDAGISel::InvalidateNodeId(N.getNode());
1369  }
1370 }
1371 
1372 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1373 // safe. This allows us to convert the shift and and into an h-register
1374 // extract and a scaled index. Returns false if the simplification is
1375 // performed.
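// For example, with C1 == 2 this turns "(x >> 6) & 0x3fc" into
// "((x >> 8) & 0xff) << 2", i.e. bits [15:8] of x used as the index with
// Scale == 4.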
1376 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
1377  uint64_t Mask,
1378  SDValue Shift, SDValue X,
1379  X86ISelAddressMode &AM) {
1380  if (Shift.getOpcode() != ISD::SRL ||
1381  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1382  !Shift.hasOneUse())
1383  return true;
1384 
1385  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1386  if (ScaleLog <= 0 || ScaleLog >= 4 ||
1387  Mask != (0xffu << ScaleLog))
1388  return true;
1389 
1390  MVT VT = N.getSimpleValueType();
1391  SDLoc DL(N);
1392  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1393  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1394  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1395  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1396  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1397  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1398 
1399  // Insert the new nodes into the topological ordering. We must do this in
1400  // a valid topological ordering as nothing is going to go back and re-sort
1401  // these nodes. We continually insert before 'N' in sequence as this is
1402  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1403  // hierarchy left to express.
1404  insertDAGNode(DAG, N, Eight);
1405  insertDAGNode(DAG, N, Srl);
1406  insertDAGNode(DAG, N, NewMask);
1407  insertDAGNode(DAG, N, And);
1408  insertDAGNode(DAG, N, ShlCount);
1409  insertDAGNode(DAG, N, Shl);
1410  DAG.ReplaceAllUsesWith(N, Shl);
1411  DAG.RemoveDeadNode(N.getNode());
1412  AM.IndexReg = And;
1413  AM.Scale = (1 << ScaleLog);
1414  return false;
1415 }
1416 
1417 // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1418 // allows us to fold the shift into this addressing mode. Returns false if the
1419 // transform succeeded.
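// For example, "(x << 2) & 0x3fc" becomes "(x & 0xff) << 2"; the "<< 2" is
// then absorbed into the addressing mode as Scale == 4.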
1420 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
1421  X86ISelAddressMode &AM) {
1422  SDValue Shift = N.getOperand(0);
1423 
1424  // Use a signed mask so that shifting right will insert sign bits. These
1425  // bits will be removed when we shift the result left so it doesn't matter
1426  // what we use. This might allow a smaller immediate encoding.
1427  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
1428 
1429  // If we have an any_extend feeding the AND, look through it to see if there
1430  // is a shift behind it. But only if the AND doesn't use the extended bits.
1431  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
1432  bool FoundAnyExtend = false;
1433  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
1434  Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
1435  isUInt<32>(Mask)) {
1436  FoundAnyExtend = true;
1437  Shift = Shift.getOperand(0);
1438  }
1439 
1440  if (Shift.getOpcode() != ISD::SHL ||
1441  !isa<ConstantSDNode>(Shift.getOperand(1)))
1442  return true;
1443 
1444  SDValue X = Shift.getOperand(0);
1445 
1446  // Not likely to be profitable if either the AND or SHIFT node has more
1447  // than one use (unless all uses are for address computation). Besides,
1448  // isel mechanism requires their node ids to be reused.
1449  if (!N.hasOneUse() || !Shift.hasOneUse())
1450  return true;
1451 
1452  // Verify that the shift amount is something we can fold.
1453  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1454  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1455  return true;
1456 
1457  MVT VT = N.getSimpleValueType();
1458  SDLoc DL(N);
1459  if (FoundAnyExtend) {
1460  SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
1461  insertDAGNode(DAG, N, NewX);
1462  X = NewX;
1463  }
1464 
1465  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1466  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1467  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1468 
1469  // Insert the new nodes into the topological ordering. We must do this in
1470  // a valid topological ordering as nothing is going to go back and re-sort
1471  // these nodes. We continually insert before 'N' in sequence as this is
1472  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1473  // hierarchy left to express.
1474  insertDAGNode(DAG, N, NewMask);
1475  insertDAGNode(DAG, N, NewAnd);
1476  insertDAGNode(DAG, N, NewShift);
1477  DAG.ReplaceAllUsesWith(N, NewShift);
1478  DAG.RemoveDeadNode(N.getNode());
1479 
1480  AM.Scale = 1 << ShiftAmt;
1481  AM.IndexReg = NewAnd;
1482  return false;
1483 }
1484 
1485 // Implement some heroics to detect shifts of masked values where the mask can
1486 // be replaced by extending the shift and undoing that in the addressing mode
1487 // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1488 // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1489 // the addressing mode. This results in code such as:
1490 //
1491 // int f(short *y, int *lookup_table) {
1492 // ...
1493 // return *y + lookup_table[*y >> 11];
1494 // }
1495 //
1496 // Turning into:
1497 // movzwl (%rdi), %eax
1498 // movl %eax, %ecx
1499 // shrl $11, %ecx
1500 // addl (%rsi,%rcx,4), %eax
1501 //
1502 // Instead of:
1503 // movzwl (%rdi), %eax
1504 // movl %eax, %ecx
1505 // shrl $9, %ecx
1506 // andl $124, %rcx
1507 // addl (%rsi,%rcx), %eax
1508 //
1509 // Note that this function assumes the mask is provided as a mask *after* the
1510 // value is shifted. The input chain may or may not match that, but computing
1511 // such a mask is trivial.
1512 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
1513  uint64_t Mask,
1514  SDValue Shift, SDValue X,
1515  X86ISelAddressMode &AM) {
1516  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1517  !isa<ConstantSDNode>(Shift.getOperand(1)))
1518  return true;
1519 
1520  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1521  unsigned MaskLZ = countLeadingZeros(Mask);
1522  unsigned MaskTZ = countTrailingZeros(Mask);
1523 
1524  // The amount of shift we're trying to fit into the addressing mode is taken
1525  // from the trailing zeros of the mask.
1526  unsigned AMShiftAmt = MaskTZ;
1527 
1528  // There is nothing we can do here unless the mask is removing some bits.
1529  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1530  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1531 
1532  // We also need to ensure that mask is a continuous run of bits.
1533  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1534 
1535  // Scale the leading zero count down based on the actual size of the value.
1536  // Also scale it down based on the size of the shift.
1537  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1538  if (MaskLZ < ScaleDown)
1539  return true;
1540  MaskLZ -= ScaleDown;
1541 
1542  // The final check is to ensure that any masked out high bits of X are
1543  // already known to be zero. Otherwise, the mask has a semantic impact
1544  // other than masking out a couple of low bits. Unfortunately, because of
1545  // the mask, zero extensions will be removed from operands in some cases.
1546  // This code works extra hard to look through extensions because we can
1547  // replace them with zero extensions cheaply if necessary.
1548  bool ReplacingAnyExtend = false;
1549  if (X.getOpcode() == ISD::ANY_EXTEND) {
1550  unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
1551                        X.getOperand(0).getSimpleValueType().getSizeInBits();
1552  // Assume that we'll replace the any-extend with a zero-extend, and
1553  // narrow the search to the extended value.
1554  X = X.getOperand(0);
1555  MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1556  ReplacingAnyExtend = true;
1557  }
1558  APInt MaskedHighBits =
1559      APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
1560  KnownBits Known = DAG.computeKnownBits(X);
1561  if (MaskedHighBits != Known.Zero) return true;
1562 
1563  // We've identified a pattern that can be transformed into a single shift
1564  // and an addressing mode. Make it so.
1565  MVT VT = N.getSimpleValueType();
1566  if (ReplacingAnyExtend) {
1567  assert(X.getValueType() != VT);
1568  // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1569  SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1570  insertDAGNode(DAG, N, NewX);
1571  X = NewX;
1572  }
1573  SDLoc DL(N);
1574  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1575  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1576  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1577  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1578 
1579  // Insert the new nodes into the topological ordering. We must do this in
1580  // a valid topological ordering as nothing is going to go back and re-sort
1581  // these nodes. We continually insert before 'N' in sequence as this is
1582  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1583  // hierarchy left to express.
1584  insertDAGNode(DAG, N, NewSRLAmt);
1585  insertDAGNode(DAG, N, NewSRL);
1586  insertDAGNode(DAG, N, NewSHLAmt);
1587  insertDAGNode(DAG, N, NewSHL);
1588  DAG.ReplaceAllUsesWith(N, NewSHL);
1589  DAG.RemoveDeadNode(N.getNode());
1590 
1591  AM.Scale = 1 << AMShiftAmt;
1592  AM.IndexReg = NewSRL;
1593  return false;
1594 }
1595 
1596 // Transform "(X >> SHIFT) & (MASK << C1)" to
1597 // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
1598 // matched to a BEXTR later. Returns false if the simplification is performed.
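// For example, with SHIFT = 2, MASK = 0xFF and C1 = 3, (x >> 2) & 0x7F8
// becomes ((x >> 5) & 0xFF) << 3; the trailing shl is absorbed as a scale
// of 8 and the remaining (srl + and) is matched to a BEXTR.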
1599 static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
1600                                    uint64_t Mask,
1601  SDValue Shift, SDValue X,
1602  X86ISelAddressMode &AM,
1603  const X86Subtarget &Subtarget) {
1604  if (Shift.getOpcode() != ISD::SRL ||
1605  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1606  !Shift.hasOneUse() || !N.hasOneUse())
1607  return true;
1608 
1609  // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
1610  if (!Subtarget.hasTBM() &&
1611  !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
1612  return true;
1613 
1614  // We need to ensure that the mask is a contiguous run of bits.
1615  if (!isShiftedMask_64(Mask)) return true;
1616 
1617  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1618 
1619  // The amount of shift we're trying to fit into the addressing mode is taken
1620  // from the trailing zeros of the mask.
1621  unsigned AMShiftAmt = countTrailingZeros(Mask);
1622 
1623  // There is nothing we can do here unless the mask is removing some bits.
1624  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1625  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1626 
1627  MVT VT = N.getSimpleValueType();
1628  SDLoc DL(N);
1629  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1630  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1631  SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
1632  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
1633  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1634  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
1635 
1636  // Insert the new nodes into the topological ordering. We must do this in
1637  // a valid topological ordering as nothing is going to go back and re-sort
1638  // these nodes. We continually insert before 'N' in sequence as this is
1639  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1640  // hierarchy left to express.
1641  insertDAGNode(DAG, N, NewSRLAmt);
1642  insertDAGNode(DAG, N, NewSRL);
1643  insertDAGNode(DAG, N, NewMask);
1644  insertDAGNode(DAG, N, NewAnd);
1645  insertDAGNode(DAG, N, NewSHLAmt);
1646  insertDAGNode(DAG, N, NewSHL);
1647  DAG.ReplaceAllUsesWith(N, NewSHL);
1648  DAG.RemoveDeadNode(N.getNode());
1649 
1650  AM.Scale = 1 << AMShiftAmt;
1651  AM.IndexReg = NewAnd;
1652  return false;
1653 }
1654 
1655 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1656  unsigned Depth) {
1657  SDLoc dl(N);
1658  LLVM_DEBUG({
1659  dbgs() << "MatchAddress: ";
1660  AM.dump(CurDAG);
1661  });
1662  // Limit recursion.
1663  if (Depth > 5)
1664  return matchAddressBase(N, AM);
1665 
1666  // If this is already a %rip relative address, we can only merge immediates
1667  // into it. Instead of handling this in every case, we handle it here.
1668  // RIP relative addressing: %rip + 32-bit displacement!
1669  if (AM.isRIPRelative()) {
1670  // FIXME: JumpTable and ExternalSymbol address currently don't like
1671  // displacements. It isn't very important, but this should be fixed for
1672  // consistency.
1673  if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1674  return true;
1675 
1676  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1677  if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1678  return false;
1679  return true;
1680  }
1681 
1682  switch (N.getOpcode()) {
1683  default: break;
1684  case ISD::LOCAL_RECOVER: {
1685  if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1686  if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1687  // Use the symbol and don't prefix it.
1688  AM.MCSym = ESNode->getMCSymbol();
1689  return false;
1690  }
1691  break;
1692  }
1693  case ISD::Constant: {
1694  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1695  if (!foldOffsetIntoAddress(Val, AM))
1696  return false;
1697  break;
1698  }
1699 
1700  case X86ISD::Wrapper:
1701  case X86ISD::WrapperRIP:
1702  if (!matchWrapper(N, AM))
1703  return false;
1704  break;
1705 
1706  case ISD::LOAD:
1707  if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1708  return false;
1709  break;
1710 
1711  case ISD::FrameIndex:
1712  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1713  AM.Base_Reg.getNode() == nullptr &&
1714  (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1715  AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1716  AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1717  return false;
1718  }
1719  break;
1720 
1721  case ISD::SHL:
1722  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1723  break;
1724 
1725  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1726  unsigned Val = CN->getZExtValue();
1727  // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1728  // that the base operand remains free for further matching. If
1729  // the base doesn't end up getting used, a post-processing step
1730  // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1731  if (Val == 1 || Val == 2 || Val == 3) {
1732  AM.Scale = 1 << Val;
1733  SDValue ShVal = N.getOperand(0);
1734 
1735  // Okay, we know that we have a scale by now. However, if the scaled
1736  // value is an add of something and a constant, we can fold the
1737  // constant into the disp field here.
1738  if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1739  AM.IndexReg = ShVal.getOperand(0);
1740  ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1741  uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1742  if (!foldOffsetIntoAddress(Disp, AM))
1743  return false;
1744  }
1745 
1746  AM.IndexReg = ShVal;
1747  return false;
1748  }
1749  }
1750  break;
1751 
1752  case ISD::SRL: {
1753  // Scale must not be used already.
1754  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1755 
1756  // We only handle up to 64-bit values here as those are what matter for
1757  // addressing mode optimizations.
1758  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
1759  "Unexpected value size!");
1760 
1761  SDValue And = N.getOperand(0);
1762  if (And.getOpcode() != ISD::AND) break;
1763  SDValue X = And.getOperand(0);
1764 
1765  // The mask used for the transform is expected to be post-shift, but we
1766  // found the shift first so just apply the shift to the mask before passing
1767  // it down.
1768  if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1769  !isa<ConstantSDNode>(And.getOperand(1)))
1770  break;
1771  uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1772 
1773  // Try to fold the mask and shift into the scale, and return false if we
1774  // succeed.
1775  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1776  return false;
1777  break;
1778  }
1779 
1780  case ISD::SMUL_LOHI:
1781  case ISD::UMUL_LOHI:
1782  // A mul_lohi where we need the low part can be folded as a plain multiply.
1783  if (N.getResNo() != 0) break;
1784  LLVM_FALLTHROUGH;
1785  case ISD::MUL:
1786  case X86ISD::MUL_IMM:
1787  // X*[3,5,9] -> X+X*[2,4,8]
1788  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1789  AM.Base_Reg.getNode() == nullptr &&
1790  AM.IndexReg.getNode() == nullptr) {
1791  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1792  if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1793  CN->getZExtValue() == 9) {
1794  AM.Scale = unsigned(CN->getZExtValue())-1;
1795 
1796  SDValue MulVal = N.getOperand(0);
1797  SDValue Reg;
1798 
1799  // Okay, we know that we have a scale by now. However, if the scaled
1800  // value is an add of something and a constant, we can fold the
1801  // constant into the disp field here.
1802  if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1803  isa<ConstantSDNode>(MulVal.getOperand(1))) {
1804  Reg = MulVal.getOperand(0);
1805  ConstantSDNode *AddVal =
1806  cast<ConstantSDNode>(MulVal.getOperand(1));
1807  uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1808  if (foldOffsetIntoAddress(Disp, AM))
1809  Reg = N.getOperand(0);
1810  } else {
1811  Reg = N.getOperand(0);
1812  }
1813 
1814  AM.IndexReg = AM.Base_Reg = Reg;
1815  return false;
1816  }
1817  }
1818  break;
1819 
1820  case ISD::SUB: {
1821  // Given A-B, if A can be completely folded into the address (leaving
1822  // the index field unused), use -B as the index.
1823  // This is a win if A has multiple parts that can be folded into
1824  // the address. Also, this saves a mov if the base register has
1825  // other uses, since it avoids a two-address sub instruction, however
1826  // it costs an additional mov if the index register has other uses.
1827 
1828  // Add an artificial use to this node so that we can keep track of
1829  // it if it gets CSE'd with a different node.
1830  HandleSDNode Handle(N);
1831 
1832  // Test if the LHS of the sub can be folded.
1833  X86ISelAddressMode Backup = AM;
1834  if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
1835  N = Handle.getValue();
1836  AM = Backup;
1837  break;
1838  }
1839  N = Handle.getValue();
1840  // Test if the index field is free for use.
1841  if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
1842  AM = Backup;
1843  break;
1844  }
1845 
1846  int Cost = 0;
1847  SDValue RHS = N.getOperand(1);
1848  // If the RHS involves a register with multiple uses, this
1849  // transformation incurs an extra mov, due to the neg instruction
1850  // clobbering its operand.
1851  if (!RHS.getNode()->hasOneUse() ||
1852  RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
1853  RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
1854  RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
1855  (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
1856  RHS.getOperand(0).getValueType() == MVT::i32))
1857  ++Cost;
1858  // If the base is a register with multiple uses, this
1859  // transformation may save a mov.
1860  if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
1861  !AM.Base_Reg.getNode()->hasOneUse()) ||
1862  AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1863  --Cost;
1864  // If the folded LHS was interesting, this transformation saves
1865  // address arithmetic.
1866  if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
1867  ((AM.Disp != 0) && (Backup.Disp == 0)) +
1868  (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
1869  --Cost;
1870  // If it doesn't look like it may be an overall win, don't do it.
1871  if (Cost >= 0) {
1872  AM = Backup;
1873  break;
1874  }
1875 
1876  // Ok, the transformation is legal and appears profitable. Go for it.
1877  // Negation will be emitted later to avoid creating dangling nodes if this
1878  // was an unprofitable LEA.
1879  AM.IndexReg = RHS;
1880  AM.NegateIndex = true;
1881  AM.Scale = 1;
1882  return false;
1883  }
1884 
1885  case ISD::ADD:
1886  if (!matchAdd(N, AM, Depth))
1887  return false;
1888  break;
1889 
1890  case ISD::OR:
1891  // We want to look through a transform in InstCombine and DAGCombiner that
1892  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
1893  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
1894  // An 'lea' can then be used to match the shift (multiply) and add:
1895  // and $1, %esi
1896  // lea (%rsi, %rdi, 8), %rax
1897  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
1898  !matchAdd(N, AM, Depth))
1899  return false;
1900  break;
1901 
1902  case ISD::AND: {
1903  // Perform some heroic transforms on an and of a constant-count shift
1904  // with a constant to enable use of the scaled offset field.
1905 
1906  // Scale must not be used already.
1907  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1908 
1909  // We only handle up to 64-bit values here as those are what matter for
1910  // addressing mode optimizations.
1911  assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
1912  "Unexpected value size!");
1913 
1914  if (!isa<ConstantSDNode>(N.getOperand(1)))
1915  break;
1916 
1917  if (N.getOperand(0).getOpcode() == ISD::SRL) {
1918  SDValue Shift = N.getOperand(0);
1919  SDValue X = Shift.getOperand(0);
1920 
1921  uint64_t Mask = N.getConstantOperandVal(1);
1922 
1923  // Try to fold the mask and shift into an extract and scale.
1924  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
1925  return false;
1926 
1927  // Try to fold the mask and shift directly into the scale.
1928  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
1929  return false;
1930 
1931  // Try to fold the mask and shift into BEXTR and scale.
1932  if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
1933  return false;
1934  }
1935 
1936  // Try to swap the mask and shift to place shifts which can be done as
1937  // a scale on the outside of the mask.
1938  if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
1939  return false;
1940 
1941  break;
1942  }
1943  case ISD::ZERO_EXTEND: {
1944  // Try to widen a zexted shift left to the same size as its use, so we can
1945  // match the shift as a scale factor.
1946  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1947  break;
1948  if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
1949  break;
1950 
1951  // Give up if the shift is not a valid scale factor [1,2,3].
1952  SDValue Shl = N.getOperand(0);
1953  auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
1954  if (!ShAmtC || ShAmtC->getZExtValue() > 3)
1955  break;
1956 
1957  // The narrow shift must only shift out zero bits (it must be 'nuw').
1958  // That makes it safe to widen to the destination type.
1959  APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
1960                                          ShAmtC->getZExtValue());
1961  if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
1962  break;
1963 
1964  // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
1965  MVT VT = N.getSimpleValueType();
1966  SDLoc DL(N);
1967  SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
1968  SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));
1969 
1970  // Convert the shift to scale factor.
1971  AM.Scale = 1 << ShAmtC->getZExtValue();
1972  AM.IndexReg = Zext;
1973 
1974  insertDAGNode(*CurDAG, N, Zext);
1975  insertDAGNode(*CurDAG, N, NewShl);
1976  CurDAG->ReplaceAllUsesWith(N, NewShl);
1977  CurDAG->RemoveDeadNode(N.getNode());
1978  return false;
1979  }
1980  }
1981 
1982  return matchAddressBase(N, AM);
1983 }
1984 
1985 /// Helper for MatchAddress. Add the specified node to the
1986 /// specified addressing mode without any further recursion.
1987 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1988  // Is the base register already occupied?
1989  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
1990  // If so, check to see if the scale index register is set.
1991  if (!AM.IndexReg.getNode()) {
1992  AM.IndexReg = N;
1993  AM.Scale = 1;
1994  return false;
1995  }
1996 
1997  // Otherwise, we cannot select it.
1998  return true;
1999  }
2000 
2001  // Default, generate it as a register.
2002  AM.BaseType = X86ISelAddressMode::RegBase;
2003  AM.Base_Reg = N;
2004  return false;
2005 }
2006 
2007 /// Helper for selectVectorAddr. Handles things that can be folded into a
2008 /// gather scatter address. The index register and scale should have already
2009 /// been handled.
2010 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
2011  // TODO: Support other operations.
2012  switch (N.getOpcode()) {
2013  case ISD::Constant: {
2014  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2015  if (!foldOffsetIntoAddress(Val, AM))
2016  return false;
2017  break;
2018  }
2019  case X86ISD::Wrapper:
2020  if (!matchWrapper(N, AM))
2021  return false;
2022  break;
2023  }
2024 
2025  return matchAddressBase(N, AM);
2026 }
2027 
2028 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
2029  SDValue &Scale, SDValue &Index,
2030  SDValue &Disp, SDValue &Segment) {
2031  X86ISelAddressMode AM;
2032  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
2033  AM.IndexReg = Mgs->getIndex();
2034  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
2035 
2036  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2037  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2038  if (AddrSpace == 256)
2039  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2040  if (AddrSpace == 257)
2041  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2042  if (AddrSpace == 258)
2043  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2044 
2045  SDLoc DL(N);
2046  MVT VT = N.getSimpleValueType();
2047 
2048  // Try to match into the base and displacement fields.
2049  if (matchVectorAddress(N, AM))
2050  return false;
2051 
2052  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2053  return true;
2054 }
2055 
2056 /// Returns true if it is able to pattern match an addressing mode.
2057 /// It returns the operands which make up the maximal addressing mode it can
2058 /// match by reference.
2059 ///
2060 /// Parent is the parent node of the addr operand that is being matched. It
2061 /// is always a load, store, atomic node, or null. It is only null when
2062 /// checking memory operands for inline asm nodes.
2063 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
2064  SDValue &Scale, SDValue &Index,
2065  SDValue &Disp, SDValue &Segment) {
2066  X86ISelAddressMode AM;
2067 
2068  if (Parent &&
2069  // This list of opcodes covers all the nodes that have an "addr:$ptr" operand
2070  // but are not MemSDNodes, and thus don't have proper addrspace info.
2071  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
2072  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
2073  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
2074  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
2075  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
2076  unsigned AddrSpace =
2077  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2078  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
2079  if (AddrSpace == 256)
2080  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2081  if (AddrSpace == 257)
2082  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2083  if (AddrSpace == 258)
2084  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2085  }
2086 
2087  // Save the DL and VT before calling matchAddress, it can invalidate N.
2088  SDLoc DL(N);
2089  MVT VT = N.getSimpleValueType();
2090 
2091  if (matchAddress(N, AM))
2092  return false;
2093 
2094  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2095  return true;
2096 }
2097 
2098 // We can only fold a load if all nodes between it and the root node have a
2099 // single use. If there are additional uses, we could end up duplicating the
2100 // load.
2101 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
2102  while (User != Root) {
2103  if (!User->hasOneUse())
2104  return false;
2105  User = *User->use_begin();
2106  }
2107 
2108  return true;
2109 }
2110 
2111 /// Match a scalar SSE load. In particular, we want to match a load whose top
2112 /// elements are either undef or zeros. The load flavor is derived from the
2113 /// type of N, which is either v4f32 or v2f64.
2114 ///
2115 /// We also return:
2116 /// PatternChainNode: this is the matched node that has a chain input and
2117 /// output.
2118 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
2119  SDValue N, SDValue &Base,
2120  SDValue &Scale, SDValue &Index,
2121  SDValue &Disp, SDValue &Segment,
2122  SDValue &PatternNodeWithChain) {
2123  if (!hasSingleUsesFromRoot(Root, Parent))
2124  return false;
2125 
2126  // We can allow a full vector load here since narrowing a load is ok.
2127  if (ISD::isNON_EXTLoad(N.getNode())) {
2128  PatternNodeWithChain = N;
2129  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2130  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
2131  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2132  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2133  Segment);
2134  }
2135  }
2136 
2137  // We can also match the special zero extended load opcode.
2138  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
2139  PatternNodeWithChain = N;
2140  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2141  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
2142  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
2143  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
2144  Segment);
2145  }
2146  }
2147 
2148  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
2149  // once. Otherwise the load might get duplicated and the chain output of the
2150  // duplicate load will not be observed by all dependencies.
2151  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
2152  PatternNodeWithChain = N.getOperand(0);
2153  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2154  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2155  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2156  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2157  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2158  Segment);
2159  }
2160  }
2161 
2162  // Also handle the case where we explicitly require zeros in the top
2163  // elements. This is a vector shuffle from the zero vector.
2164  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
2165  // Check to see if the top elements are all zeros (or bitcast of zeros).
2166  N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
2167  N.getOperand(0).getNode()->hasOneUse()) {
2168  PatternNodeWithChain = N.getOperand(0).getOperand(0);
2169  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2170  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2171  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2172  // Okay, this is a zero extending load. Fold it.
2173  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2174  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2175  Segment);
2176  }
2177  }
2178 
2179  return false;
2180 }
2181 
2182 
2183 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
2184  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2185  uint64_t ImmVal = CN->getZExtValue();
2186  if (!isUInt<32>(ImmVal))
2187  return false;
2188 
2189  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
2190  return true;
2191  }
2192 
2193  // In static codegen with small code model, we can get the address of a label
2194  // into a register with 'movl'
2195  if (N->getOpcode() != X86ISD::Wrapper)
2196  return false;
2197 
2198  N = N.getOperand(0);
2199 
2200  // At least GNU as does not accept 'movl' for TPOFF relocations.
2201  // FIXME: We could use 'movl' when we know we are targeting MC.
2202  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
2203  return false;
2204 
2205  Imm = N;
2206  if (N->getOpcode() != ISD::TargetGlobalAddress)
2207  return TM.getCodeModel() == CodeModel::Small;
2208 
2209  Optional<ConstantRange> CR =
2210      cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
2211  if (!CR)
2212  return TM.getCodeModel() == CodeModel::Small;
2213 
2214  return CR->getUnsignedMax().ult(1ull << 32);
2215 }
2216 
2217 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
2218  SDValue &Scale, SDValue &Index,
2219  SDValue &Disp, SDValue &Segment) {
2220  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
2221  SDLoc DL(N);
2222 
2223  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
2224  return false;
2225 
2226  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
2227  if (RN && RN->getReg() == 0)
2228  Base = CurDAG->getRegister(0, MVT::i64);
2229  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
2230  // Base could already be %rip, particularly in the x32 ABI.
2231  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2232  MVT::i64), 0);
2233  Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2234  Base);
2235  }
2236 
2237  RN = dyn_cast<RegisterSDNode>(Index);
2238  if (RN && RN->getReg() == 0)
2239  Index = CurDAG->getRegister(0, MVT::i64);
2240  else {
2241  assert(Index.getValueType() == MVT::i32 &&
2242  "Expect to be extending 32-bit registers for use in LEA");
2243  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
2244  MVT::i64), 0);
2245  Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
2246  Index);
2247  }
2248 
2249  return true;
2250 }
2251 
2252 /// Calls SelectAddr and determines if the maximal addressing
2253 /// mode it matches can be cost effectively emitted as an LEA instruction.
2254 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
2255  SDValue &Base, SDValue &Scale,
2256  SDValue &Index, SDValue &Disp,
2257  SDValue &Segment) {
2258  X86ISelAddressMode AM;
2259 
2260  // Save the DL and VT before calling matchAddress, it can invalidate N.
2261  SDLoc DL(N);
2262  MVT VT = N.getSimpleValueType();
2263 
2264  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
2265  // segments.
2266  SDValue Copy = AM.Segment;
2267  SDValue T = CurDAG->getRegister(0, MVT::i32);
2268  AM.Segment = T;
2269  if (matchAddress(N, AM))
2270  return false;
2271  assert (T == AM.Segment);
2272  AM.Segment = Copy;
2273 
2274  unsigned Complexity = 0;
2275  if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
2276  Complexity = 1;
2277  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2278  Complexity = 4;
2279 
2280  if (AM.IndexReg.getNode())
2281  Complexity++;
2282 
2283  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
2284  // a simple shift.
2285  if (AM.Scale > 1)
2286  Complexity++;
2287 
2288  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
2289  // to a LEA. This is determined with some experimentation but is by no means
2290  // optimal (especially for code size consideration). LEA is nice because of
2291  // its three-address nature. Tweak the cost function again when we can run
2292  // convertToThreeAddress() at register allocation time.
2293  if (AM.hasSymbolicDisplacement()) {
2294  // For X86-64, always use LEA to materialize RIP-relative addresses.
2295  if (Subtarget->is64Bit())
2296  Complexity = 4;
2297  else
2298  Complexity += 2;
2299  }
2300 
2301  if (AM.Disp)
2302  Complexity++;
2303 
2304  // If it isn't worth using an LEA, reject it.
2305  if (Complexity <= 2)
2306  return false;
2307 
2308  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
2309  return true;
2310 }
2311 
2312 /// This is only run on TargetGlobalTLSAddress nodes.
2313 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
2314  SDValue &Scale, SDValue &Index,
2315  SDValue &Disp, SDValue &Segment) {
2316  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
2317  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
2318 
2319  X86ISelAddressMode AM;
2320  AM.GV = GA->getGlobal();
2321  AM.Disp += GA->getOffset();
2322  AM.SymbolFlags = GA->getTargetFlags();
2323 
2324  MVT VT = N.getSimpleValueType();
2325  if (VT == MVT::i32) {
2326  AM.Scale = 1;
2327  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
2328  }
2329 
2330  getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
2331  return true;
2332 }
2333 
2334 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
2335  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2336  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
2337  N.getValueType());
2338  return true;
2339  }
2340 
2341  // Keep track of the original value type and whether this value was
2342  // truncated. If we see a truncation from pointer type to VT that truncates
2343  // bits that are known to be zero, we can use a narrow reference.
2344  EVT VT = N.getValueType();
2345  bool WasTruncated = false;
2346  if (N.getOpcode() == ISD::TRUNCATE) {
2347  WasTruncated = true;
2348  N = N.getOperand(0);
2349  }
2350 
2351  if (N.getOpcode() != X86ISD::Wrapper)
2352  return false;
2353 
2354  // We can only use non-GlobalValues as immediates if they were not truncated,
2355  // as we do not have any range information. If we have a GlobalValue and the
2356  // address was not truncated, we can select it as an operand directly.
2357  unsigned Opc = N.getOperand(0)->getOpcode();
2358  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2359  Op = N.getOperand(0);
2360  // We can only select the operand directly if we didn't have to look past a
2361  // truncate.
2362  return !WasTruncated;
2363  }
2364 
2365  // Check that the global's range fits into VT.
2366  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2367  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2368  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2369  return false;
2370 
2371  // Okay, we can use a narrow reference.
2372  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2373  GA->getOffset(), GA->getTargetFlags());
2374  return true;
2375 }
2376 
2377 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2378  SDValue &Base, SDValue &Scale,
2379  SDValue &Index, SDValue &Disp,
2380  SDValue &Segment) {
2381  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2382  !IsProfitableToFold(N, P, Root) ||
2383  !IsLegalToFold(N, P, Root, OptLevel))
2384  return false;
2385 
2386  return selectAddr(N.getNode(),
2387  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2388 }
2389 
2390 /// Return an SDNode that returns the value of the global base register.
2391 /// Output instructions required to initialize the global base register,
2392 /// if necessary.
2393 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2394  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2395  auto &DL = MF->getDataLayout();
2396  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2397 }
2398 
2399 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2400  if (N->getOpcode() == ISD::TRUNCATE)
2401  N = N->getOperand(0).getNode();
2402  if (N->getOpcode() != X86ISD::Wrapper)
2403  return false;
2404 
2405  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2406  if (!GA)
2407  return false;
2408 
2409  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2410  return CR && CR->getSignedMin().sge(-1ull << Width) &&
2411  CR->getSignedMax().slt(1ull << Width);
2412 }
2413 
2414 static X86::CondCode getCondFromNode(SDNode *N) {
2415  assert(N->isMachineOpcode() && "Unexpected node");
2416  X86::CondCode CC = X86::COND_INVALID;
2417  unsigned Opc = N->getMachineOpcode();
2418  if (Opc == X86::JCC_1)
2419  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1));
2420  else if (Opc == X86::SETCCr)
2421  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0));
2422  else if (Opc == X86::SETCCm)
2423  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5));
2424  else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr ||
2425  Opc == X86::CMOV64rr)
2426  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2));
2427  else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm ||
2428  Opc == X86::CMOV64rm)
2429  CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6));
2430 
2431  return CC;
2432 }
2433 
2434 /// Test whether the given X86ISD::CMP node has any users that use a flag
2435 /// other than ZF.
2436 bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
2437  // Examine each user of the node.
2438  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2439  UI != UE; ++UI) {
2440  // Only check things that use the flags.
2441  if (UI.getUse().getResNo() != Flags.getResNo())
2442  continue;
2443  // Only examine CopyToReg uses that copy to EFLAGS.
2444  if (UI->getOpcode() != ISD::CopyToReg ||
2445  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2446  return false;
2447  // Examine each user of the CopyToReg use.
2448  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2449  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2450  // Only examine the Flag result.
2451  if (FlagUI.getUse().getResNo() != 1) continue;
2452  // Anything unusual: assume conservatively.
2453  if (!FlagUI->isMachineOpcode()) return false;
2454  // Examine the condition code of the user.
2455  X86::CondCode CC = getCondFromNode(*FlagUI);
2456 
2457  switch (CC) {
2458  // Comparisons which only use the zero flag.
2459  case X86::COND_E: case X86::COND_NE:
2460  continue;
2461  // Anything else: assume conservatively.
2462  default:
2463  return false;
2464  }
2465  }
2466  }
2467  return true;
2468 }
2469 
2470 /// Test whether the given X86ISD::CMP node has any uses which require the SF
2471 /// flag to be accurate.
2472 bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
2473  // Examine each user of the node.
2474  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2475  UI != UE; ++UI) {
2476  // Only check things that use the flags.
2477  if (UI.getUse().getResNo() != Flags.getResNo())
2478  continue;
2479  // Only examine CopyToReg uses that copy to EFLAGS.
2480  if (UI->getOpcode() != ISD::CopyToReg ||
2481  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2482  return false;
2483  // Examine each user of the CopyToReg use.
2484  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2485  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2486  // Only examine the Flag result.
2487  if (FlagUI.getUse().getResNo() != 1) continue;
2488  // Anything unusual: assume conservatively.
2489  if (!FlagUI->isMachineOpcode()) return false;
2490  // Examine the condition code of the user.
2491  X86::CondCode CC = getCondFromNode(*FlagUI);
2492 
2493  switch (CC) {
2494  // Comparisons which don't examine the SF flag.
2495  case X86::COND_A: case X86::COND_AE:
2496  case X86::COND_B: case X86::COND_BE:
2497  case X86::COND_E: case X86::COND_NE:
2498  case X86::COND_O: case X86::COND_NO:
2499  case X86::COND_P: case X86::COND_NP:
2500  continue;
2501  // Anything else: assume conservatively.
2502  default:
2503  return false;
2504  }
2505  }
2506  }
2507  return true;
2508 }
2509 
2510 static bool mayUseCarryFlag(X86::CondCode CC) {
2511  switch (CC) {
2512  // Comparisons which don't examine the CF flag.
2513  case X86::COND_O: case X86::COND_NO:
2514  case X86::COND_E: case X86::COND_NE:
2515  case X86::COND_S: case X86::COND_NS:
2516  case X86::COND_P: case X86::COND_NP:
2517  case X86::COND_L: case X86::COND_GE:
2518  case X86::COND_G: case X86::COND_LE:
2519  return false;
2520  // Anything else: assume conservatively.
2521  default:
2522  return true;
2523  }
2524 }
2525 
2526 /// Test whether the given node which sets flags has any uses which require the
2527 /// CF flag to be accurate.
2528  bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
2529  // Examine each user of the node.
2530  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2531  UI != UE; ++UI) {
2532  // Only check things that use the flags.
2533  if (UI.getUse().getResNo() != Flags.getResNo())
2534  continue;
2535 
2536  unsigned UIOpc = UI->getOpcode();
2537 
2538  if (UIOpc == ISD::CopyToReg) {
2539  // Only examine CopyToReg uses that copy to EFLAGS.
2540  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2541  return false;
2542  // Examine each user of the CopyToReg use.
2543  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2544  FlagUI != FlagUE; ++FlagUI) {
2545  // Only examine the Flag result.
2546  if (FlagUI.getUse().getResNo() != 1)
2547  continue;
2548  // Anything unusual: assume conservatively.
2549  if (!FlagUI->isMachineOpcode())
2550  return false;
2551  // Examine the condition code of the user.
2552  X86::CondCode CC = getCondFromNode(*FlagUI);
2553 
2554  if (mayUseCarryFlag(CC))
2555  return false;
2556  }
2557 
2558  // This CopyToReg is ok. Move on to the next user.
2559  continue;
2560  }
2561 
2562  // This might be an unselected node. So look for the pre-isel opcodes that
2563  // use flags.
2564  unsigned CCOpNo;
2565  switch (UIOpc) {
2566  default:
2567  // Something unusual. Be conservative.
2568  return false;
2569  case X86ISD::SETCC: CCOpNo = 0; break;
2570  case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
2571  case X86ISD::CMOV: CCOpNo = 2; break;
2572  case X86ISD::BRCOND: CCOpNo = 2; break;
2573  }
2574 
2575  X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
2576  if (mayUseCarryFlag(CC))
2577  return false;
2578  }
2579  return true;
2580 }
2581 
2582 /// Check whether or not the chain ending in StoreNode is suitable for doing
2583 /// the {load; op; store} to modify transformation.
2584 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2585                                         SDValue StoredVal, SelectionDAG *CurDAG,
2586  unsigned LoadOpNo,
2587  LoadSDNode *&LoadNode,
2588  SDValue &InputChain) {
2589  // Is the stored value result 0 of the operation?
2590  if (StoredVal.getResNo() != 0) return false;
2591 
2592  // Are there other uses of the operation other than the store?
2593  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2594 
2595  // Is the store non-extending and non-indexed?
2596  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2597  return false;
2598 
2599  SDValue Load = StoredVal->getOperand(LoadOpNo);
2600  // Is the stored value a non-extending and non-indexed load?
2601  if (!ISD::isNormalLoad(Load.getNode())) return false;
2602 
2603  // Return LoadNode by reference.
2604  LoadNode = cast<LoadSDNode>(Load);
2605 
2606  // Is store the only read of the loaded value?
2607  if (!Load.hasOneUse())
2608  return false;
2609 
2610  // Is the address of the store the same as the load?
2611  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2612  LoadNode->getOffset() != StoreNode->getOffset())
2613  return false;
2614 
2615  bool FoundLoad = false;
2616  SmallVector<SDValue, 4> ChainOps;
2617  SmallVector<const SDNode *, 4> LoopWorklist;
2618  SmallPtrSet<const SDNode *, 16> Visited;
2619  const unsigned int Max = 1024;
2620 
2621  // Visualization of Load-Op-Store fusion:
2622  // -------------------------
2623  // Legend:
2624  // *-lines = Chain operand dependencies.
2625  // |-lines = Normal operand dependencies.
2626  // Dependencies flow down and right. n-suffix references multiple nodes.
2627  //
2628  // C Xn C
2629  // * * *
2630  // * * *
2631  // Xn A-LD Yn TF Yn
2632  // * * \ | * |
2633  // * * \ | * |
2634  // * * \ | => A--LD_OP_ST
2635  // * * \| \
2636  // TF OP \
2637  // * | \ Zn
2638  // * | \
2639  // A-ST Zn
2640  //
2641 
2642  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
2643  // #2: Yn -> LD
2644  // #3: ST -> Zn
2645 
2646  // Ensure the transform is safe by checking for the dual
2647  // dependencies to make sure we do not induce a loop.
2648 
2649  // As LD is a predecessor to both OP and ST we can do this by checking:
2650  // a). if LD is a predecessor to a member of Xn or Yn.
2651  // b). if a Zn is a predecessor to ST.
2652 
2653  // However, (b) can only occur through being a chain predecessor to
2654  // ST, which is the same as Zn being a member or predecessor of Xn,
2655  // which is a subset of LD being a predecessor of Xn. So it's
2656  // subsumed by check (a).
2657 
2658  SDValue Chain = StoreNode->getChain();
2659 
2660  // Gather X elements in ChainOps.
2661  if (Chain == Load.getValue(1)) {
2662  FoundLoad = true;
2663  ChainOps.push_back(Load.getOperand(0));
2664  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2665  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2666  SDValue Op = Chain.getOperand(i);
2667  if (Op == Load.getValue(1)) {
2668  FoundLoad = true;
2669  // Drop Load, but keep its chain. No cycle check necessary.
2670  ChainOps.push_back(Load.getOperand(0));
2671  continue;
2672  }
2673  LoopWorklist.push_back(Op.getNode());
2674  ChainOps.push_back(Op);
2675  }
2676  }
2677 
2678  if (!FoundLoad)
2679  return false;
2680 
2681  // Worklist is currently Xn. Add Yn to worklist.
2682  for (SDValue Op : StoredVal->ops())
2683  if (Op.getNode() != LoadNode)
2684  LoopWorklist.push_back(Op.getNode());
2685 
2686  // Check (a) if Load is a predecessor to Xn + Yn
2687  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2688  true))
2689  return false;
2690 
2691  InputChain =
2692  CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2693  return true;
2694 }
2695 
2696 // Change a chain of {load; op; store} of the same value into a simple op
2697 // through memory of that value, if the uses of the modified value and its
2698 // address are suitable.
2699 //
2700 // The tablegen pattern memory operand pattern is currently not able to match
2701 // the case where the EFLAGS on the original operation are used.
2702 //
2703 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2704 // be transferred from a node in the pattern to the result node, probably with
2705 // a new keyword. For example, we have this
2706 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2707 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2708 // (implicit EFLAGS)]>;
2709 // but maybe need something like this
2710 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2711 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2712 // (transferrable EFLAGS)]>;
2713 //
2714 // Until then, we manually fold these and instruction select the operation
2715 // here.
2716 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2717  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2718  SDValue StoredVal = StoreNode->getOperand(1);
2719  unsigned Opc = StoredVal->getOpcode();
2720 
2721  // Before we try to select anything, make sure this is memory operand size
2722  // and opcode we can handle. Note that this must match the code below that
2723  // actually lowers the opcodes.
2724  EVT MemVT = StoreNode->getMemoryVT();
2725  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2726  MemVT != MVT::i8)
2727  return false;
2728 
2729  bool IsCommutable = false;
2730  bool IsNegate = false;
2731  switch (Opc) {
2732  default:
2733  return false;
2734  case X86ISD::SUB:
2735  IsNegate = isNullConstant(StoredVal.getOperand(0));
2736  break;
2737  case X86ISD::SBB:
2738  break;
2739  case X86ISD::ADD:
2740  case X86ISD::ADC:
2741  case X86ISD::AND:
2742  case X86ISD::OR:
2743  case X86ISD::XOR:
2744  IsCommutable = true;
2745  break;
2746  }
2747 
2748  unsigned LoadOpNo = IsNegate ? 1 : 0;
2749  LoadSDNode *LoadNode = nullptr;
2750  SDValue InputChain;
2751  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2752  LoadNode, InputChain)) {
2753  if (!IsCommutable)
2754  return false;
2755 
2756  // This operation is commutable, try the other operand.
2757  LoadOpNo = 1;
2758  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2759  LoadNode, InputChain))
2760  return false;
2761  }
2762 
2763  SDValue Base, Scale, Index, Disp, Segment;
2764  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2765  Segment))
2766  return false;
2767 
2768  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2769  unsigned Opc8) {
2770  switch (MemVT.getSimpleVT().SimpleTy) {
2771  case MVT::i64:
2772  return Opc64;
2773  case MVT::i32:
2774  return Opc32;
2775  case MVT::i16:
2776  return Opc16;
2777  case MVT::i8:
2778  return Opc8;
2779  default:
2780  llvm_unreachable("Invalid size!");
2781  }
2782  };
2783 
2784  MachineSDNode *Result;
2785  switch (Opc) {
2786  case X86ISD::SUB:
2787  // Handle negate.
2788  if (IsNegate) {
2789  unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
2790  X86::NEG8m);
2791  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2792  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
2793  MVT::Other, Ops);
2794  break;
2795  }
2796  LLVM_FALLTHROUGH;
2797  case X86ISD::ADD:
2798  // Try to match inc/dec.
2799  if (!Subtarget->slowIncDec() || OptForSize) {
2800  bool IsOne = isOneConstant(StoredVal.getOperand(1));
2801  bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
2802  // An ADD/SUB of 1/-1 whose carry flag result isn't used can be selected as inc/dec.
2803  if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
2804  unsigned NewOpc =
2805  ((Opc == X86ISD::ADD) == IsOne)
2806  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2807  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2808  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2809  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
2810  MVT::Other, Ops);
2811  break;
2812  }
2813  }
2814  LLVM_FALLTHROUGH;
2815  case X86ISD::ADC:
2816  case X86ISD::SBB:
2817  case X86ISD::AND:
2818  case X86ISD::OR:
2819  case X86ISD::XOR: {
2820  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
2821  switch (Opc) {
2822  case X86ISD::ADD:
2823  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
2824  X86::ADD8mr);
2825  case X86ISD::ADC:
2826  return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
2827  X86::ADC8mr);
2828  case X86ISD::SUB:
2829  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
2830  X86::SUB8mr);
2831  case X86ISD::SBB:
2832  return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
2833  X86::SBB8mr);
2834  case X86ISD::AND:
2835  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
2836  X86::AND8mr);
2837  case X86ISD::OR:
2838  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
2839  case X86ISD::XOR:
2840  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
2841  X86::XOR8mr);
2842  default:
2843  llvm_unreachable("Invalid opcode!");
2844  }
2845  };
2846  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
2847  switch (Opc) {
2848  case X86ISD::ADD:
2849  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
2850  case X86ISD::ADC:
2851  return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
2852  case X86ISD::SUB:
2853  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
2854  case X86ISD::SBB:
2855  return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
2856  case X86ISD::AND:
2857  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
2858  case X86ISD::OR:
2859  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
2860  case X86ISD::XOR:
2861  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
2862  default:
2863  llvm_unreachable("Invalid opcode!");
2864  }
2865  };
2866  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
2867  switch (Opc) {
2868  case X86ISD::ADD:
2869  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
2870  X86::ADD8mi);
2871  case X86ISD::ADC:
2872  return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
2873  X86::ADC8mi);
2874  case X86ISD::SUB:
2875  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
2876  X86::SUB8mi);
2877  case X86ISD::SBB:
2878  return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
2879  X86::SBB8mi);
2880  case X86ISD::AND:
2881  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
2882  X86::AND8mi);
2883  case X86ISD::OR:
2884  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
2885  X86::OR8mi);
2886  case X86ISD::XOR:
2887  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
2888  X86::XOR8mi);
2889  default:
2890  llvm_unreachable("Invalid opcode!");
2891  }
2892  };
2893 
2894  unsigned NewOpc = SelectRegOpcode(Opc);
2895  SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
2896 
2897  // See if the operand is a constant that we can fold into an immediate
2898  // operand.
2899  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
2900  int64_t OperandV = OperandC->getSExtValue();
2901 
2902  // Check if we can shrink the operand enough to fit in an immediate (or
2903  // fit into a smaller immediate) by negating it and switching the
2904  // operation.
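  // For example, an i32 'add $128' does not fit in an imm8 form, but the
  // equivalent 'sub $-128' does, so we negate the immediate and flip
  // ADD <-> SUB.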
2905  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
2906  ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
2907  (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
2908  isInt<32>(-OperandV))) &&
2909  hasNoCarryFlagUses(StoredVal.getValue(1))) {
2910  OperandV = -OperandV;
2911  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
2912  }
2913 
2914  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
2915  // the larger immediate operand.
2916  if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
2917  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2918  NewOpc = SelectImm8Opcode(Opc);
2919  } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
2920  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2921  NewOpc = SelectImmOpcode(Opc);
2922  }
2923  }
2924 
2925  if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
2926  SDValue CopyTo =
2927  CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
2928  StoredVal.getOperand(2), SDValue());
2929 
2930  const SDValue Ops[] = {Base, Scale, Index, Disp,
2931  Segment, Operand, CopyTo, CopyTo.getValue(1)};
2932  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2933  Ops);
2934  } else {
2935  const SDValue Ops[] = {Base, Scale, Index, Disp,
2936  Segment, Operand, InputChain};
2937  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2938  Ops);
2939  }
2940  break;
2941  }
2942  default:
2943  llvm_unreachable("Invalid opcode!");
2944  }
2945 
2946  MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
2947  LoadNode->getMemOperand()};
2948  CurDAG->setNodeMemRefs(Result, MemOps);
2949 
2950  // Update Load Chain uses as well.
2951  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
2952  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2953  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2954  CurDAG->RemoveDeadNode(Node);
2955  return true;
2956 }
2957 
2958 // See if this is an X & Mask that we can match to BEXTR/BZHI.
2959 // Where Mask is one of the following patterns:
2960 // a) x & (1 << nbits) - 1
2961 // b) x & ~(-1 << nbits)
2962 // c) x & (-1 >> (32 - y))
2963 // d) x << (32 - y) >> (32 - y)
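// All of these keep only the low 'nbits' (or 'y') bits of x; e.g. for
// nbits == 5 each pattern reduces to x & 0x1F, which can be selected as
// BZHI(x, 5) or as a BEXTR whose control encodes a bit count of 5 and a
// shift of 0.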
2964 bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
2965  assert(
2966  (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
2967  "Should be either an and-mask, or right-shift after clearing high bits.");
2968 
2969  // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
2970  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
2971  return false;
2972 
2973  MVT NVT = Node->getSimpleValueType(0);
2974 
2975  // Only supported for 32 and 64 bits.
2976  if (NVT != MVT::i32 && NVT != MVT::i64)
2977  return false;
2978 
2979  unsigned Size = NVT.getSizeInBits();
2980 
2981  SDValue NBits;
2982 
2983  // If we have BMI2's BZHI, we are ok with multi-use patterns.
2984  // Else, if we only have BMI1's BEXTR, we require one-use.
2985  const bool CanHaveExtraUses = Subtarget->hasBMI2();
2986  auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
2987  return CanHaveExtraUses ||
2988  Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
2989  };
2990  auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
2991  auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
2992 
2993  // a) x & ((1 << nbits) + (-1))
2994  auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool {
2995  // Match `add`. Must only have one use!
2996  if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
2997  return false;
2998  // We should be adding all-ones constant (i.e. subtracting one.)
2999  if (!isAllOnesConstant(Mask->getOperand(1)))
3000  return false;
3001  // Match `1 << nbits`. Must only have one use!
3002  SDValue M0 = Mask->getOperand(0);
3003  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3004  return false;
3005  if (!isOneConstant(M0->getOperand(0)))
3006  return false;
3007  NBits = M0->getOperand(1);
3008  return true;
3009  };
3010 
3011  // b) x & ~(-1 << nbits)
3012  auto matchPatternB = [&checkOneUse, &NBits](SDValue Mask) -> bool {
3013  // Match `~()`. Must only have one use!
3014  if (!isBitwiseNot(Mask) || !checkOneUse(Mask))
3015  return false;
3016  // Match `-1 << nbits`. Must only have one use!
3017  SDValue M0 = Mask->getOperand(0);
3018  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3019  return false;
3020  if (!isAllOnesConstant(M0->getOperand(0)))
3021  return false;
3022  NBits = M0->getOperand(1);
3023  return true;
3024  };
3025 
3026  // Match potentially-truncated (bitwidth - y)
3027  auto matchShiftAmt = [checkOneUse, Size, &NBits](SDValue ShiftAmt) {
3028  // Skip over a truncate of the shift amount.
3029  if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
3030  ShiftAmt = ShiftAmt.getOperand(0);
3031  // The trunc should have been the only user of the real shift amount.
3032  if (!checkOneUse(ShiftAmt))
3033  return false;
3034  }
3035  // Match the shift amount as: (bitwidth - y). It should go away, too.
3036  if (ShiftAmt.getOpcode() != ISD::SUB)
3037  return false;
3038  auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
3039  if (!V0 || V0->getZExtValue() != Size)
3040  return false;
3041  NBits = ShiftAmt.getOperand(1);
3042  return true;
3043  };
3044 
3045  // c) x & (-1 >> (32 - y))
3046  auto matchPatternC = [&checkOneUse, matchShiftAmt](SDValue Mask) -> bool {
3047  // Match `l>>`. Must only have one use!
3048  if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
3049  return false;
3050  // We should be shifting all-ones constant.
3051  if (!isAllOnesConstant(Mask.getOperand(0)))
3052  return false;
3053  SDValue M1 = Mask.getOperand(1);
3054  // The shift amount should not be used externally.
3055  if (!checkOneUse(M1))
3056  return false;
3057  return matchShiftAmt(M1);
3058  };
3059 
3060  SDValue X;
3061 
3062  // d) x << (32 - y) >> (32 - y)
3063  auto matchPatternD = [&checkOneUse, &checkTwoUse, matchShiftAmt,
3064  &X](SDNode *Node) -> bool {
3065  if (Node->getOpcode() != ISD::SRL)
3066  return false;
3067  SDValue N0 = Node->getOperand(0);
3068  if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
3069  return false;
3070  SDValue N1 = Node->getOperand(1);
3071  SDValue N01 = N0->getOperand(1);
3072  // Both of the shifts must be by the exact same value.
3073  // There should not be any uses of the shift amount outside of the pattern.
3074  if (N1 != N01 || !checkTwoUse(N1))
3075  return false;
3076  if (!matchShiftAmt(N1))
3077  return false;
3078  X = N0->getOperand(0);
3079  return true;
3080  };
3081 
3082  auto matchLowBitMask = [&matchPatternA, &matchPatternB,
3083  &matchPatternC](SDValue Mask) -> bool {
3084  // FIXME: pattern c.
3085  return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
3086  };
3087 
3088  if (Node->getOpcode() == ISD::AND) {
3089  X = Node->getOperand(0);
3090  SDValue Mask = Node->getOperand(1);
3091 
3092  if (matchLowBitMask(Mask)) {
3093  // Great.
3094  } else {
3095  std::swap(X, Mask);
3096  if (!matchLowBitMask(Mask))
3097  return false;
3098  }
3099  } else if (!matchPatternD(Node))
3100  return false;
3101 
3102  SDLoc DL(Node);
3103 
3104  // If we do *NOT* have BMI2, let's find out whether 'X' is *logically*
3105  // shifted (potentially with a one-use trunc in between),
3106  // and if so, look past the one-use truncation.
3107  MVT XVT = NVT;
3108  if (!Subtarget->hasBMI2() && X.getOpcode() == ISD::TRUNCATE &&
3109  X.hasOneUse() && X.getOperand(0).getOpcode() == ISD::SRL) {
3110  assert(NVT == MVT::i32 && "Expected target valuetype to be i32");
3111  X = X.getOperand(0);
3112  XVT = X.getSimpleValueType();
3113  assert(XVT == MVT::i64 && "Expected truncation from i64");
3114  }
3115 
3116  // Truncate the shift amount.
3117  NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
3118  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3119 
3120  // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
3121  // All the other bits are undefined, we do not care about them.
3122  SDValue ImplDef = SDValue(
3123  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
3124  insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
3125  NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef,
3126  NBits);
3127  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3128 
3129  if (Subtarget->hasBMI2()) {
3130  // Great, just emit the BZHI.
3131  if (XVT != MVT::i32) {
3132  // But have to place the bit count into the wide-enough register first.
3133  NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, NBits);
3134  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
3135  }
3136 
3137  SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, XVT, X, NBits);
3138  ReplaceNode(Node, Extract.getNode());
3139  SelectCode(Extract.getNode());
3140  return true;
3141  }
3142 
3143  // Else, emitting BEXTR requires one more step.
3144  // The 'control' of BEXTR has the pattern of:
3145  // [15...8 bit][ 7...0 bit] location
3146  // [ bit count][ shift] name
3147  // I.e. 0b00000010'00000001 means (x >> 0b1) & 0b11
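  // Worked example (illustrative): to keep nbits == 3 bits starting at bit 2,
  // the control would be (3 << 8) | 2 == 0x0302, i.e. (x >> 2) & 0b111.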
3148 
3149  // Shift NBits left by 8 bits, thus producing 'control'.
3150  // This makes the low 8 bits zero.
3151  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
3152  SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
3153  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3154 
3155  // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
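  // E.g. extracting nbits == 4 bits from (x >> 3) folds to control
  // (4 << 8) | 3 == 0x0403 applied to the un-shifted x.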
3156  if (X.getOpcode() == ISD::SRL) {
3157  SDValue ShiftAmt = X.getOperand(1);
3158  X = X.getOperand(0);
3159 
3160  assert(ShiftAmt.getValueType() == MVT::i8 &&
3161  "Expected shift amount to be i8");
3162 
3163  // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
3164  // We could zext to i16 in some form, but we intentionally don't do that.
3165  SDValue OrigShiftAmt = ShiftAmt;
3166  ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
3167  insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
3168 
3169  // And now 'or' these low 8 bits of shift amount into the 'control'.
3170  Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
3171  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3172  }
3173 
3174  // But have to place the 'control' into the wide-enough register first.
3175  if (XVT != MVT::i32) {
3176  Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
3177  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
3178  }
3179 
3180  // And finally, form the BEXTR itself.
3181  SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
3182 
3183  // The 'X' was originally truncated; truncate the result back down now.
3184  if (XVT != NVT) {
3185  insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
3186  Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
3187  }
3188 
3189  ReplaceNode(Node, Extract.getNode());
3190  SelectCode(Extract.getNode());
3191 
3192  return true;
3193 }
3194 
3195 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
3196 MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
3197  MVT NVT = Node->getSimpleValueType(0);
3198  SDLoc dl(Node);
3199 
3200  SDValue N0 = Node->getOperand(0);
3201  SDValue N1 = Node->getOperand(1);
3202 
3203  // If we have TBM we can use an immediate for the control. If we have BMI
3204  // we should only do this if the BEXTR instruction is implemented well.
3205  // Otherwise moving the control into a register makes this more costly.
3206  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
3207  // hoisting the move immediate would make it worthwhile with a less optimal
3208  // BEXTR?
3209  if (!Subtarget->hasTBM() &&
3210  !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
3211  return nullptr;
3212 
3213  // Must have a shift right.
3214  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
3215  return nullptr;
3216 
3217  // Shift can't have additional users.
3218  if (!N0->hasOneUse())
3219  return nullptr;
3220 
3221  // Only supported for 32 and 64 bits.
3222  if (NVT != MVT::i32 && NVT != MVT::i64)
3223  return nullptr;
3224 
3225  // Shift amount and RHS of and must be constant.
3226  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
3227  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3228  if (!MaskCst || !ShiftCst)
3229  return nullptr;
3230 
3231  // And RHS must be a mask.
3232  uint64_t Mask = MaskCst->getZExtValue();
3233  if (!isMask_64(Mask))
3234  return nullptr;
3235 
3236  uint64_t Shift = ShiftCst->getZExtValue();
3237  uint64_t MaskSize = countPopulation(Mask);
3238 
3239  // Don't interfere with something that can be handled by extracting AH.
3240  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
3241  if (Shift == 8 && MaskSize == 8)
3242  return nullptr;
3243 
3244  // Make sure we are only using bits that were in the original value, not
3245  // shifted in.
3246  if (Shift + MaskSize > NVT.getSizeInBits())
3247  return nullptr;
3248 
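  // The BEXTR control packs the mask size into bits 15:8 and the shift amount
  // into bits 7:0. E.g. (X >> 4) & 0xFF has Shift == 4 and MaskSize == 8, so
  // the control is 0x0804.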
3249  SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
3250  unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
3251  unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
3252 
3253  // BMI requires the immediate to be placed in a register.
3254  if (!Subtarget->hasTBM()) {
3255  ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
3256  MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
3257  unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3258  New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
3259  }
3260 
3261  MachineSDNode *NewNode;
3262  SDValue Input = N0->getOperand(0);
3263  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3264  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3265  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
3266  SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
3267  NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3268  // Update the chain.
3269  ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
3270  // Record the mem-refs
3271  CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
3272  } else {
3273  NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New);
3274  }
3275 
3276  return NewNode;
3277 }
3278 
3279 // Emit a PCMPISTR(I/M) instruction.
3280 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
3281  bool MayFoldLoad, const SDLoc &dl,
3282  MVT VT, SDNode *Node) {
3283  SDValue N0 = Node->getOperand(0);
3284  SDValue N1 = Node->getOperand(1);
3285  SDValue Imm = Node->getOperand(2);
3286  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3287  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3288 
3289  // Try to fold a load. No need to check alignment.
3290  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3291  if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3292  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3293  N1.getOperand(0) };
3294  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
3295  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3296  // Update the chain.
3297  ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
3298  // Record the mem-refs
3299  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3300  return CNode;
3301  }
3302 
3303  SDValue Ops[] = { N0, N1, Imm };
3304  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
3305  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3306  return CNode;
3307 }
3308 
3309 // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
3310 // to emit a second instruction after this one. This is needed since we have two
3311 // copyToReg nodes glued before this and we need to continue that glue through.
3312 MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
3313  bool MayFoldLoad, const SDLoc &dl,
3314  MVT VT, SDNode *Node,
3315  SDValue &InFlag) {
3316  SDValue N0 = Node->getOperand(0);
3317  SDValue N2 = Node->getOperand(2);
3318  SDValue Imm = Node->getOperand(4);
3319  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3320  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3321 
3322  // Try to fold a load. No need to check alignment.
3323  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3324  if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3325  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3326  N2.getOperand(0), InFlag };
3327  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
3328  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3329  InFlag = SDValue(CNode, 3);
3330  // Update the chain.
3331  ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
3332  // Record the mem-refs
3333  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
3334  return CNode;
3335  }
3336 
3337  SDValue Ops[] = { N0, N2, Imm, InFlag };
3338  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
3339  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3340  InFlag = SDValue(CNode, 2);
3341  return CNode;
3342 }
3343 
3344 bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3345  EVT VT = N->getValueType(0);
3346 
3347  // Only handle scalar shifts.
3348  if (VT.isVector())
3349  return false;
3350 
3351  // Narrower shifts only mask to 5 bits in hardware.
3352  unsigned Size = VT == MVT::i64 ? 64 : 32;
3353 
3354  SDValue OrigShiftAmt = N->getOperand(1);
3355  SDValue ShiftAmt = OrigShiftAmt;
3356  SDLoc DL(N);
3357 
3358  // Skip over a truncate of the shift amount.
3359  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
3360  ShiftAmt = ShiftAmt->getOperand(0);
3361 
3362  // This function is called after X86DAGToDAGISel::matchBitExtract(),
3363  // so we are not afraid that we might mess up BZHI/BEXTR pattern.
3364 
3365  SDValue NewShiftAmt;
3366  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3367  SDValue Add0 = ShiftAmt->getOperand(0);
3368  SDValue Add1 = ShiftAmt->getOperand(1);
3369  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3370  // to avoid the ADD/SUB.
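  // E.g. for a 32-bit shift, (x << (y + 32)) is the same as (x << y) because
  // the hardware masks the shift amount to its low 5 bits.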
3371  if (isa<ConstantSDNode>(Add1) &&
3372  cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
3373  NewShiftAmt = Add0;
3374  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3375  // generate a NEG instead of a SUB of a constant.
3376  } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3377  isa<ConstantSDNode>(Add0) &&
3378  cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
3379  cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
3380  // Insert a negate op.
3381  // TODO: This isn't guaranteed to replace the sub if there is a logic cone
3382  // that uses it that's not a shift.
3383  EVT SubVT = ShiftAmt.getValueType();
3384  SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
3385  SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
3386  NewShiftAmt = Neg;
3387 
3388  // Insert these operands into a valid topological order so they can
3389  // get selected independently.
3390  insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
3391  insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
3392  } else
3393  return false;
3394  } else
3395  return false;
3396 
3397  if (NewShiftAmt.getValueType() != MVT::i8) {
3398  // Need to truncate the shift amount.
3399  NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
3400  // Add to a correct topological ordering.
3401  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3402  }
3403 
3404  // Insert a new mask to keep the shift amount legal. This should be removed
3405  // by isel patterns.
3406  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
3407  CurDAG->getConstant(Size - 1, DL, MVT::i8));
3408  // Place in a correct topological ordering.
3409  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3410 
3411  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
3412  NewShiftAmt);
3413  if (UpdatedNode != N) {
3414  // If we found an existing node, we should replace ourselves with that node
3415  // and wait for it to be selected after its other users.
3416  ReplaceNode(N, UpdatedNode);
3417  return true;
3418  }
3419 
3420  // If the original shift amount is now dead, delete it so that we don't run
3421  // it through isel.
3422  if (OrigShiftAmt.getNode()->use_empty())
3423  CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
3424 
3425  // Now that we've optimized the shift amount, defer to normal isel to get
3426  // load folding and legacy vs BMI2 selection without repeating it here.
3427  SelectCode(N);
3428  return true;
3429 }
3430 
3431 /// If the high bits of an 'and' operand are known zero, try setting the
3432 /// high bits of an 'and' constant operand to produce a smaller encoding by
3433 /// creating a small, sign-extended negative immediate rather than a large
3434 /// positive one. This reverses a transform in SimplifyDemandedBits that
3435 /// shrinks mask constants by clearing bits. There is also a possibility that
3436 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
3437 /// case, just replace the 'and'. Return 'true' if the node is replaced.
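/// For example, if the top 8 bits of the operand are known to be zero,
/// (x & 0x00FFFFF0) can instead use the mask 0xFFFFFFF0 (-16), which fits in
/// a sign-extended 8-bit immediate.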
3438 bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
3439  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
3440  // have immediate operands.
3441  MVT VT = And->getSimpleValueType(0);
3442  if (VT != MVT::i32 && VT != MVT::i64)
3443  return false;
3444 
3445  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
3446  if (!And1C)
3447  return false;
3448 
3449  // Bail out if the mask constant is already negative. It can't shrink any further.
3450  // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
3451  // patterns to use a 32-bit and instead of a 64-bit and by relying on the
3452  // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
3453  // are negative too.
3454  APInt MaskVal = And1C->getAPIntValue();
3455  unsigned MaskLZ = MaskVal.countLeadingZeros();
3456  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
3457  return false;
3458 
3459  // Don't extend into the upper 32 bits of a 64 bit mask.
3460  if (VT == MVT::i64 && MaskLZ >= 32) {
3461  MaskLZ -= 32;
3462  MaskVal = MaskVal.trunc(32);
3463  }
3464 
3465  SDValue And0 = And->getOperand(0);
3466  APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
3467  APInt NegMaskVal = MaskVal | HighZeros;
3468 
3469  // If a negative constant would not allow a smaller encoding, there's no need
3470  // to continue. Only change the constant when we know it's a win.
3471  unsigned MinWidth = NegMaskVal.getMinSignedBits();
3472  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
3473  return false;
3474 
3475  // Extend masks if we truncated above.
3476  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
3477  NegMaskVal = NegMaskVal.zext(64);
3478  HighZeros = HighZeros.zext(64);
3479  }
3480 
3481  // The variable operand must be all zeros in the top bits to allow using the
3482  // new, negative constant as the mask.
3483  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
3484  return false;
3485 
3486  // Check if the mask is -1. In that case, this is an unnecessary instruction
3487  // that escaped earlier analysis.
3488  if (NegMaskVal.isAllOnesValue()) {
3489  ReplaceNode(And, And0.getNode());
3490  return true;
3491  }
3492 
3493  // A negative mask allows a smaller encoding. Create a new 'and' node.
3494  SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
3495  SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
3496  ReplaceNode(And, NewAnd.getNode());
3497  SelectCode(NewAnd.getNode());
3498  return true;
3499 }
3500 
3501 static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
3502  bool FoldedBCast, bool Masked) {
3503  if (Masked) {
3504  if (FoldedLoad) {
3505  switch (TestVT.SimpleTy) {
3506  default: llvm_unreachable("Unexpected VT!");
3507  case MVT::v16i8:
3508  return IsTestN ? X86::VPTESTNMBZ128rmk : X86::VPTESTMBZ128rmk;
3509  case MVT::v8i16:
3510  return IsTestN ? X86::VPTESTNMWZ128rmk : X86::VPTESTMWZ128rmk;
3511  case MVT::v4i32:
3512  return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk;
3513  case MVT::v2i64:
3514  return IsTestN ? X86::VPTESTNMQZ128rmk : X86::VPTESTMQZ128rmk;
3515  case MVT::v32i8:
3516  return IsTestN ? X86::VPTESTNMBZ256rmk : X86::VPTESTMBZ256rmk;
3517  case MVT::v16i16:
3518  return IsTestN ? X86::VPTESTNMWZ256rmk : X86::VPTESTMWZ256rmk;
3519  case MVT::v8i32:
3520  return IsTestN ? X86::VPTESTNMDZ256rmk : X86::VPTESTMDZ256rmk;
3521  case MVT::v4i64:
3522  return IsTestN ? X86::VPTESTNMQZ256rmk : X86::VPTESTMQZ256rmk;
3523  case MVT::v64i8:
3524  return IsTestN ? X86::VPTESTNMBZrmk : X86::VPTESTMBZrmk;
3525  case MVT::v32i16:
3526  return IsTestN ? X86::VPTESTNMWZrmk : X86::VPTESTMWZrmk;
3527  case MVT::v16i32:
3528  return IsTestN ? X86::VPTESTNMDZrmk : X86::VPTESTMDZrmk;
3529  case MVT::v8i64:
3530  return IsTestN ? X86::VPTESTNMQZrmk : X86::VPTESTMQZrmk;
3531  }
3532  }
3533 
3534  if (FoldedBCast) {
3535  switch (TestVT.SimpleTy) {
3536  default: llvm_unreachable("Unexpected VT!");
3537  case MVT::v4i32:
3538  return IsTestN ? X86::VPTESTNMDZ128rmbk : X86::VPTESTMDZ128rmbk;
3539  case MVT::v2i64:
3540  return IsTestN ? X86::VPTESTNMQZ128rmbk : X86::VPTESTMQZ128rmbk;
3541  case MVT::v8i32:
3542  return IsTestN ? X86::VPTESTNMDZ256rmbk : X86::VPTESTMDZ256rmbk;
3543  case MVT::v4i64:
3544  return IsTestN ? X86::VPTESTNMQZ256rmbk : X86::VPTESTMQZ256rmbk;
3545  case MVT::v16i32:
3546  return IsTestN ? X86::VPTESTNMDZrmbk : X86::VPTESTMDZrmbk;
3547  case MVT::v8i64:
3548  return IsTestN ? X86::VPTESTNMQZrmbk : X86::VPTESTMQZrmbk;
3549  }
3550  }
3551 
3552  switch (TestVT.SimpleTy) {
3553  default: llvm_unreachable("Unexpected VT!");
3554  case MVT::v16i8:
3555  return IsTestN ? X86::VPTESTNMBZ128rrk : X86::VPTESTMBZ128rrk;
3556  case MVT::v8i16:
3557  return IsTestN ? X86::VPTESTNMWZ128rrk : X86::VPTESTMWZ128rrk;
3558  case MVT::v4i32:
3559  return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk;
3560  case MVT::v2i64:
3561  return IsTestN ? X86::VPTESTNMQZ128rrk : X86::VPTESTMQZ128rrk;
3562  case MVT::v32i8:
3563  return IsTestN ? X86::VPTESTNMBZ256rrk : X86::VPTESTMBZ256rrk;
3564  case MVT::v16i16:
3565  return IsTestN ? X86::VPTESTNMWZ256rrk : X86::VPTESTMWZ256rrk;
3566  case MVT::v8i32:
3567  return IsTestN ? X86::VPTESTNMDZ256rrk : X86::VPTESTMDZ256rrk;
3568  case MVT::v4i64:
3569  return IsTestN ? X86::VPTESTNMQZ256rrk : X86::VPTESTMQZ256rrk;
3570  case MVT::v64i8:
3571  return IsTestN ? X86::VPTESTNMBZrrk : X86::VPTESTMBZrrk;
3572  case MVT::v32i16:
3573  return IsTestN ? X86::VPTESTNMWZrrk : X86::VPTESTMWZrrk;
3574  case MVT::v16i32:
3575  return IsTestN ? X86::VPTESTNMDZrrk : X86::VPTESTMDZrrk;
3576  case MVT::v8i64:
3577  return IsTestN ? X86::VPTESTNMQZrrk : X86::VPTESTMQZrrk;
3578  }
3579  }
3580 
3581  if (FoldedLoad) {
3582  switch (TestVT.SimpleTy) {
3583  default: llvm_unreachable("Unexpected VT!");
3584  case MVT::v16i8:
3585  return IsTestN ? X86::VPTESTNMBZ128rm : X86::VPTESTMBZ128rm;
3586  case MVT::v8i16:
3587  return IsTestN ? X86::VPTESTNMWZ128rm : X86::VPTESTMWZ128rm;
3588  case MVT::v4i32:
3589  return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm;
3590  case MVT::v2i64:
3591  return IsTestN ? X86::VPTESTNMQZ128rm : X86::VPTESTMQZ128rm;
3592  case MVT::v32i8:
3593  return IsTestN ? X86::VPTESTNMBZ256rm : X86::VPTESTMBZ256rm;
3594  case MVT::v16i16:
3595  return IsTestN ? X86::VPTESTNMWZ256rm : X86::VPTESTMWZ256rm;
3596  case MVT::v8i32:
3597  return IsTestN ? X86::VPTESTNMDZ256rm : X86::VPTESTMDZ256rm;
3598  case MVT::v4i64:
3599  return IsTestN ? X86::VPTESTNMQZ256rm : X86::VPTESTMQZ256rm;
3600  case MVT::v64i8:
3601  return IsTestN ? X86::VPTESTNMBZrm : X86::VPTESTMBZrm;
3602  case MVT::v32i16:
3603  return IsTestN ? X86::VPTESTNMWZrm : X86::VPTESTMWZrm;
3604  case MVT::v16i32:
3605  return IsTestN ? X86::VPTESTNMDZrm : X86::VPTESTMDZrm;
3606  case MVT::v8i64:
3607  return IsTestN ? X86::VPTESTNMQZrm : X86::VPTESTMQZrm;
3608  }
3609  }
3610 
3611  if (FoldedBCast) {
3612  switch (TestVT.SimpleTy) {
3613  default: llvm_unreachable("Unexpected VT!");
3614  case MVT::v4i32:
3615  return IsTestN ? X86::VPTESTNMDZ128rmb : X86::VPTESTMDZ128rmb;
3616  case MVT::v2i64:
3617  return IsTestN ? X86::VPTESTNMQZ128rmb : X86::VPTESTMQZ128rmb;
3618  case MVT::v8i32:
3619  return IsTestN ? X86::VPTESTNMDZ256rmb : X86::VPTESTMDZ256rmb;
3620  case MVT::v4i64:
3621  return IsTestN ? X86::VPTESTNMQZ256rmb : X86::VPTESTMQZ256rmb;
3622  case MVT::v16i32:
3623  return IsTestN ? X86::VPTESTNMDZrmb : X86::VPTESTMDZrmb;
3624  case MVT::v8i64:
3625  return IsTestN ? X86::VPTESTNMQZrmb : X86::VPTESTMQZrmb;
3626  }
3627  }
3628 
3629  switch (TestVT.SimpleTy) {
3630  default: llvm_unreachable("Unexpected VT!");
3631  case MVT::v16i8:
3632  return IsTestN ? X86::VPTESTNMBZ128rr : X86::VPTESTMBZ128rr;
3633  case MVT::v8i16:
3634  return IsTestN ? X86::VPTESTNMWZ128rr : X86::VPTESTMWZ128rr;
3635  case MVT::v4i32:
3636  return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr;
3637  case MVT::v2i64:
3638  return IsTestN ? X86::VPTESTNMQZ128rr : X86::VPTESTMQZ128rr;
3639  case MVT::v32i8:
3640  return IsTestN ? X86::VPTESTNMBZ256rr : X86::VPTESTMBZ256rr;
3641  case MVT::v16i16:
3642  return IsTestN ? X86::VPTESTNMWZ256rr : X86::VPTESTMWZ256rr;
3643  case MVT::v8i32:
3644  return IsTestN ? X86::VPTESTNMDZ256rr : X86::VPTESTMDZ256rr;
3645  case MVT::v4i64:
3646  return IsTestN ? X86::VPTESTNMQZ256rr : X86::VPTESTMQZ256rr;
3647  case MVT::v64i8:
3648  return IsTestN ? X86::VPTESTNMBZrr : X86::VPTESTMBZrr;
3649  case MVT::v32i16:
3650  return IsTestN ? X86::VPTESTNMWZrr : X86::VPTESTMWZrr;
3651  case MVT::v16i32:
3652  return IsTestN ? X86::VPTESTNMDZrr : X86::VPTESTMDZrr;
3653  case MVT::v8i64:
3654  return IsTestN ? X86::VPTESTNMQZrr : X86::VPTESTMQZrr;
3655  }
3656 }
3657 
3658 // Try to create VPTESTM instruction. If InMask is not null, it will be used
3659 // to form a masked operation.
3660 bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
3661  SDValue InMask) {
3662  assert(Subtarget->hasAVX512() && "Expected AVX512!");
3663  assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
3664  "Unexpected VT!");
3665 
3666  // Look for equal and not equal compares.
3667  ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
3668  if (CC != ISD::SETEQ && CC != ISD::SETNE)
3669  return false;
3670 
3671  // See if we're comparing against zero. This should have been canonicalized
3672  // to RHS during lowering.
3673  if (!ISD::isBuildVectorAllZeros(Setcc.getOperand(1).getNode()))
3674  return false;
3675 
3676  SDValue N0 = Setcc.getOperand(0);
3677 
3678  MVT CmpVT = N0.getSimpleValueType();
3679  MVT CmpSVT = CmpVT.getVectorElementType();
3680 
3681  // Start with both operands the same. We'll try to refine this.
3682  SDValue Src0 = N0;
3683  SDValue Src1 = N0;
3684 
3685  {
3686  // Look through single use bitcasts.
3687  SDValue N0Temp = N0;
3688  if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
3689  N0Temp = N0.getOperand(0);
3690 
3691  // Look for single use AND.
3692  if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
3693  Src0 = N0Temp.getOperand(0);
3694  Src1 = N0Temp.getOperand(1);
3695  }
3696  }
3697 
3698  // Without VLX we need to widen the load.
3699  bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
3700 
3701  // We can only fold loads if the sources are unique.
3702  bool CanFoldLoads = Src0 != Src1;
3703 
3704  // Try to fold loads unless we need to widen.
3705  bool FoldedLoad = false;
3706  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load;
3707  if (!Widen && CanFoldLoads) {
3708  Load = Src1;
3709  FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3,
3710  Tmp4);
3711  if (!FoldedLoad) {
3712  // AND is commutative.
3713  Load = Src0;
3714  FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2,
3715  Tmp3, Tmp4);
3716  if (FoldedLoad)
3717  std::swap(Src0, Src1);
3718  }
3719  }
3720 
3721  auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
3722  // Look through single use bitcasts.
3723  if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse())
3724  Src = Src.getOperand(0);
3725 
3726  if (Src.getOpcode() == X86ISD::VBROADCAST && Src.hasOneUse()) {
3727  Parent = Src.getNode();
3728  Src = Src.getOperand(0);
3729  if (Src.getSimpleValueType() == CmpSVT)
3730  return Src;
3731  }
3732 
3733  return SDValue();
3734  };
3735 
3736  // If we didn't fold a load, try to match broadcast. No widening limitation
3737  // for this. But only 32 and 64 bit types are supported.
3738  bool FoldedBCast = false;
3739  if (!FoldedLoad && CanFoldLoads &&
3740  (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
3741  SDNode *ParentNode = nullptr;
3742  if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
3743  FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
3744  Tmp1, Tmp2, Tmp3, Tmp4);
3745  }
3746 
3747  // Try the other operand.
3748  if (!FoldedBCast) {
3749  if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
3750  FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
3751  Tmp1, Tmp2, Tmp3, Tmp4);
3752  if (FoldedBCast)
3753  std::swap(Src0, Src1);
3754  }
3755  }
3756  }
3757 
3758  auto getMaskRC = [](MVT MaskVT) {
3759  switch (MaskVT.SimpleTy) {
3760  default: llvm_unreachable("Unexpected VT!");
3761  case MVT::v2i1: return X86::VK2RegClassID;
3762  case MVT::v4i1: return X86::VK4RegClassID;
3763  case MVT::v8i1: return X86::VK8RegClassID;
3764  case MVT::v16i1: return X86::VK16RegClassID;
3765  case MVT::v32i1: return X86::VK32RegClassID;
3766  case MVT::v64i1: return X86::VK64RegClassID;
3767  }
3768  };
3769 
3770  bool IsMasked = InMask.getNode() != nullptr;
3771 
3772  SDLoc dl(Root);
3773 
3774  MVT ResVT = Setcc.getSimpleValueType();
3775  MVT MaskVT = ResVT;
3776  if (Widen) {
3777  // Widen the inputs using insert_subreg or copy_to_regclass.
3778  unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
3779  unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
3780  unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
3781  CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
3782  MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3783  SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
3784  CmpVT), 0);
3785  Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
3786 
3787  assert(!FoldedLoad && "Shouldn't have folded the load");
3788  if (!FoldedBCast)
3789  Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
3790 
3791  if (IsMasked) {
3792  // Widen the mask.
3793  unsigned RegClass = getMaskRC(MaskVT);
3794  SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
3795  InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
3796  dl, MaskVT, InMask, RC), 0);
3797  }
3798  }
3799 
3800  bool IsTestN = CC == ISD::SETEQ;
3801  unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
3802  IsMasked);
3803 
3804  MachineSDNode *CNode;
3805  if (FoldedLoad || FoldedBCast) {
3806  SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
3807 
3808  if (IsMasked) {
3809  SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
3810  Load.getOperand(0) };
3811  CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3812  } else {
3813  SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
3814  Load.getOperand(0) };
3815  CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3816  }
3817 
3818  // Update the chain.
3819  ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
3820  // Record the mem-refs
3821  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
3822  } else {
3823  if (IsMasked)
3824  CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
3825  else
3826  CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
3827  }
3828 
3829  // If we widened, we need to shrink the mask VT.
3830  if (Widen) {
3831  unsigned RegClass = getMaskRC(ResVT);
3832  SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
3833  CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
3834  dl, ResVT, SDValue(CNode, 0), RC);
3835  }
3836 
3837  ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
3838  CurDAG->RemoveDeadNode(Root);
3839  return true;
3840 }
3841 
3842 void X86DAGToDAGISel::Select(SDNode *Node) {
3843  MVT NVT = Node->getSimpleValueType(0);
3844  unsigned Opcode = Node->getOpcode();
3845  SDLoc dl(Node);
3846 
3847  if (Node->isMachineOpcode()) {
3848  LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
3849  Node->setNodeId(-1);
3850  return; // Already selected.
3851  }
3852 
3853  switch (Opcode) {
3854  default: break;
3855  case ISD::INTRINSIC_VOID: {
3856  unsigned IntNo = Node->getConstantOperandVal(1);
3857  switch (IntNo) {
3858  default: break;
3859  case Intrinsic::x86_sse3_monitor:
3860  case Intrinsic::x86_monitorx:
3861  case Intrinsic::x86_clzero: {
3862  bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
3863 
3864  unsigned Opc = 0;
3865  switch (IntNo) {
3866  case Intrinsic::x86_sse3_monitor:
3867  if (!Subtarget->hasSSE3())
3868  break;
3869  Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
3870  break;
3871  case Intrinsic::x86_monitorx:
3872  if (!Subtarget->hasMWAITX())
3873  break;
3874  Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
3875  break;
3876  case Intrinsic::x86_clzero:
3877  if (!Subtarget->hasCLZERO())
3878  break;
3879  Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
3880  break;
3881  }
3882 
3883  if (Opc) {
3884  unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
3885  SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
3886  Node->getOperand(2), SDValue());
3887  SDValue InFlag = Chain.getValue(1);
3888 
3889  if (IntNo == Intrinsic::x86_sse3_monitor ||
3890  IntNo == Intrinsic::x86_monitorx) {
3891  // Copy the other two operands to ECX and EDX.
3892  Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
3893  InFlag);
3894  InFlag = Chain.getValue(1);
3895  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
3896  InFlag);
3897  InFlag = Chain.getValue(1);
3898  }
3899 
3900  MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3901  { Chain, InFlag});
3902  ReplaceNode(Node, CNode);
3903  return;
3904  }
3905  }
3906  }
3907 
3908  break;
3909  }
3910  case ISD::BRIND: {
3911  if (Subtarget->isTargetNaCl())
3912  // NaCl has its own pass where jmp %r32 is converted to jmp %r64. We
3913  // leave the instruction alone.
3914  break;
3915  if (Subtarget->isTarget64BitILP32()) {
3916  // Converts a 32-bit register to a 64-bit, zero-extended version of
3917  // it. This is needed because x86-64 can do many things, but jmp %r32
3918  // ain't one of them.
3919  const SDValue &Target = Node->getOperand(1);
3920  assert(Target.getSimpleValueType() == llvm::MVT::i32);
3921  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
3922  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
3923  Node->getOperand(0), ZextTarget);
3924  ReplaceNode(Node, Brind.getNode());
3925  SelectCode(ZextTarget.getNode());
3926  SelectCode(Brind.getNode());
3927  return;
3928  }
3929  break;
3930  }
3931  case X86ISD::GlobalBaseReg:
3932  ReplaceNode(Node, getGlobalBaseReg());
3933  return;
3934 
3935  case ISD::BITCAST:
3936  // Just drop all 128/256/512-bit bitcasts.
3937  if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
3938  NVT == MVT::f128) {
3939  ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
3940  CurDAG->RemoveDeadNode(Node);
3941  return;
3942  }
3943  break;
3944 
3945  case ISD::VSELECT: {
3946  // Replace VSELECT with BLENDV when the condition is not an i1 mask.
3947  if (Node->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3948  break;
3949 
3950  assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
3951  SDValue Blendv = CurDAG->getNode(
3952  X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
3953  Node->getOperand(1), Node->getOperand(2));
3954  ReplaceNode(Node, Blendv.getNode());
3955  SelectCode(Blendv.getNode());
3956  // We already called ReplaceUses.
3957  return;
3958  }
3959 
3960  case ISD::SRL:
3961  if (matchBitExtract(Node))
3962  return;
3963  LLVM_FALLTHROUGH;
3964  case ISD::SRA:
3965  case ISD::SHL:
3966  if (tryShiftAmountMod(Node))
3967  return;
3968  break;
3969 
3970  case ISD::AND:
3971  if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
3972  // Try to form a masked VPTESTM. Operands can be in either order.
3973  SDValue N0 = Node->getOperand(0);
3974  SDValue N1 = Node->getOperand(1);
3975  if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
3976  tryVPTESTM(Node, N0, N1))
3977  return;
3978  if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
3979  tryVPTESTM(Node, N1, N0))
3980  return;
3981  }
3982 
3983  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
3984  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
3985  CurDAG->RemoveDeadNode(Node);
3986  return;
3987  }
3988  if (matchBitExtract(Node))
3989  return;
3990  if (AndImmShrink && shrinkAndImmediate(Node))
3991  return;
3992 
3993  LLVM_FALLTHROUGH;
3994  case ISD::OR:
3995  case ISD::XOR: {
3996 
3997  // For operations of the form (x << C1) op C2, check if we can use a smaller
3998  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
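  // E.g. (x << 8) ^ 0x100 becomes ((x ^ 1) << 8); the new constant 1 fits in
  // an 8-bit immediate while 0x100 does not.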
3999  SDValue Shift = Node->getOperand(0);
4000  SDValue N1 = Node->getOperand(1);
4001 
4002  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
4003  if (!Cst)
4004  break;
4005 
4006  int64_t Val = Cst->getSExtValue();
4007 
4008  // If we have an any_extend feeding the AND, look through it to see if there
4009  // is a shift behind it. But only if the AND doesn't use the extended bits.
4010  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
4011  bool FoundAnyExtend = false;
4012  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
4013  Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
4014  isUInt<32>(Val)) {
4015  FoundAnyExtend = true;
4016  Shift = Shift.getOperand(0);
4017  }
4018 
4019  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
4020  break;
4021 
4022  // i8 is unshrinkable, i16 should be promoted to i32.
4023  if (NVT != MVT::i32 && NVT != MVT::i64)
4024  break;
4025 
4026  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
4027  if (!ShlCst)
4028  break;
4029 
4030  uint64_t ShAmt = ShlCst->getZExtValue();
4031 
4032  // Make sure that we don't change the operation by removing bits.
4033  // This only matters for OR and XOR, AND is unaffected.
4034  uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
4035  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
4036  break;
4037 
4038  // Check the minimum bitwidth for the new constant.
4039  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
4040  auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
4041  if (Opcode == ISD::AND) {
4042  // AND32ri is the same as AND64ri32 with zext imm.
4043  // Try this before sign extended immediates below.
4044  ShiftedVal = (uint64_t)Val >> ShAmt;
4045  if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
4046  return true;
4047  // Also swap order when the AND can become MOVZX.
4048  if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
4049  return true;
4050  }
4051  ShiftedVal = Val >> ShAmt;
4052  if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
4053  (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
4054  return true;
4055  if (Opcode != ISD::AND) {
4056  // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
4057  ShiftedVal = (uint64_t)Val >> ShAmt;
4058  if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
4059  return true;
4060  }
4061  return false;
4062  };
4063 
4064  int64_t ShiftedVal;
4065  if (!CanShrinkImmediate(ShiftedVal))
4066  break;
4067 
4068  // Ok, we can reorder to get a smaller immediate.
4069 
4070  // But it's possible the original immediate allowed an AND to become MOVZX.
4071  // We do this check late to delay the MaskedValueIsZero call as long as
4072  // possible.
4073  if (Opcode == ISD::AND) {
4074  // Find the smallest zext this could possibly be.
4075  unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
4076  ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));
4077 
4078  // Figure out which bits need to be zero to achieve that mask.
4079  APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
4080  ZExtWidth);
4081  NeededMask &= ~Cst->getAPIntValue();
4082 
4083  if (CurDAG->MaskedValueIsZero(Node->getOperand(0), NeededMask))
4084  break;
4085  }
4086 
4087  SDValue X = Shift.getOperand(0);
4088  if (FoundAnyExtend) {
4089  SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
4090  insertDAGNode(*CurDAG, SDValue(Node, 0), NewX);
4091  X = NewX;
4092  }
4093 
4094  SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
4095  insertDAGNode(*CurDAG, SDValue(Node, 0), NewCst);
4096  SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
4097  insertDAGNode(*CurDAG, SDValue(Node, 0), NewBinOp);
4098  SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
4099  Shift.getOperand(1));
4100  ReplaceNode(Node, NewSHL.getNode());
4101  SelectCode(NewSHL.getNode());
4102  return;
4103  }
4104  case X86ISD::SMUL:
4105  // i16/i32/i64 are handled with isel patterns.
4106  if (NVT != MVT::i8)
4107  break;
4108  LLVM_FALLTHROUGH;
4109  case X86ISD::UMUL: {
4110  SDValue N0 = Node->getOperand(0);
4111  SDValue N1 = Node->getOperand(1);
4112 
4113  unsigned LoReg, ROpc, MOpc;
4114  switch (NVT.SimpleTy) {
4115  default: llvm_unreachable("Unsupported VT!");
4116  case MVT::i8:
4117  LoReg = X86::AL;
4118  ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
4119  MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
4120  break;
4121  case MVT::i16:
4122  LoReg = X86::AX;
4123  ROpc = X86::MUL16r;
4124  MOpc = X86::MUL16m;
4125  break;
4126  case MVT::i32:
4127  LoReg = X86::EAX;
4128  ROpc = X86::MUL32r;
4129  MOpc = X86::MUL32m;
4130  break;
4131  case MVT::i64:
4132  LoReg = X86::RAX;
4133  ROpc = X86::MUL64r;
4134  MOpc = X86::MUL64m;
4135  break;
4136  }
4137 
4138  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4139  bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4140  // Multiply is commutative.
4141  if (!FoldedLoad) {
4142  FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4143  if (FoldedLoad)
4144  std::swap(N0, N1);
4145  }
4146 
4147  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
4148  N0, SDValue()).getValue(1);
4149 
4150  MachineSDNode *CNode;
4151  if (FoldedLoad) {
4152  // i16/i32/i64 use an instruction that produces a low and high result even
4153  // though only the low result is used.
4154  SDVTList VTs;
4155  if (NVT == MVT::i8)
4156  VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4157  else
4158  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
4159 
4160  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4161  InFlag };
4162  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4163 
4164  // Update the chain.
4165  ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
4166  // Record the mem-refs
4167  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4168  } else {
4169  // i16/i32/i64 use an instruction that produces a low and high result even
4170  // though only the low result is used.
4171  SDVTList VTs;
4172  if (NVT == MVT::i8)
4173  VTs = CurDAG->getVTList(NVT, MVT::i32);
4174  else
4175  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
4176 
4177  CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
4178  }
4179 
4180  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4181  ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
4182  CurDAG->RemoveDeadNode(Node);
4183  return;
4184  }
4185 
4186  case ISD::SMUL_LOHI:
4187  case ISD::UMUL_LOHI: {
4188  SDValue N0 = Node->getOperand(0);
4189  SDValue N1 = Node->getOperand(1);
4190 
4191  unsigned Opc, MOpc;
4192  bool isSigned = Opcode == ISD::SMUL_LOHI;
4193  if (!isSigned) {
4194  switch (NVT.SimpleTy) {
4195  default: llvm_unreachable("Unsupported VT!");
4196  case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
4197  case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
4198  }
4199  } else {
4200  switch (NVT.SimpleTy) {
4201  default: llvm_unreachable("Unsupported VT!");
4202  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
4203  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
4204  }
4205  }
4206 
4207  unsigned SrcReg, LoReg, HiReg;
4208  switch (Opc) {
4209  default: llvm_unreachable("Unknown MUL opcode!");
4210  case X86::IMUL32r:
4211  case X86::MUL32r:
4212  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
4213  break;
4214  case X86::IMUL64r:
4215  case X86::MUL64r:
4216  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
4217  break;
4218  }
4219 
4220  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4221  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4222  // Multiply is commutative.
4223  if (!foldedLoad) {
4224  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4225  if (foldedLoad)
4226  std::swap(N0, N1);
4227  }
4228 
4229  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
4230  N0, SDValue()).getValue(1);
4231  if (foldedLoad) {
4232  SDValue Chain;
4233  MachineSDNode *CNode = nullptr;
4234  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4235  InFlag };
4236  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
4237  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4238  Chain = SDValue(CNode, 0);
4239  InFlag = SDValue(CNode, 1);
4240 
4241  // Update the chain.
4242  ReplaceUses(N1.getValue(1), Chain);
4243  // Record the mem-refs
4244  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4245  } else {
4246  SDValue Ops[] = { N1, InFlag };
4247  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
4248  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4249  InFlag = SDValue(CNode, 0);
4250  }
4251 
4252  // Copy the low half of the result, if it is needed.
4253  if (!SDValue(Node, 0).use_empty()) {
4254  assert(LoReg && "Register for low half is not defined!");
4255  SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
4256  NVT, InFlag);
4257  InFlag = ResLo.getValue(2);
4258  ReplaceUses(SDValue(Node, 0), ResLo);
4259  LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
4260  dbgs() << '\n');
4261  }
4262  // Copy the high half of the result, if it is needed.
4263  if (!SDValue(Node, 1).use_empty()) {
4264  assert(HiReg && "Register for high half is not defined!");
4265  SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
4266  NVT, InFlag);
4267  InFlag = ResHi.getValue(2);
4268  ReplaceUses(SDValue(Node, 1), ResHi);
4269  LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
4270  dbgs() << '\n');
4271  }
4272 
4273  CurDAG->RemoveDeadNode(Node);
4274  return;
4275  }
4276 
4277  case ISD::SDIVREM:
4278  case ISD::UDIVREM: {
4279  SDValue N0 = Node->getOperand(0);
4280  SDValue N1 = Node->getOperand(1);
4281 
4282  unsigned Opc, MOpc;
4283  bool isSigned = Opcode == ISD::SDIVREM;
4284  if (!isSigned) {
4285  switch (NVT.SimpleTy) {
4286  default: llvm_unreachable("Unsupported VT!");
4287  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
4288  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
4289  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
4290  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
4291  }
4292  } else {
4293  switch (NVT.SimpleTy) {
4294  default: llvm_unreachable("Unsupported VT!");
4295  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
4296  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
4297  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
4298  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
4299  }
4300  }
4301 
4302  unsigned LoReg, HiReg, ClrReg;
4303  unsigned SExtOpcode;
4304  switch (NVT.SimpleTy) {
4305  default: llvm_unreachable("Unsupported VT!");
4306  case MVT::i8:
4307  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
4308  SExtOpcode = X86::CBW;
4309  break;
4310  case MVT::i16:
4311  LoReg = X86::AX; HiReg = X86::DX;
4312  ClrReg = X86::DX;
4313  SExtOpcode = X86::CWD;
4314  break;
4315  case MVT::i32:
4316  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
4317  SExtOpcode = X86::CDQ;
4318  break;
4319  case MVT::i64:
4320  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
4321  SExtOpcode = X86::CQO;
4322  break;
4323  }
4324 
4325  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4326  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4327  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
4328 
4329  SDValue InFlag;
4330  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
4331  // Special case for div8, just use a move with zero extension to AX to
4332  // clear the upper 8 bits (AH).
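  // (8-bit DIV/IDIV divides AX; the quotient lands in AL and the remainder in
  // AH, so zero-extending the dividend into EAX is sufficient here.)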
4333  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
4334  MachineSDNode *Move;
4335  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4336  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4337  Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
4338  MVT::Other, Ops);
4339  Chain = SDValue(Move, 1);
4340  ReplaceUses(N0.getValue(1), Chain);
4341  // Record the mem-refs
4342  CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
4343  } else {
4344  Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0);
4345  Chain = CurDAG->getEntryNode();
4346  }
4347  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0),
4348  SDValue());
4349  InFlag = Chain.getValue(1);
4350  } else {
4351  InFlag =
4352  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
4353  LoReg, N0, SDValue()).getValue(1);
4354  if (isSigned && !signBitIsZero) {
4355  // Sign extend the low part into the high part.
4356  InFlag =
4357  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
4358  } else {
4359  // Zero out the high part, effectively zero extending the input.
4360  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
4361  switch (NVT.SimpleTy) {
4362  case MVT::i16:
4363  ClrNode =
4364  SDValue(CurDAG->getMachineNode(
4365  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
4366  CurDAG->getTargetConstant(X86::sub_16bit, dl,
4367  MVT::i32)),
4368  0);
4369  break;
4370  case MVT::i32:
4371  break;
4372  case MVT::i64:
4373  ClrNode =
4374  SDValue(CurDAG->getMachineNode(
4375  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
4376  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
4377  CurDAG->getTargetConstant(X86::sub_32bit, dl,
4378  MVT::i32)),
4379  0);
4380  break;
4381  default:
4382  llvm_unreachable("Unexpected division source");
4383  }
4384 
4385  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
4386  ClrNode, InFlag).getValue(1);
4387  }
4388  }
4389 
4390  if (foldedLoad) {
4391  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
4392  InFlag };
4393  MachineSDNode *CNode =
4394  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
4395  InFlag = SDValue(CNode, 1);
4396  // Update the chain.
4397  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
4398  // Record the mem-refs
4399  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4400  } else {
4401  InFlag =
4402  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
4403  }
4404 
4405  // Prevent use of AH in a REX instruction by explicitly copying it to
4406  // an ABCD_L register.
4407  //
4408  // The current assumption of the register allocator is that isel
4409  // won't generate explicit references to the GR8_ABCD_H registers. If
4410  // the allocator and/or the backend get enhanced to be more robust in
4411  // that regard, this can be, and should be, removed.
4412  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
4413  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
4414  unsigned AHExtOpcode =
4415  isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
4416 
4417  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
4418  MVT::Glue, AHCopy, InFlag);
4419  SDValue Result(RNode, 0);
4420  InFlag = SDValue(RNode, 1);
4421 
4422  Result =
4423  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
4424 
4425  ReplaceUses(SDValue(Node, 1), Result);
4426  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4427  dbgs() << '\n');
4428  }
4429  // Copy the division (low) result, if it is needed.
4430  if (!SDValue(Node, 0).use_empty()) {
4431  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4432  LoReg, NVT, InFlag);
4433  InFlag = Result.getValue(2);
4434  ReplaceUses(SDValue(Node, 0), Result);
4435  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4436  dbgs() << '\n');
4437  }
4438  // Copy the remainder (high) result, if it is needed.
4439  if (!SDValue(Node, 1).use_empty()) {
4440  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
4441  HiReg, NVT, InFlag);
4442  InFlag = Result.getValue(2);
4443  ReplaceUses(SDValue(Node, 1), Result);
4444  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
4445  dbgs() << '\n');
4446  }
4447  CurDAG->RemoveDeadNode(Node);
4448  return;
4449  }
4450 
4451  case X86ISD::CMP: {
4452  SDValue N0 = Node->getOperand(0);
4453  SDValue N1 = Node->getOperand(1);
4454 
4455  // Optimizations for TEST compares.
4456  if (!isNullConstant(N1))
4457  break;
4458 
4459  // Save the original VT of the compare.
4460  MVT CmpVT = N0.getSimpleValueType();
4461 
4462  // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
4463  // by a test instruction. The test should be removed later by
4464  // analyzeCompare if we are using only the zero flag.
4465  // TODO: Should we check the users and use the BEXTR flags directly?
4466  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
4467  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
4468  unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
4469  : X86::TEST32rr;
4470  SDValue BEXTR = SDValue(NewNode, 0);
4471  NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
4472  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
4473  CurDAG->RemoveDeadNode(Node);
4474  return;
4475  }
4476  }
4477 
4478  // We can peek through truncates, but we need to be careful below.
4479  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
4480  N0 = N0.getOperand(0);
4481 
4482  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
4483  // use a smaller encoding.
4484  // Look past the truncate if CMP is the only use of it.
4485  if (N0.getOpcode() == ISD::AND &&
4486  N0.getNode()->hasOneUse() &&
4487  N0.getValueType() != MVT::i8) {
4488  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4489  if (!C) break;
4490  uint64_t Mask = C->getZExtValue();
4491 
4492  // Check if we can replace AND+IMM64 with a shift. This is possible for
4493  // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
4494  // flag.
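  // E.g. testing (x & 0xFFFFFF0000000000) for zero becomes
  // "shrq $40, %x; testq %x, %x", avoiding the 64-bit immediate; the shift
  // count is the number of trailing (or leading) zeros of the mask.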
4495  if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
4496  onlyUsesZeroFlag(SDValue(Node, 0))) {
4497  if (isMask_64(~Mask)) {
4498  unsigned TrailingZeros = countTrailingZeros(Mask);
4499  SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
4500  SDValue Shift =
4501  SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
4502  N0.getOperand(0), Imm), 0);
4503  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
4504  MVT::i32, Shift, Shift);
4505  ReplaceNode(Node, Test);
4506  return;
4507  }
4508  if (isMask_64(Mask)) {
4509  unsigned LeadingZeros = countLeadingZeros(Mask);
4510  SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
4511  SDValue Shift =
4512  SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
4513  N0.getOperand(0), Imm), 0);
4514  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
4515  MVT::i32, Shift, Shift);
4516  ReplaceNode(Node, Test);
4517  return;
4518  }
4519  }
4520 
4521  MVT VT;
4522  int SubRegOp;
4523  unsigned ROpc, MOpc;
4524 
4525  // For each of these checks we need to be careful if the sign flag is
4526  // being used. It is only safe to use the sign flag in two conditions,
4527  // either the sign bit in the shrunken mask is zero or the final test
4528  // size is equal to the original compare size.
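  // E.g. shrinking "testl $0x80, %eax" to "testb $0x80, %al" moves the sign
  // flag from bit 31 (always clear here) to bit 7, so the narrowing is only
  // done when SF is known to be unused, the narrow mask's sign bit is clear,
  // or the compare was already 8 bits wide.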
4529 
4530  if (isUInt<8>(Mask) &&
4531  (!(Mask & 0x80) || CmpVT == MVT::i8 ||
4532  hasNoSignFlagUses(SDValue(Node, 0)))) {
4533  // For example, convert "testl %eax, $8" to "testb %al, $8"
4534  VT = MVT::i8;
4535  SubRegOp = X86::sub_8bit;
4536  ROpc = X86::TEST8ri;
4537  MOpc = X86::TEST8mi;
4538  } else if (OptForMinSize && isUInt<16>(Mask) &&
4539  (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
4540  hasNoSignFlagUses(SDValue(Node, 0)))) {
4541  // For example, "testl %eax, $32776" to "testw %ax, $32776".
4542  // NOTE: We only want to form TESTW instructions if optimizing for
4543  // min size. Otherwise we only save one byte and possibly get a length
4544  // changing prefix penalty in the decoders.
4545  VT = MVT::i16;
4546  SubRegOp = X86::sub_16bit;
4547  ROpc = X86::TEST16ri;
4548  MOpc = X86::TEST16mi;
4549  } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
4550  ((!(Mask & 0x80000000) &&
4551  // Without minsize 16-bit Cmps can get here so we need to
4552  // be sure we calculate the correct sign flag if needed.
4553  (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
4554  CmpVT == MVT::i32 ||
4555  hasNoSignFlagUses(SDValue(Node, 0)))) {
4556  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
4557  // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
4558  // Otherwise, we find ourselves in a position where we have to do
4559  // promotion. If previous passes did not promote the and, we assume
4560  // they had a good reason not to and do not promote here.
4561  VT = MVT::i32;
4562  SubRegOp = X86::sub_32bit;
4563  ROpc = X86::TEST32ri;
4564  MOpc = X86::TEST32mi;
4565  } else {
4566  // No eligible transformation was found.
4567  break;
4568  }
4569 
4570  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
4571  SDValue Reg = N0.getOperand(0);
4572 
4573  // Emit a testl or testw.
4574  MachineSDNode *NewNode;
4575  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
4576  if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
4577  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
4578  Reg.getOperand(0) };
4579  NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
4580  // Update the chain.
4581  ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
4582  // Record the mem-refs
4583  CurDAG->setNodeMemRefs(NewNode,
4584  {cast<LoadSDNode>(Reg)->getMemOperand()});
4585  } else {
4586  // Extract the subregister if necessary.
4587  if (N0.getValueType() != VT)
4588  Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
4589 
4590  NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
4591  }
4592  // Replace CMP with TEST.
4593  ReplaceNode(Node, NewNode);
4594  return;
4595  }
4596  break;
4597  }
4598  case X86ISD::PCMPISTR: {
4599  if (!Subtarget->hasSSE42())
4600  break;
4601 
4602  bool NeedIndex = !SDValue(Node, 0).use_empty();
4603  bool NeedMask = !SDValue(Node, 1).use_empty();
4604  // We can't fold a load if we are going to make two instructions.
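      // (Folding would attach the same load to two machine nodes and in
      //  effect read memory twice, so it is only attempted when a single
      //  instruction covers every used result.)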
4605  bool MayFoldLoad = !NeedIndex || !NeedMask;
4606 
4607  MachineSDNode *CNode;
4608  if (NeedMask) {
4609  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
4610  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
4611  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
4612  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4613  }
4614  if (NeedIndex || !NeedMask) {
4615  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
4616  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
4617  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
4618  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4619  }
4620 
4621  // Connect the flag usage to the last instruction created.
4622  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4623  CurDAG->RemoveDeadNode(Node);
4624  return;
4625  }
4626  case X86ISD::PCMPESTR: {
4627  if (!Subtarget->hasSSE42())
4628  break;
4629 
4630  // Copy the two implicit register inputs.
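      // PCMPESTRI/PCMPESTRM take the explicit string lengths in EAX and EDX;
      // the glue produced by these copies ties them to the compare node.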
4631  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
4632  Node->getOperand(1),
4633  SDValue()).getValue(1);
4634  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
4635  Node->getOperand(3), InFlag).getValue(1);
4636 
4637  bool NeedIndex = !SDValue(Node, 0).use_empty();
4638  bool NeedMask = !SDValue(Node, 1).use_empty();
4639  // We can't fold a load if we are going to make two instructions.
4640  bool MayFoldLoad = !NeedIndex || !NeedMask;
4641 
4642  MachineSDNode *CNode;
4643  if (NeedMask) {
4644  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
4645  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
4646  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
4647  InFlag);
4648  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4649  }
4650  if (NeedIndex || !NeedMask) {
4651  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
4652  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
4653  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
4654  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4655  }
4656  // Connect the flag usage to the last instruction created.
4657  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4658  CurDAG->RemoveDeadNode(Node);
4659  return;
4660  }
4661 
4662  case ISD::SETCC: {
4663  if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
4664  return;
4665 
4666  break;
4667  }
4668 
4669  case ISD::STORE:
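      // foldLoadStoreIntoMemOperand tries to rewrite a (load; op; store)
      // chain on the same address into a single instruction with a memory
      // operand, e.g. an ADD64mi8 instead of separate load/add/store nodes.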
4670  if (foldLoadStoreIntoMemOperand(Node))
4671  return;
4672  break;
4673  }
4674 
4675  SelectCode(Node);
4676 }
4677 
4678 bool X86DAGToDAGISel::
4679 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4680  std::vector<SDValue> &OutOps) {
4681  SDValue Op0, Op1, Op2, Op3, Op4;
4682  switch (ConstraintID) {
4683  default:
4684  llvm_unreachable("Unexpected asm memory constraint");
4685  case InlineAsm::Constraint_i:
4686  // FIXME: It seems strange that 'i' is needed here since it's supposed to
4687  // be an immediate and not a memory constraint.
4688  LLVM_FALLTHROUGH;
4689  case InlineAsm::Constraint_o: // offsetable ??
4690  case InlineAsm::Constraint_v: // not offsetable ??
4691  case InlineAsm::Constraint_m: // memory
4692  case InlineAsm::Constraint_X:
4693  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
4694  return true;
4695  break;
4696  }
4697 
4698  OutOps.push_back(Op0);
4699  OutOps.push_back(Op1);
4700  OutOps.push_back(Op2);
4701  OutOps.push_back(Op3);
4702  OutOps.push_back(Op4);
4703  return false;
4704 }
4705 
4706 /// This pass converts a legalized DAG into a X86-specific DAG,
4707 /// ready for instruction scheduling.
4708 FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
4709  CodeGenOpt::Level OptLevel) {
4710  return new X86DAGToDAGISel(TM, OptLevel);
4711 }
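
For context, a minimal sketch of how this factory function is typically invoked when the backend builds its pass pipeline; the hook name and surrounding class are assumed from the usual X86 target-machine setup rather than shown in this file:

// Sketch only, assuming an X86PassConfig with the standard TargetPassConfig
// hooks (this hook lives outside this file).
bool X86PassConfig::addInstSelector() {
  // Install the DAG-to-DAG instruction selector defined in this file.
  addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
  return false;
}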