LLVM  9.0.0svn
X86ISelDAGToDAG.cpp
1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a DAG pattern matching instruction selector for X86,
10 // converting from a legalized DAG to an X86 DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86.h"
15 #include "X86MachineFunctionInfo.h"
16 #include "X86RegisterInfo.h"
17 #include "X86Subtarget.h"
18 #include "X86TargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/IR/ConstantRange.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/Type.h"
29 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/KnownBits.h"
36 #include <stdint.h>
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "x86-isel"
40 
41 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
42 
43 static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
44  cl::desc("Enable setting constant bits to reduce size of mask immediates"),
45  cl::Hidden);
46 
47 //===----------------------------------------------------------------------===//
48 // Pattern Matcher Implementation
49 //===----------------------------------------------------------------------===//
50 
51 namespace {
52  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
53  /// numbers for the leaves of the matched tree.
54  struct X86ISelAddressMode {
55  enum {
56  RegBase,
57  FrameIndexBase
58  } BaseType;
59 
60  // This is really a union, discriminated by BaseType!
61  SDValue Base_Reg;
62  int Base_FrameIndex;
63 
64  unsigned Scale;
65  SDValue IndexReg;
66  int32_t Disp;
67  SDValue Segment;
68  const GlobalValue *GV;
69  const Constant *CP;
70  const BlockAddress *BlockAddr;
71  const char *ES;
72  MCSymbol *MCSym;
73  int JT;
74  unsigned Align; // CP alignment.
75  unsigned char SymbolFlags; // X86II::MO_*
76 
77  X86ISelAddressMode()
78  : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
79  Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
80  MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
81 
82  bool hasSymbolicDisplacement() const {
83  return GV != nullptr || CP != nullptr || ES != nullptr ||
84  MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
85  }
86 
87  bool hasBaseOrIndexReg() const {
88  return BaseType == FrameIndexBase ||
89  IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
90  }
91 
92  /// Return true if this addressing mode is already RIP-relative.
93  bool isRIPRelative() const {
94  if (BaseType != RegBase) return false;
95  if (RegisterSDNode *RegNode =
96  dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
97  return RegNode->getReg() == X86::RIP;
98  return false;
99  }
100 
101  void setBaseReg(SDValue Reg) {
102  BaseType = RegBase;
103  Base_Reg = Reg;
104  }
105 
106 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
107  void dump(SelectionDAG *DAG = nullptr) {
108  dbgs() << "X86ISelAddressMode " << this << '\n';
109  dbgs() << "Base_Reg ";
110  if (Base_Reg.getNode())
111  Base_Reg.getNode()->dump(DAG);
112  else
113  dbgs() << "nul\n";
114  if (BaseType == FrameIndexBase)
115  dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
116  dbgs() << " Scale " << Scale << '\n'
117  << "IndexReg ";
118  if (IndexReg.getNode())
119  IndexReg.getNode()->dump(DAG);
120  else
121  dbgs() << "nul\n";
122  dbgs() << " Disp " << Disp << '\n'
123  << "GV ";
124  if (GV)
125  GV->dump();
126  else
127  dbgs() << "nul";
128  dbgs() << " CP ";
129  if (CP)
130  CP->dump();
131  else
132  dbgs() << "nul";
133  dbgs() << '\n'
134  << "ES ";
135  if (ES)
136  dbgs() << ES;
137  else
138  dbgs() << "nul";
139  dbgs() << " MCSym ";
140  if (MCSym)
141  dbgs() << MCSym;
142  else
143  dbgs() << "nul";
144  dbgs() << " JT" << JT << " Align" << Align << '\n';
145  }
146 #endif
147  };
148 }
149 
150 namespace {
151  //===--------------------------------------------------------------------===//
152  /// ISel - X86-specific code to select X86 machine instructions for
153  /// SelectionDAG operations.
154  ///
155  class X86DAGToDAGISel final : public SelectionDAGISel {
156  /// Keep a pointer to the X86Subtarget around so that we can
157  /// make the right decision when generating code for different targets.
158  const X86Subtarget *Subtarget;
159 
160  /// If true, selector should try to optimize for code size instead of
161  /// performance.
162  bool OptForSize;
163 
164  /// If true, selector should try to optimize for minimum code size.
165  bool OptForMinSize;
166 
167  /// Disable direct TLS access through segment registers.
168  bool IndirectTlsSegRefs;
169 
170  public:
171  explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
172  : SelectionDAGISel(tm, OptLevel), OptForSize(false),
173  OptForMinSize(false) {}
174 
175  StringRef getPassName() const override {
176  return "X86 DAG->DAG Instruction Selection";
177  }
178 
179  bool runOnMachineFunction(MachineFunction &MF) override {
180  // Reset the subtarget each time through.
181  Subtarget = &MF.getSubtarget<X86Subtarget>();
182  IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
183  "indirect-tls-seg-refs");
184  SelectionDAGISel::runOnMachineFunction(MF);
185  return true;
186  }
187 
188  void EmitFunctionEntryCode() override;
189 
190  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
191 
192  void PreprocessISelDAG() override;
193  void PostprocessISelDAG() override;
194 
195 // Include the pieces autogenerated from the target description.
196 #include "X86GenDAGISel.inc"
197 
198  private:
199  void Select(SDNode *N) override;
200 
201  bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
202  bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
203  bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
204  bool matchAddress(SDValue N, X86ISelAddressMode &AM);
205  bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
206  bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
207  bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
208  unsigned Depth);
209  bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
210  bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
211  SDValue &Scale, SDValue &Index, SDValue &Disp,
212  SDValue &Segment);
213  bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
214  SDValue &Scale, SDValue &Index, SDValue &Disp,
215  SDValue &Segment);
216  bool selectMOV64Imm32(SDValue N, SDValue &Imm);
217  bool selectLEAAddr(SDValue N, SDValue &Base,
218  SDValue &Scale, SDValue &Index, SDValue &Disp,
219  SDValue &Segment);
220  bool selectLEA64_32Addr(SDValue N, SDValue &Base,
221  SDValue &Scale, SDValue &Index, SDValue &Disp,
222  SDValue &Segment);
223  bool selectTLSADDRAddr(SDValue N, SDValue &Base,
224  SDValue &Scale, SDValue &Index, SDValue &Disp,
225  SDValue &Segment);
226  bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
227  SDValue &Base, SDValue &Scale,
228  SDValue &Index, SDValue &Disp,
229  SDValue &Segment,
230  SDValue &NodeWithChain);
231  bool selectRelocImm(SDValue N, SDValue &Op);
232 
233  bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
234  SDValue &Base, SDValue &Scale,
235  SDValue &Index, SDValue &Disp,
236  SDValue &Segment);
237 
238  // Convenience method where P is also root.
239  bool tryFoldLoad(SDNode *P, SDValue N,
240  SDValue &Base, SDValue &Scale,
241  SDValue &Index, SDValue &Disp,
242  SDValue &Segment) {
243  return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
244  }
245 
246  /// Implement addressing mode selection for inline asm expressions.
247  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
248  unsigned ConstraintID,
249  std::vector<SDValue> &OutOps) override;
250 
251  void emitSpecialCodeForMain();
252 
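 /// Materialize a matched X86ISelAddressMode into the five operands (Base,
 /// Scale, Index, Disp, Segment) that X86 memory instruction patterns expect.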
253  inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
254  SDValue &Base, SDValue &Scale,
255  SDValue &Index, SDValue &Disp,
256  SDValue &Segment) {
257  Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
258  ? CurDAG->getTargetFrameIndex(
259  AM.Base_FrameIndex,
260  TLI->getPointerTy(CurDAG->getDataLayout()))
261  : AM.Base_Reg;
262  Scale = getI8Imm(AM.Scale, DL);
263  Index = AM.IndexReg;
264  // These are 32-bit even in 64-bit mode since RIP-relative offset
265  // is 32-bit.
266  if (AM.GV)
267  Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
268  MVT::i32, AM.Disp,
269  AM.SymbolFlags);
270  else if (AM.CP)
271  Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
272  AM.Align, AM.Disp, AM.SymbolFlags);
273  else if (AM.ES) {
274  assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
275  Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
276  } else if (AM.MCSym) {
277  assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
278  assert(AM.SymbolFlags == 0 && "oo");
279  Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
280  } else if (AM.JT != -1) {
281  assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
282  Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
283  } else if (AM.BlockAddr)
284  Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
285  AM.SymbolFlags);
286  else
287  Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
288 
289  if (AM.Segment.getNode())
290  Segment = AM.Segment;
291  else
292  Segment = CurDAG->getRegister(0, MVT::i32);
293  }
294 
295  // Utility function to determine whether we should avoid selecting
296  // immediate forms of instructions for better code size.
297  // At a high level, we'd like to avoid such instructions when
298  // we have similar constants used within the same basic block
299  // that can be kept in a register.
300  //
301  bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
302  uint32_t UseCount = 0;
303 
304  // Do not want to hoist if we're not optimizing for size.
305  // TODO: We'd like to remove this restriction.
306  // See the comment in X86InstrInfo.td for more info.
307  if (!OptForSize)
308  return false;
309 
310  // Walk all the users of the immediate.
311  for (SDNode::use_iterator UI = N->use_begin(),
312  UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
313 
314  SDNode *User = *UI;
315 
316  // This user is already selected. Count it as a legitimate use and
317  // move on.
318  if (User->isMachineOpcode()) {
319  UseCount++;
320  continue;
321  }
322 
323  // We want to count stores of immediates as real uses.
324  if (User->getOpcode() == ISD::STORE &&
325  User->getOperand(1).getNode() == N) {
326  UseCount++;
327  continue;
328  }
329 
330  // We don't currently match users that have > 2 operands (except
331  // for stores, which are handled above).
332  // Those instructions won't match in ISel, for now, and would
333  // be counted incorrectly.
334  // This may change in the future as we add additional instruction
335  // types.
336  if (User->getNumOperands() != 2)
337  continue;
338 
339  // Immediates that are used for offsets as part of stack
340  // manipulation should be left alone. These are typically
341  // used to indicate SP offsets for argument passing and
342  // will get pulled into stores/pushes (implicitly).
343  if (User->getOpcode() == X86ISD::ADD ||
344  User->getOpcode() == ISD::ADD ||
345  User->getOpcode() == X86ISD::SUB ||
346  User->getOpcode() == ISD::SUB) {
347 
348  // Find the other operand of the add/sub.
349  SDValue OtherOp = User->getOperand(0);
350  if (OtherOp.getNode() == N)
351  OtherOp = User->getOperand(1);
352 
353  // Don't count if the other operand is SP.
354  RegisterSDNode *RegNode;
355  if (OtherOp->getOpcode() == ISD::CopyFromReg &&
356  (RegNode = dyn_cast_or_null<RegisterSDNode>(
357  OtherOp->getOperand(1).getNode())))
358  if ((RegNode->getReg() == X86::ESP) ||
359  (RegNode->getReg() == X86::RSP))
360  continue;
361  }
362 
363  // ... otherwise, count this and move on.
364  UseCount++;
365  }
366 
367  // If we have more than 1 use, then recommend for hoisting.
368  return (UseCount > 1);
369  }
370 
371  /// Return a target constant with the specified value of type i8.
372  inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
373  return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
374  }
375 
376  /// Return a target constant with the specified value, of type i32.
377  inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
378  return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
379  }
380 
381  /// Return a target constant with the specified value, of type i64.
382  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
383  return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
384  }
385 
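 /// Convert the element index of an EXTRACT_SUBVECTOR into the immediate the
 /// VEXTRACT instructions expect: the index of the VecWidth-bit chunk being
 /// extracted.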
386  SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
387  const SDLoc &DL) {
388  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
389  uint64_t Index = N->getConstantOperandVal(1);
390  MVT VecVT = N->getOperand(0).getSimpleValueType();
391  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
392  }
393 
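 /// Convert the element index of an INSERT_SUBVECTOR into the immediate the
 /// VINSERT instructions expect: the index of the VecWidth-bit chunk being
 /// inserted.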
394  SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
395  const SDLoc &DL) {
396  assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
397  uint64_t Index = N->getConstantOperandVal(2);
398  MVT VecVT = N->getSimpleValueType(0);
399  return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
400  }
401 
402  /// Return an SDNode that returns the value of the global base register.
403  /// Output instructions required to initialize the global base register,
404  /// if necessary.
405  SDNode *getGlobalBaseReg();
406 
407  /// Return a reference to the TargetMachine, casted to the target-specific
408  /// type.
409  const X86TargetMachine &getTargetMachine() const {
410  return static_cast<const X86TargetMachine &>(TM);
411  }
412 
413  /// Return a reference to the TargetInstrInfo, casted to the target-specific
414  /// type.
415  const X86InstrInfo *getInstrInfo() const {
416  return Subtarget->getInstrInfo();
417  }
418 
419  /// Address-mode matching performs shift-of-and to and-of-shift
420  /// reassociation in order to expose more scaled addressing
421  /// opportunities.
422  bool ComplexPatternFuncMutatesDAG() const override {
423  return true;
424  }
425 
426  bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
427 
428  /// Returns whether this is a relocatable immediate in the range
429  /// [-2^Width .. 2^Width-1].
430  template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
431  if (auto *CN = dyn_cast<ConstantSDNode>(N))
432  return isInt<Width>(CN->getSExtValue());
433  return isSExtAbsoluteSymbolRef(Width, N);
434  }
435 
436  // Indicates we should prefer to use a non-temporal load for this load.
437  bool useNonTemporalLoad(LoadSDNode *N) const {
438  if (!N->isNonTemporal())
439  return false;
440 
441  unsigned StoreSize = N->getMemoryVT().getStoreSize();
442 
443  if (N->getAlignment() < StoreSize)
444  return false;
445 
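 // Dedicated non-temporal load instructions (MOVNTDQA and its 256/512-bit
 // forms) only exist for 16, 32 and 64 byte accesses, gated on SSE4.1, AVX2
 // and AVX512 respectively; smaller loads fall back to ordinary moves.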
446  switch (StoreSize) {
447  default: llvm_unreachable("Unsupported store size");
448  case 4:
449  case 8:
450  return false;
451  case 16:
452  return Subtarget->hasSSE41();
453  case 32:
454  return Subtarget->hasAVX2();
455  case 64:
456  return Subtarget->hasAVX512();
457  }
458  }
459 
460  bool foldLoadStoreIntoMemOperand(SDNode *Node);
461  MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
462  bool matchBitExtract(SDNode *Node);
463  bool shrinkAndImmediate(SDNode *N);
464  bool isMaskZeroExtended(SDNode *N) const;
465  bool tryShiftAmountMod(SDNode *N);
466 
467  MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
468  const SDLoc &dl, MVT VT, SDNode *Node);
469  MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
470  const SDLoc &dl, MVT VT, SDNode *Node,
471  SDValue &InFlag);
472 
473  bool tryOptimizeRem8Extend(SDNode *N);
474 
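 /// Queries on how the EFLAGS result 'Flags' is consumed: whether only ZF is
 /// read, and whether SF or CF are read at all.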
475  bool onlyUsesZeroFlag(SDValue Flags) const;
476  bool hasNoSignFlagUses(SDValue Flags) const;
477  bool hasNoCarryFlagUses(SDValue Flags) const;
478  };
479 }
480 
481 
482 // Returns true if this masked compare can be implemented legally with this
483 // type.
484 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
485  unsigned Opcode = N->getOpcode();
486  if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
487  Opcode == X86ISD::CMPM_RND || Opcode == X86ISD::VFPCLASS) {
488  // We can get 256-bit 8 element types here without VLX being enabled. When
489  // this happens we will use 512-bit operations and the mask will not be
490  // zero extended.
491  EVT OpVT = N->getOperand(0).getValueType();
492  if (OpVT.is256BitVector() || OpVT.is128BitVector())
493  return Subtarget->hasVLX();
494 
495  return true;
496  }
497  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
498  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
499  Opcode == X86ISD::FSETCCM_RND)
500  return true;
501 
502  return false;
503 }
504 
505 // Returns true if we can assume the writer of the mask has zero extended it
506 // for us.
507 bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
508  // If this is an AND, check if we have a compare on either side. As long as
509  // one side guarantees the mask is zero extended, the AND will preserve those
510  // zeros.
511  if (N->getOpcode() == ISD::AND)
512  return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
513  isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
514 
515  return isLegalMaskCompare(N, Subtarget);
516 }
517 
518 bool
519 X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
520  if (OptLevel == CodeGenOpt::None) return false;
521 
522  if (!N.hasOneUse())
523  return false;
524 
525  if (N.getOpcode() != ISD::LOAD)
526  return true;
527 
528  // Don't fold non-temporal loads if we have an instruction for them.
529  if (useNonTemporalLoad(cast<LoadSDNode>(N)))
530  return false;
531 
532  // If N is a load, do additional profitability checks.
533  if (U == Root) {
534  switch (U->getOpcode()) {
535  default: break;
536  case X86ISD::ADD:
537  case X86ISD::ADC:
538  case X86ISD::SUB:
539  case X86ISD::SBB:
540  case X86ISD::AND:
541  case X86ISD::XOR:
542  case X86ISD::OR:
543  case ISD::ADD:
544  case ISD::ADDCARRY:
545  case ISD::AND:
546  case ISD::OR:
547  case ISD::XOR: {
548  SDValue Op1 = U->getOperand(1);
549 
550  // If the other operand is an 8-bit immediate we should fold the immediate
551  // instead. This reduces code size.
552  // e.g.
553  // movl 4(%esp), %eax
554  // addl $4, %eax
555  // vs.
556  // movl $4, %eax
557  // addl 4(%esp), %eax
558  // The former is 2 bytes shorter. In the case where the increment is 1,
559  // the saving can be 4 bytes (by using incl %eax).
560  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
561  if (Imm->getAPIntValue().isSignedIntN(8))
562  return false;
563 
564  // If this is a 64-bit AND with an immediate that fits in 32-bits,
565  // prefer using the smaller and over folding the load. This is needed to
566  // make sure immediates created by shrinkAndImmediate are always folded.
567  // Ideally we would narrow the load during DAG combine and get the
568  // best of both worlds.
569  if (U->getOpcode() == ISD::AND &&
570  Imm->getAPIntValue().getBitWidth() == 64 &&
571  Imm->getAPIntValue().isIntN(32))
572  return false;
573  }
574 
575  // If the other operand is a TLS address, we should fold it instead.
576  // This produces
577  // movl %gs:0, %eax
578  // leal i@NTPOFF(%eax), %eax
579  // instead of
580  // movl $i@NTPOFF, %eax
581  // addl %gs:0, %eax
582  // if the block also has an access to a second TLS address this will save
583  // a load.
584  // FIXME: This is probably also true for non-TLS addresses.
585  if (Op1.getOpcode() == X86ISD::Wrapper) {
586  SDValue Val = Op1.getOperand(0);
587  if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
588  return false;
589  }
590 
591  // Don't fold load if this matches the BTS/BTR/BTC patterns.
592  // BTS: (or X, (shl 1, n))
593  // BTR: (and X, (rotl -2, n))
594  // BTC: (xor X, (shl 1, n))
595  if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
596  if (U->getOperand(0).getOpcode() == ISD::SHL &&
598  return false;
599 
600  if (U->getOperand(1).getOpcode() == ISD::SHL &&
602  return false;
603  }
604  if (U->getOpcode() == ISD::AND) {
605  SDValue U0 = U->getOperand(0);
606  SDValue U1 = U->getOperand(1);
607  if (U0.getOpcode() == ISD::ROTL) {
608  auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
609  if (C && C->getSExtValue() == -2)
610  return false;
611  }
612 
613  if (U1.getOpcode() == ISD::ROTL) {
614  auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
615  if (C && C->getSExtValue() == -2)
616  return false;
617  }
618  }
619 
620  break;
621  }
622  case ISD::SHL:
623  case ISD::SRA:
624  case ISD::SRL:
625  // Don't fold a load into a shift by immediate. The BMI2 instructions
626  // support folding a load, but not an immediate. The legacy instructions
627  // support folding an immediate, but can't fold a load. Folding an
628  // immediate is preferable to folding a load.
629  if (isa<ConstantSDNode>(U->getOperand(1)))
630  return false;
631 
632  break;
633  }
634  }
635 
636  // Prevent folding a load if this can be implemented with an insert_subreg or
637  // a move that implicitly zeroes.
638  if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
639  isNullConstant(Root->getOperand(2)) &&
640  (Root->getOperand(0).isUndef() ||
641  ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
642  return false;
643 
644  return true;
645 }
646 
647 /// Replace the original chain operand of the call with
648 /// load's chain operand and move load below the call's chain operand.
649 static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
650  SDValue Call, SDValue OrigChain) {
651  SmallVector<SDValue, 8> Ops;
652  SDValue Chain = OrigChain.getOperand(0);
653  if (Chain.getNode() == Load.getNode())
654  Ops.push_back(Load.getOperand(0));
655  else {
656  assert(Chain.getOpcode() == ISD::TokenFactor &&
657  "Unexpected chain operand");
658  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
659  if (Chain.getOperand(i).getNode() == Load.getNode())
660  Ops.push_back(Load.getOperand(0));
661  else
662  Ops.push_back(Chain.getOperand(i));
663  SDValue NewChain =
664  CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
665  Ops.clear();
666  Ops.push_back(NewChain);
667  }
668  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
669  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
670  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
671  Load.getOperand(1), Load.getOperand(2));
672 
673  Ops.clear();
674  Ops.push_back(SDValue(Load.getNode(), 1));
675  Ops.append(Call->op_begin() + 1, Call->op_end());
676  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
677 }
678 
679 /// Return true if call address is a load and it can be
680 /// moved below CALLSEQ_START and the chains leading up to the call.
681 /// Return the CALLSEQ_START by reference as a second output.
682 /// In the case of a tail call, there isn't a callseq node between the call
683 /// chain and the load.
684 static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
685  // The transformation is somewhat dangerous if the call's chain was glued to
686  // the call. After MoveBelowOrigChain the load is moved between the call and
687  // the chain, this can create a cycle if the load is not folded. So it is
688  // *really* important that we are sure the load will be folded.
689  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
690  return false;
691  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
692  if (!LD ||
693  LD->isVolatile() ||
694  LD->getAddressingMode() != ISD::UNINDEXED ||
695  LD->getExtensionType() != ISD::NON_EXTLOAD)
696  return false;
697 
698  // Now let's find the callseq_start.
699  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
700  if (!Chain.hasOneUse())
701  return false;
702  Chain = Chain.getOperand(0);
703  }
704 
705  if (!Chain.getNumOperands())
706  return false;
707  // Since we are not checking for AA here, conservatively abort if the chain
708  // writes to memory. It's not safe to move the callee (a load) across a store.
709  if (isa<MemSDNode>(Chain.getNode()) &&
710  cast<MemSDNode>(Chain.getNode())->writeMem())
711  return false;
712  if (Chain.getOperand(0).getNode() == Callee.getNode())
713  return true;
714  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
715  Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
716  Callee.getValue(1).hasOneUse())
717  return true;
718  return false;
719 }
720 
721 void X86DAGToDAGISel::PreprocessISelDAG() {
722  // OptFor[Min]Size are used in pattern predicates that isel is matching.
723  OptForSize = MF->getFunction().optForSize();
724  OptForMinSize = MF->getFunction().optForMinSize();
725  assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
726 
727  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
728  E = CurDAG->allnodes_end(); I != E; ) {
729  SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
730 
731  // If this is a target specific AND node with no flag usages, turn it back
732  // into ISD::AND to enable test instruction matching.
733  if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
734  SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
735  N->getOperand(0), N->getOperand(1));
736  --I;
737  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
738  ++I;
739  CurDAG->DeleteNode(N);
740  continue;
741  }
742 
743  if (OptLevel != CodeGenOpt::None &&
744  // Only do this when the target can fold the load into the call or
745  // jmp.
746  !Subtarget->useRetpolineIndirectCalls() &&
747  ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
748  (N->getOpcode() == X86ISD::TC_RETURN &&
749  (Subtarget->is64Bit() ||
750  !getTargetMachine().isPositionIndependent())))) {
751  /// Also try moving call address load from outside callseq_start to just
752  /// before the call to allow it to be folded.
753  ///
754  /// [Load chain]
755  /// ^
756  /// |
757  /// [Load]
758  /// ^ ^
759  /// | |
760  /// / \--
761  /// / |
762  ///[CALLSEQ_START] |
763  /// ^ |
764  /// | |
765  /// [LOAD/C2Reg] |
766  /// | |
767  /// \ /
768  /// \ /
769  /// [CALL]
770  bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
771  SDValue Chain = N->getOperand(0);
772  SDValue Load = N->getOperand(1);
773  if (!isCalleeLoad(Load, Chain, HasCallSeq))
774  continue;
775  moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
776  ++NumLoadMoved;
777  continue;
778  }
779 
780  // Lower fpround and fpextend nodes that target the FP stack to a store and
781  // load through a stack slot. This is a gross hack. We would like to simply mark
782  // these as being illegal, but when we do that, legalize produces these when
783  // it expands calls, then expands these in the same legalize pass. We would
784  // like dag combine to be able to hack on these between the call expansion
785  // and the node legalization. As such this pass basically does "really
786  // late" legalization of these inline with the X86 isel pass.
787  // FIXME: This should only happen when not compiled with -O0.
788  if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
789  continue;
790 
791  MVT SrcVT = N->getOperand(0).getSimpleValueType();
792  MVT DstVT = N->getSimpleValueType(0);
793 
794  // If any of the sources are vectors, no fp stack involved.
795  if (SrcVT.isVector() || DstVT.isVector())
796  continue;
797 
798  // If the source and destination are SSE registers, then this is a legal
799  // conversion that should not be lowered.
800  const X86TargetLowering *X86Lowering =
801  static_cast<const X86TargetLowering *>(TLI);
802  bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
803  bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
804  if (SrcIsSSE && DstIsSSE)
805  continue;
806 
807  if (!SrcIsSSE && !DstIsSSE) {
808  // If this is an FPStack extension, it is a noop.
809  if (N->getOpcode() == ISD::FP_EXTEND)
810  continue;
811  // If this is a value-preserving FPStack truncation, it is a noop.
812  if (N->getConstantOperandVal(1))
813  continue;
814  }
815 
816  // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
817  // FPStack has extload and truncstore. SSE can fold direct loads into other
818  // operations. Based on this, decide what we want to do.
819  MVT MemVT;
820  if (N->getOpcode() == ISD::FP_ROUND)
821  MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
822  else
823  MemVT = SrcIsSSE ? SrcVT : DstVT;
824 
825  SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
826  SDLoc dl(N);
827 
828  // FIXME: optimize the case where the src/dest is a load or store?
829  SDValue Store =
830  CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
831  MemTmp, MachinePointerInfo(), MemVT);
832  SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
833  MachinePointerInfo(), MemVT);
834 
835  // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
836  // extload we created. This will cause general havok on the dag because
837  // anything below the conversion could be folded into other existing nodes.
838  // To avoid invalidating 'I', back it up to the convert node.
839  --I;
840  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
841 
842  // Now that we did that, the node is dead. Increment the iterator to the
843  // next node to process, then delete N.
844  ++I;
845  CurDAG->DeleteNode(N);
846  }
847 }
848 
849 // Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
850 bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
851  unsigned Opc = N->getMachineOpcode();
852  if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
853  Opc != X86::MOVSX64rr8)
854  return false;
855 
856  SDValue N0 = N->getOperand(0);
857 
858  // We need to be extracting the low byte of an extend.
859  if (!N0.isMachineOpcode() ||
860  N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
861  N0.getConstantOperandVal(1) != X86::sub_8bit)
862  return false;
863 
864  // We're looking for either a movsx or movzx to match the original opcode.
865  unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
866  : X86::MOVSX32rr8_NOREX;
867  SDValue N00 = N0.getOperand(0);
868  if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
869  return false;
870 
871  if (Opc == X86::MOVSX64rr8) {
872  // If we had a sign extend from 8 to 64 bits, we still need to go from 32
873  // to 64.
874  MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
875  MVT::i64, N00);
876  ReplaceUses(N, Extend);
877  } else {
878  // Ok we can drop this extend and just use the original extend.
879  ReplaceUses(N, N00.getNode());
880  }
881 
882  return true;
883 }
884 
885 void X86DAGToDAGISel::PostprocessISelDAG() {
886  // Skip peepholes at -O0.
887  if (TM.getOptLevel() == CodeGenOpt::None)
888  return;
889 
890  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
891 
892  bool MadeChange = false;
893  while (Position != CurDAG->allnodes_begin()) {
894  SDNode *N = &*--Position;
895  // Skip dead nodes and any non-machine opcodes.
896  if (N->use_empty() || !N->isMachineOpcode())
897  continue;
898 
899  if (tryOptimizeRem8Extend(N)) {
900  MadeChange = true;
901  continue;
902  }
903 
904  // Look for a TESTrr+ANDrr pattern where both operands of the test are
905  // the same. Rewrite to remove the AND.
906  unsigned Opc = N->getMachineOpcode();
907  if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
908  Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
909  N->getOperand(0) == N->getOperand(1) &&
910  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
911  N->getOperand(0).isMachineOpcode()) {
912  SDValue And = N->getOperand(0);
913  unsigned N0Opc = And.getMachineOpcode();
914  if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
915  N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
916  MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
917  MVT::i32,
918  And.getOperand(0),
919  And.getOperand(1));
920  ReplaceUses(N, Test);
921  MadeChange = true;
922  continue;
923  }
924  if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
925  N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
926  unsigned NewOpc;
927  switch (N0Opc) {
928  case X86::AND8rm: NewOpc = X86::TEST8mr; break;
929  case X86::AND16rm: NewOpc = X86::TEST16mr; break;
930  case X86::AND32rm: NewOpc = X86::TEST32mr; break;
931  case X86::AND64rm: NewOpc = X86::TEST64mr; break;
932  }
933 
934  // Need to swap the memory and register operand.
935  SDValue Ops[] = { And.getOperand(1),
936  And.getOperand(2),
937  And.getOperand(3),
938  And.getOperand(4),
939  And.getOperand(5),
940  And.getOperand(0),
941  And.getOperand(6) /* Chain */ };
942  MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
943  MVT::i32, MVT::Other, Ops);
944  ReplaceUses(N, Test);
945  MadeChange = true;
946  continue;
947  }
948  }
949 
950  // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
951  // used. We're doing this late so we can prefer to fold the AND into masked
952  // comparisons. Doing that can be better for the live range of the mask
953  // register.
954  if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
955  Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
956  N->getOperand(0) == N->getOperand(1) &&
957  N->isOnlyUserOf(N->getOperand(0).getNode()) &&
958  N->getOperand(0).isMachineOpcode() &&
959  onlyUsesZeroFlag(SDValue(N, 0))) {
960  SDValue And = N->getOperand(0);
961  unsigned N0Opc = And.getMachineOpcode();
962  // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
963  // KAND instructions and KTEST use the same ISA feature.
964  if (N0Opc == X86::KANDBrr ||
965  (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
966  N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
967  unsigned NewOpc;
968  switch (Opc) {
969  default: llvm_unreachable("Unexpected opcode!");
970  case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
971  case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
972  case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
973  case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
974  }
975  MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
976  MVT::i32,
977  And.getOperand(0),
978  And.getOperand(1));
979  ReplaceUses(N, KTest);
980  MadeChange = true;
981  continue;
982  }
983  }
984 
985  // Attempt to remove vector moves that were inserted to zero upper bits.
986  if (Opc != TargetOpcode::SUBREG_TO_REG)
987  continue;
988 
989  unsigned SubRegIdx = N->getConstantOperandVal(2);
990  if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
991  continue;
992 
993  SDValue Move = N->getOperand(1);
994  if (!Move.isMachineOpcode())
995  continue;
996 
997  // Make sure it's one of the move opcodes we recognize.
998  switch (Move.getMachineOpcode()) {
999  default:
1000  continue;
1001  case X86::VMOVAPDrr: case X86::VMOVUPDrr:
1002  case X86::VMOVAPSrr: case X86::VMOVUPSrr:
1003  case X86::VMOVDQArr: case X86::VMOVDQUrr:
1004  case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
1005  case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
1006  case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
1007  case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
1008  case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
1009  case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
1010  case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
1011  case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
1012  case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
1013  case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
1014  case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
1015  break;
1016  }
1017 
1018  SDValue In = Move.getOperand(0);
1019  if (!In.isMachineOpcode() ||
1020  In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
1021  continue;
1022 
1023  // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers
1024  // the SHA instructions which use a legacy encoding.
1025  uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1026  if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
1027  (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
1028  (TSFlags & X86II::EncodingMask) != X86II::XOP)
1029  continue;
1030 
1031  // Producing instruction is another vector instruction. We can drop the
1032  // move.
1033  CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1034  MadeChange = true;
1035  }
1036 
1037  if (MadeChange)
1038  CurDAG->RemoveDeadNodes();
1039 }
1040 
1041 
1042 /// Emit any code that needs to be executed only in the main function.
1043 void X86DAGToDAGISel::emitSpecialCodeForMain() {
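 // On Cygwin/MinGW, global constructors are run by a call to __main at the
 // start of main, so emit that call here.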
1044  if (Subtarget->isTargetCygMing()) {
1045  TargetLowering::ArgListTy Args;
1046  auto &DL = CurDAG->getDataLayout();
1047 
1048  TargetLowering::CallLoweringInfo CLI(*CurDAG);
1049  CLI.setChain(CurDAG->getRoot())
1050  .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1051  CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1052  std::move(Args));
1053  const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1054  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
1055  CurDAG->setRoot(Result.second);
1056  }
1057 }
1058 
1059 void X86DAGToDAGISel::EmitFunctionEntryCode() {
1060  // If this is main, emit special code for main.
1061  const Function &F = MF->getFunction();
1062  if (F.hasExternalLinkage() && F.getName() == "main")
1063  emitSpecialCodeForMain();
1064 }
1065 
1066 static bool isDispSafeForFrameIndex(int64_t Val) {
1067  // On 64-bit platforms, we can run into an issue where a frame index
1068  // includes a displacement that, when added to the explicit displacement,
1069  // will overflow the displacement field. Assuming that the frame index
1070  // displacement fits into a 31-bit integer (which is only slightly more
1071  // aggressive than the current fundamental assumption that it fits into
1072  // a 32-bit integer), a 31-bit disp should always be safe.
1073  return isInt<31>(Val);
1074 }
1075 
1076 bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
1077  X86ISelAddressMode &AM) {
1078  // If there's no offset to fold, we don't need to do any work.
1079  if (Offset == 0)
1080  return false;
1081 
1082  // Cannot combine ExternalSymbol displacements with integer offsets.
1083  if (AM.ES || AM.MCSym)
1084  return true;
1085 
1086  int64_t Val = AM.Disp + Offset;
1087  CodeModel::Model M = TM.getCodeModel();
1088  if (Subtarget->is64Bit()) {
1089  if (!X86::isOffsetSuitableForCodeModel(Val, M,
1090  AM.hasSymbolicDisplacement()))
1091  return true;
1092  // In addition to the checks required for a register base, check that
1093  // we do not try to use an unsafe Disp with a frame index.
1094  if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
1095  !isDispSafeForFrameIndex(Val))
1096  return true;
1097  }
1098  AM.Disp = Val;
1099  return false;
1100 
1101 }
1102 
1103 bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
1104  SDValue Address = N->getOperand(1);
1105 
1106  // load gs:0 -> GS segment register.
1107  // load fs:0 -> FS segment register.
1108  //
1109  // This optimization is valid because the GNU TLS model defines that
1110  // gs:0 (or fs:0 on X86-64) contains its own address.
1111  // For more information see http://people.redhat.com/drepper/tls.pdf
1112  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
1113  if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
1114  !IndirectTlsSegRefs &&
1115  (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1116  Subtarget->isTargetFuchsia()))
1117  switch (N->getPointerInfo().getAddrSpace()) {
1118  case 256:
1119  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1120  return false;
1121  case 257:
1122  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1123  return false;
1124  // Address space 258 is not handled here, because it is not used to
1125  // address TLS areas.
1126  }
1127 
1128  return true;
1129 }
1130 
1131 /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
1132 /// mode. These wrap things that will resolve down into a symbol reference.
1133 /// If no match is possible, this returns true, otherwise it returns false.
1134 bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
1135  // If the addressing mode already has a symbol as the displacement, we can
1136  // never match another symbol.
1137  if (AM.hasSymbolicDisplacement())
1138  return true;
1139 
1140  bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
1141 
1142  // We can't use an addressing mode in the 64-bit large code model. In the
1143  // medium code model, we can use such a mode when RIP wrappers are present.
1144  // That signifies access to globals that are known to be "near", such as the
1145  // GOT itself.
1146  CodeModel::Model M = TM.getCodeModel();
1147  if (Subtarget->is64Bit() &&
1148  (M == CodeModel::Large || (M == CodeModel::Medium && !IsRIPRel)))
1149  return true;
1150 
1151  // Base and index reg must be 0 in order to use %rip as base.
1152  if (IsRIPRel && AM.hasBaseOrIndexReg())
1153  return true;
1154 
1155  // Make a local copy in case we can't do this fold.
1156  X86ISelAddressMode Backup = AM;
1157 
1158  int64_t Offset = 0;
1159  SDValue N0 = N.getOperand(0);
1160  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
1161  AM.GV = G->getGlobal();
1162  AM.SymbolFlags = G->getTargetFlags();
1163  Offset = G->getOffset();
1164  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
1165  AM.CP = CP->getConstVal();
1166  AM.Align = CP->getAlignment();
1167  AM.SymbolFlags = CP->getTargetFlags();
1168  Offset = CP->getOffset();
1169  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
1170  AM.ES = S->getSymbol();
1171  AM.SymbolFlags = S->getTargetFlags();
1172  } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
1173  AM.MCSym = S->getMCSymbol();
1174  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
1175  AM.JT = J->getIndex();
1176  AM.SymbolFlags = J->getTargetFlags();
1177  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
1178  AM.BlockAddr = BA->getBlockAddress();
1179  AM.SymbolFlags = BA->getTargetFlags();
1180  Offset = BA->getOffset();
1181  } else
1182  llvm_unreachable("Unhandled symbol reference node.");
1183 
1184  if (foldOffsetIntoAddress(Offset, AM)) {
1185  AM = Backup;
1186  return true;
1187  }
1188 
1189  if (IsRIPRel)
1190  AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1191 
1192  // Commit the changes now that we know this fold is safe.
1193  return false;
1194 }
1195 
1196 /// Add the specified node to the specified addressing mode, returning true if
1197 /// it cannot be done. This just pattern matches for the addressing mode.
1198 bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
1199  if (matchAddressRecursively(N, AM, 0))
1200  return true;
1201 
1202  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1203  // a smaller encoding and avoids a scaled-index.
1204  if (AM.Scale == 2 &&
1205  AM.BaseType == X86ISelAddressMode::RegBase &&
1206  AM.Base_Reg.getNode() == nullptr) {
1207  AM.Base_Reg = AM.IndexReg;
1208  AM.Scale = 1;
1209  }
1210 
1211  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1212  // because it has a smaller encoding.
1213  // TODO: Which other code models can use this?
1214  if (TM.getCodeModel() == CodeModel::Small &&
1215  Subtarget->is64Bit() &&
1216  AM.Scale == 1 &&
1217  AM.BaseType == X86ISelAddressMode::RegBase &&
1218  AM.Base_Reg.getNode() == nullptr &&
1219  AM.IndexReg.getNode() == nullptr &&
1220  AM.SymbolFlags == X86II::MO_NO_FLAG &&
1221  AM.hasSymbolicDisplacement())
1222  AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
1223 
1224  return false;
1225 }
1226 
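// Try to fold both operands of an add into the addressing mode; if that is
// not possible, fall back to using the two operands as base and index
// registers when those slots are still free.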
1227 bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
1228  unsigned Depth) {
1229  // Add an artificial use to this node so that we can keep track of
1230  // it if it gets CSE'd with a different node.
1231  HandleSDNode Handle(N);
1232 
1233  X86ISelAddressMode Backup = AM;
1234  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
1235  !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
1236  return false;
1237  AM = Backup;
1238 
1239  // Try again after commuting the operands.
1240  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
1241  !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
1242  return false;
1243  AM = Backup;
1244 
1245  // If we couldn't fold both operands into the address at the same time,
1246  // see if we can just put each operand into a register and fold at least
1247  // the add.
1248  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1249  !AM.Base_Reg.getNode() &&
1250  !AM.IndexReg.getNode()) {
1251  N = Handle.getValue();
1252  AM.Base_Reg = N.getOperand(0);
1253  AM.IndexReg = N.getOperand(1);
1254  AM.Scale = 1;
1255  return false;
1256  }
1257  N = Handle.getValue();
1258  return true;
1259 }
1260 
1261 // Insert a node into the DAG at least before the Pos node's position. This
1262 // will reposition the node as needed, and will assign it a node ID that is <=
1263 // the Pos node's ID. Note that this does *not* preserve the uniqueness of node
1264 // IDs! The selection DAG must no longer depend on their uniqueness when this
1265 // is used.
1266 static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
1267  if (N->getNodeId() == -1 ||
1268  (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
1269  SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
1270  DAG.RepositionNode(Pos->getIterator(), N.getNode());
1271  // Mark Node as invalid for pruning as after this it may be a successor to a
1272  // selected node but otherwise be in the same position of Pos.
1273  // Conservatively mark it with the same -abs(Id) to assure node id
1274  // invariant is preserved.
1275  N->setNodeId(Pos->getNodeId());
1276  SelectionDAGISel::InvalidateNodeId(N.getNode());
1277  }
1278 }
1279 
1280 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1281 // safe. This allows us to convert the shift and and into an h-register
1282 // extract and a scaled index. Returns false if the simplification is
1283 // performed.
1284 static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
1285  uint64_t Mask,
1286  SDValue Shift, SDValue X,
1287  X86ISelAddressMode &AM) {
1288  if (Shift.getOpcode() != ISD::SRL ||
1289  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1290  !Shift.hasOneUse())
1291  return true;
1292 
1293  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
1294  if (ScaleLog <= 0 || ScaleLog >= 4 ||
1295  Mask != (0xffu << ScaleLog))
1296  return true;
1297 
1298  MVT VT = N.getSimpleValueType();
1299  SDLoc DL(N);
1300  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
1301  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
1302  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
1303  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
1304  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
1305  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
1306 
1307  // Insert the new nodes into the topological ordering. We must do this in
1308  // a valid topological ordering as nothing is going to go back and re-sort
1309  // these nodes. We continually insert before 'N' in sequence as this is
1310  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1311  // hierarchy left to express.
1312  insertDAGNode(DAG, N, Eight);
1313  insertDAGNode(DAG, N, Srl);
1314  insertDAGNode(DAG, N, NewMask);
1315  insertDAGNode(DAG, N, And);
1316  insertDAGNode(DAG, N, ShlCount);
1317  insertDAGNode(DAG, N, Shl);
1318  DAG.ReplaceAllUsesWith(N, Shl);
1319  AM.IndexReg = And;
1320  AM.Scale = (1 << ScaleLog);
1321  return false;
1322 }
1323 
1324 // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
1325 // allows us to fold the shift into this addressing mode. Returns false if the
1326 // transform succeeded.
1327 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
1328  uint64_t Mask,
1329  SDValue Shift, SDValue X,
1330  X86ISelAddressMode &AM) {
1331  if (Shift.getOpcode() != ISD::SHL ||
1332  !isa<ConstantSDNode>(Shift.getOperand(1)))
1333  return true;
1334 
1335  // Not likely to be profitable if either the AND or SHIFT node has more
1336  // than one use (unless all uses are for address computation). Besides,
1337  // isel mechanism requires their node ids to be reused.
1338  if (!N.hasOneUse() || !Shift.hasOneUse())
1339  return true;
1340 
1341  // Verify that the shift amount is something we can fold.
1342  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1343  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
1344  return true;
1345 
1346  MVT VT = N.getSimpleValueType();
1347  SDLoc DL(N);
1348  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
1349  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
1350  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
1351 
1352  // Insert the new nodes into the topological ordering. We must do this in
1353  // a valid topological ordering as nothing is going to go back and re-sort
1354  // these nodes. We continually insert before 'N' in sequence as this is
1355  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1356  // hierarchy left to express.
1357  insertDAGNode(DAG, N, NewMask);
1358  insertDAGNode(DAG, N, NewAnd);
1359  insertDAGNode(DAG, N, NewShift);
1360  DAG.ReplaceAllUsesWith(N, NewShift);
1361 
1362  AM.Scale = 1 << ShiftAmt;
1363  AM.IndexReg = NewAnd;
1364  return false;
1365 }
1366 
1367 // Implement some heroics to detect shifts of masked values where the mask can
1368 // be replaced by extending the shift and undoing that in the addressing mode
1369 // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
1370 // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
1371 // the addressing mode. This results in code such as:
1372 //
1373 // int f(short *y, int *lookup_table) {
1374 // ...
1375 // return *y + lookup_table[*y >> 11];
1376 // }
1377 //
1378 // Turning into:
1379 // movzwl (%rdi), %eax
1380 // movl %eax, %ecx
1381 // shrl $11, %ecx
1382 // addl (%rsi,%rcx,4), %eax
1383 //
1384 // Instead of:
1385 // movzwl (%rdi), %eax
1386 // movl %eax, %ecx
1387 // shrl $9, %ecx
1388 // andl $124, %rcx
1389 // addl (%rsi,%rcx), %eax
1390 //
1391 // Note that this function assumes the mask is provided as a mask *after* the
1392 // value is shifted. The input chain may or may not match that, but computing
1393 // such a mask is trivial.
1394 static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
1395  uint64_t Mask,
1396  SDValue Shift, SDValue X,
1397  X86ISelAddressMode &AM) {
1398  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
1399  !isa<ConstantSDNode>(Shift.getOperand(1)))
1400  return true;
1401 
1402  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1403  unsigned MaskLZ = countLeadingZeros(Mask);
1404  unsigned MaskTZ = countTrailingZeros(Mask);
1405 
1406  // The amount of shift we're trying to fit into the addressing mode is taken
1407  // from the trailing zeros of the mask.
1408  unsigned AMShiftAmt = MaskTZ;
1409 
1410  // There is nothing we can do here unless the mask is removing some bits.
1411  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1412  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1413 
1414  // We also need to ensure that the mask is a contiguous run of bits.
1415  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
1416 
1417  // Scale the leading zero count down based on the actual size of the value.
1418  // Also scale it down based on the size of the shift.
1419  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
1420  if (MaskLZ < ScaleDown)
1421  return true;
1422  MaskLZ -= ScaleDown;
1423 
1424  // The final check is to ensure that any masked out high bits of X are
1425  // already known to be zero. Otherwise, the mask has a semantic impact
1426  // other than masking out a couple of low bits. Unfortunately, because of
1427  // the mask, zero extensions will be removed from operands in some cases.
1428  // This code works extra hard to look through extensions because we can
1429  // replace them with zero extensions cheaply if necessary.
1430  bool ReplacingAnyExtend = false;
1431  if (X.getOpcode() == ISD::ANY_EXTEND) {
1432  unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
1433  X.getOperand(0).getSimpleValueType().getSizeInBits();
1434  // Assume that we'll replace the any-extend with a zero-extend, and
1435  // narrow the search to the extended value.
1436  X = X.getOperand(0);
1437  MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
1438  ReplacingAnyExtend = true;
1439  }
1440  APInt MaskedHighBits =
1442  KnownBits Known = DAG.computeKnownBits(X);
1443  if (MaskedHighBits != Known.Zero) return true;
1444 
1445  // We've identified a pattern that can be transformed into a single shift
1446  // and an addressing mode. Make it so.
1447  MVT VT = N.getSimpleValueType();
1448  if (ReplacingAnyExtend) {
1449  assert(X.getValueType() != VT);
1450  // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
1451  SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
1452  insertDAGNode(DAG, N, NewX);
1453  X = NewX;
1454  }
1455  SDLoc DL(N);
1456  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1457  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1458  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1459  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
1460 
1461  // Insert the new nodes into the topological ordering. We must do this in
1462  // a valid topological ordering as nothing is going to go back and re-sort
1463  // these nodes. We continually insert before 'N' in sequence as this is
1464  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1465  // hierarchy left to express.
1466  insertDAGNode(DAG, N, NewSRLAmt);
1467  insertDAGNode(DAG, N, NewSRL);
1468  insertDAGNode(DAG, N, NewSHLAmt);
1469  insertDAGNode(DAG, N, NewSHL);
1470  DAG.ReplaceAllUsesWith(N, NewSHL);
1471 
1472  AM.Scale = 1 << AMShiftAmt;
1473  AM.IndexReg = NewSRL;
1474  return false;
1475 }
1476 
1477 // Transform "(X >> SHIFT) & (MASK << C1)" to
1478 // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
1479 // matched to a BEXTR later. Returns false if the simplification is performed.
1480 static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
1481  uint64_t Mask,
1482  SDValue Shift, SDValue X,
1483  X86ISelAddressMode &AM,
1484  const X86Subtarget &Subtarget) {
1485  if (Shift.getOpcode() != ISD::SRL ||
1486  !isa<ConstantSDNode>(Shift.getOperand(1)) ||
1487  !Shift.hasOneUse() || !N.hasOneUse())
1488  return true;
1489 
1490  // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
1491  if (!Subtarget.hasTBM() &&
1492  !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
1493  return true;
1494 
1495  // We need to ensure that the mask is a contiguous run of bits.
1496  if (!isShiftedMask_64(Mask)) return true;
1497 
1498  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
1499 
1500  // The amount of shift we're trying to fit into the addressing mode is taken
1501  // from the trailing zeros of the mask.
1502  unsigned AMShiftAmt = countTrailingZeros(Mask);
1503 
1504  // There is nothing we can do here unless the mask is removing some bits.
1505  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
1506  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
1507 
1508  MVT VT = N.getSimpleValueType();
1509  SDLoc DL(N);
1510  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
1511  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
1512  SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
1513  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
1514  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
1515  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
1516 
1517  // Insert the new nodes into the topological ordering. We must do this in
1518  // a valid topological ordering as nothing is going to go back and re-sort
1519  // these nodes. We continually insert before 'N' in sequence as this is
1520  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
1521  // hierarchy left to express.
1522  insertDAGNode(DAG, N, NewSRLAmt);
1523  insertDAGNode(DAG, N, NewSRL);
1524  insertDAGNode(DAG, N, NewMask);
1525  insertDAGNode(DAG, N, NewAnd);
1526  insertDAGNode(DAG, N, NewSHLAmt);
1527  insertDAGNode(DAG, N, NewSHL);
1528  DAG.ReplaceAllUsesWith(N, NewSHL);
1529 
1530  AM.Scale = 1 << AMShiftAmt;
1531  AM.IndexReg = NewAnd;
1532  return false;
1533 }
1534 
1535 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
1536  unsigned Depth) {
1537  SDLoc dl(N);
1538  LLVM_DEBUG({
1539  dbgs() << "MatchAddress: ";
1540  AM.dump(CurDAG);
1541  });
1542  // Limit recursion.
1543  if (Depth > 5)
1544  return matchAddressBase(N, AM);
1545 
1546  // If this is already a %rip relative address, we can only merge immediates
1547  // into it. Instead of handling this in every case, we handle it here.
1548  // RIP relative addressing: %rip + 32-bit displacement!
1549  if (AM.isRIPRelative()) {
1550  // FIXME: JumpTable and ExternalSymbol address currently don't like
1551  // displacements. It isn't very important, but this should be fixed for
1552  // consistency.
1553  if (!(AM.ES || AM.MCSym) && AM.JT != -1)
1554  return true;
1555 
1556  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
1557  if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
1558  return false;
1559  return true;
1560  }
1561 
1562  switch (N.getOpcode()) {
1563  default: break;
1564  case ISD::LOCAL_RECOVER: {
1565  if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
1566  if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
1567  // Use the symbol and don't prefix it.
1568  AM.MCSym = ESNode->getMCSymbol();
1569  return false;
1570  }
1571  break;
1572  }
1573  case ISD::Constant: {
1574  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1575  if (!foldOffsetIntoAddress(Val, AM))
1576  return false;
1577  break;
1578  }
1579 
1580  case X86ISD::Wrapper:
1581  case X86ISD::WrapperRIP:
1582  if (!matchWrapper(N, AM))
1583  return false;
1584  break;
1585 
1586  case ISD::LOAD:
1587  if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
1588  return false;
1589  break;
1590 
1591  case ISD::FrameIndex:
1592  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1593  AM.Base_Reg.getNode() == nullptr &&
1594  (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
1595  AM.BaseType = X86ISelAddressMode::FrameIndexBase;
1596  AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
1597  return false;
1598  }
1599  break;
1600 
1601  case ISD::SHL:
1602  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
1603  break;
1604 
1605  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1606  unsigned Val = CN->getZExtValue();
1607  // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
1608  // that the base operand remains free for further matching. If
1609  // the base doesn't end up getting used, a post-processing step
1610  // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
1611  if (Val == 1 || Val == 2 || Val == 3) {
1612  AM.Scale = 1 << Val;
1613  SDValue ShVal = N.getOperand(0);
1614 
1615  // Okay, we know that we have a scale by now. However, if the scaled
1616  // value is an add of something and a constant, we can fold the
1617  // constant into the disp field here.
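 // Illustrative example (operand shapes assumed for exposition): for
 // (y + 8) << 2 this yields index = y, scale = 4, and folds 8 << 2 = 32
 // into the displacement.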
1618  if (CurDAG->isBaseWithConstantOffset(ShVal)) {
1619  AM.IndexReg = ShVal.getOperand(0);
1620  ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
1621  uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
1622  if (!foldOffsetIntoAddress(Disp, AM))
1623  return false;
1624  }
1625 
1626  AM.IndexReg = ShVal;
1627  return false;
1628  }
1629  }
1630  break;
1631 
1632  case ISD::SRL: {
1633  // Scale must not be used already.
1634  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1635 
1636  SDValue And = N.getOperand(0);
1637  if (And.getOpcode() != ISD::AND) break;
1638  SDValue X = And.getOperand(0);
1639 
1640  // We only handle up to 64-bit values here as those are what matter for
1641  // addressing mode optimizations.
1642  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1643 
1644  // The mask used for the transform is expected to be post-shift, but we
1645  // found the shift first so just apply the shift to the mask before passing
1646  // it down.
1647  if (!isa<ConstantSDNode>(N.getOperand(1)) ||
1648  !isa<ConstantSDNode>(And.getOperand(1)))
1649  break;
1650  uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
1651 
1652  // Try to fold the mask and shift into the scale, and return false if we
1653  // succeed.
1654  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
1655  return false;
1656  break;
1657  }
1658 
1659  case ISD::SMUL_LOHI:
1660  case ISD::UMUL_LOHI:
1661  // A mul_lohi where we need the low part can be folded as a plain multiply.
1662  if (N.getResNo() != 0) break;
1663  LLVM_FALLTHROUGH;
1664  case ISD::MUL:
1665  case X86ISD::MUL_IMM:
1666  // X*[3,5,9] -> X+X*[2,4,8]
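 // Illustrative example: x*5 becomes base = x, index = x, scale = 4, which
 // an LEA can encode, e.g. "leaq (%rax,%rax,4), %rcx" if x lives in %rax
 // (register choice here is purely for exposition).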
1667  if (AM.BaseType == X86ISelAddressMode::RegBase &&
1668  AM.Base_Reg.getNode() == nullptr &&
1669  AM.IndexReg.getNode() == nullptr) {
1670  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1671  if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
1672  CN->getZExtValue() == 9) {
1673  AM.Scale = unsigned(CN->getZExtValue())-1;
1674 
1675  SDValue MulVal = N.getOperand(0);
1676  SDValue Reg;
1677 
1678  // Okay, we know that we have a scale by now. However, if the scaled
1679  // value is an add of something and a constant, we can fold the
1680  // constant into the disp field here.
1681  if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
1682  isa<ConstantSDNode>(MulVal.getOperand(1))) {
1683  Reg = MulVal.getOperand(0);
1684  ConstantSDNode *AddVal =
1685  cast<ConstantSDNode>(MulVal.getOperand(1));
1686  uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
1687  if (foldOffsetIntoAddress(Disp, AM))
1688  Reg = N.getOperand(0);
1689  } else {
1690  Reg = N.getOperand(0);
1691  }
1692 
1693  AM.IndexReg = AM.Base_Reg = Reg;
1694  return false;
1695  }
1696  }
1697  break;
1698 
1699  case ISD::SUB: {
1700  // Given A-B, if A can be completely folded into the address and
1701  // the index field is still unused, use -B as the index.
1702  // This is a win if A has multiple parts that can be folded into
1703  // the address. Also, this saves a mov if the base register has
1704  // other uses, since it avoids a two-address sub instruction; however,
1705  // it costs an additional mov if the index register has other uses.
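 // Illustrative sketch (register names for exposition only): for "p - i"
 // where p folds entirely into the address, we can negate i and use it as
 // the index, e.g. "negq %rcx; movq disp(%rbx,%rcx), %rax", instead of
 // materializing p - i with a separate sub first.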
1706 
1707  // Add an artificial use to this node so that we can keep track of
1708  // it if it gets CSE'd with a different node.
1709  HandleSDNode Handle(N);
1710 
1711  // Test if the LHS of the sub can be folded.
1712  X86ISelAddressMode Backup = AM;
1713  if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
1714  AM = Backup;
1715  break;
1716  }
1717  // Test if the index field is free for use.
1718  if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
1719  AM = Backup;
1720  break;
1721  }
1722 
1723  int Cost = 0;
1724  SDValue RHS = Handle.getValue().getOperand(1);
1725  // If the RHS involves a register with multiple uses, this
1726  // transformation incurs an extra mov, due to the neg instruction
1727  // clobbering its operand.
1728  if (!RHS.getNode()->hasOneUse() ||
1729  RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
1730  RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
1731  RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
1732  (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
1733  RHS.getOperand(0).getValueType() == MVT::i32))
1734  ++Cost;
1735  // If the base is a register with multiple uses, this
1736  // transformation may save a mov.
1737  // FIXME: Don't rely on DELETED_NODEs.
1738  if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
1739  AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
1740  !AM.Base_Reg.getNode()->hasOneUse()) ||
1741  AM.BaseType == X86ISelAddressMode::FrameIndexBase)
1742  --Cost;
1743  // If the folded LHS was interesting, this transformation saves
1744  // address arithmetic.
1745  if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
1746  ((AM.Disp != 0) && (Backup.Disp == 0)) +
1747  (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
1748  --Cost;
1749  // If it doesn't look like it may be an overall win, don't do it.
1750  if (Cost >= 0) {
1751  AM = Backup;
1752  break;
1753  }
1754 
1755  // Ok, the transformation is legal and appears profitable. Go for it.
1756  SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
1757  SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
1758  AM.IndexReg = Neg;
1759  AM.Scale = 1;
1760 
1761  // Insert the new nodes into the topological ordering.
1762  insertDAGNode(*CurDAG, Handle.getValue(), Zero);
1763  insertDAGNode(*CurDAG, Handle.getValue(), Neg);
1764  return false;
1765  }
1766 
1767  case ISD::ADD:
1768  if (!matchAdd(N, AM, Depth))
1769  return false;
1770  break;
1771 
1772  case ISD::OR:
1773  // We want to look through a transform in InstCombine and DAGCombiner that
1774  // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
1775  // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
1776  // An 'lea' can then be used to match the shift (multiply) and add:
1777  // and $1, %esi
1778  // lea (%rsi, %rdi, 8), %rax
1779  if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
1780  !matchAdd(N, AM, Depth))
1781  return false;
1782  break;
1783 
1784  case ISD::AND: {
1785  // Perform some heroic transforms on an and of a constant-count shift
1786  // with a constant to enable use of the scaled offset field.
1787 
1788  // Scale must not be used already.
1789  if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
1790 
1791  SDValue Shift = N.getOperand(0);
1792  if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
1793  SDValue X = Shift.getOperand(0);
1794 
1795  // We only handle up to 64-bit values here as those are what matter for
1796  // addressing mode optimizations.
1797  if (X.getSimpleValueType().getSizeInBits() > 64) break;
1798 
1799  if (!isa<ConstantSDNode>(N.getOperand(1)))
1800  break;
1801  uint64_t Mask = N.getConstantOperandVal(1);
1802 
1803  // Try to fold the mask and shift into an extract and scale.
1804  if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
1805  return false;
1806 
1807  // Try to fold the mask and shift directly into the scale.
1808  if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
1809  return false;
1810 
1811  // Try to swap the mask and shift to place shifts which can be done as
1812  // a scale on the outside of the mask.
1813  if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
1814  return false;
1815 
1816  // Try to fold the mask and shift into BEXTR and scale.
1817  if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
1818  return false;
1819 
1820  break;
1821  }
1822  }
1823 
1824  return matchAddressBase(N, AM);
1825 }
1826 
1827 /// Helper for MatchAddress. Add the specified node to the
1828 /// specified addressing mode without any further recursion.
1829 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
1830  // Is the base register already occupied?
1831  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
1832  // If so, check to see if the scale index register is set.
1833  if (!AM.IndexReg.getNode()) {
1834  AM.IndexReg = N;
1835  AM.Scale = 1;
1836  return false;
1837  }
1838 
1839  // Otherwise, we cannot select it.
1840  return true;
1841  }
1842 
1843  // Default, generate it as a register.
1844  AM.BaseType = X86ISelAddressMode::RegBase;
1845  AM.Base_Reg = N;
1846  return false;
1847 }
1848 
1849 /// Helper for selectVectorAddr. Handles things that can be folded into a
1850 /// gather scatter address. The index register and scale should have already
1851 /// been handled.
1852 bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
1853  // TODO: Support other operations.
1854  switch (N.getOpcode()) {
1855  case ISD::Constant: {
1856  uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
1857  if (!foldOffsetIntoAddress(Val, AM))
1858  return false;
1859  break;
1860  }
1861  case X86ISD::Wrapper:
1862  if (!matchWrapper(N, AM))
1863  return false;
1864  break;
1865  }
1866 
1867  return matchAddressBase(N, AM);
1868 }
1869 
1870 bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
1871  SDValue &Scale, SDValue &Index,
1872  SDValue &Disp, SDValue &Segment) {
1873  X86ISelAddressMode AM;
1874  auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
1875  AM.IndexReg = Mgs->getIndex();
1876  AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
1877 
1878  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1879  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1880  if (AddrSpace == 256)
1881  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1882  if (AddrSpace == 257)
1883  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1884  if (AddrSpace == 258)
1885  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1886 
1887  // Try to match into the base and displacement fields.
1888  if (matchVectorAddress(N, AM))
1889  return false;
1890 
1891  MVT VT = N.getSimpleValueType();
1892  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1893  if (!AM.Base_Reg.getNode())
1894  AM.Base_Reg = CurDAG->getRegister(0, VT);
1895  }
1896 
1897  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1898  return true;
1899 }
1900 
1901 /// Returns true if it is able to pattern match an addressing mode.
1902 /// It returns the operands which make up the maximal addressing mode it can
1903 /// match by reference.
1904 ///
1905 /// Parent is the parent node of the addr operand that is being matched. It
1906 /// is always a load, store, atomic node, or null. It is only null when
1907 /// checking memory operands for inline asm nodes.
1908 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
1909  SDValue &Scale, SDValue &Index,
1910  SDValue &Disp, SDValue &Segment) {
1911  X86ISelAddressMode AM;
1912 
1913  if (Parent &&
1914  // This list of opcodes are all the nodes that have an "addr:$ptr" operand
1915  // that are not a MemSDNode, and thus don't have proper addrspace info.
1916  Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
1917  Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
1918  Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
1919  Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
1920  Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
1921  unsigned AddrSpace =
1922  cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
1923  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
1924  if (AddrSpace == 256)
1925  AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1926  if (AddrSpace == 257)
1927  AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1928  if (AddrSpace == 258)
1929  AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
1930  }
1931 
1932  if (matchAddress(N, AM))
1933  return false;
1934 
1935  MVT VT = N.getSimpleValueType();
1936  if (AM.BaseType == X86ISelAddressMode::RegBase) {
1937  if (!AM.Base_Reg.getNode())
1938  AM.Base_Reg = CurDAG->getRegister(0, VT);
1939  }
1940 
1941  if (!AM.IndexReg.getNode())
1942  AM.IndexReg = CurDAG->getRegister(0, VT);
1943 
1944  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
1945  return true;
1946 }
1947 
1948 // We can only fold a load if all nodes between it and the root node have a
1949 // single use. If there are additional uses, we could end up duplicating the
1950 // load.
1951 static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
1952  while (User != Root) {
1953  if (!User->hasOneUse())
1954  return false;
1955  User = *User->use_begin();
1956  }
1957 
1958  return true;
1959 }
1960 
1961 /// Match a scalar SSE load. In particular, we want to match a load whose top
1962 /// elements are either undef or zeros. The load flavor is derived from the
1963 /// type of N, which is either v4f32 or v2f64.
1964 ///
1965 /// We also return:
1966 /// PatternNodeWithChain: this is the matched node that has a chain input and
1967 /// output.
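/// Illustrative example (instruction choice for exposition only): the load in
/// (v4f32 (scalar_to_vector (load addr))) can be folded into the memory
/// operand of a scalar SSE instruction such as ADDSSrm.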
1968 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
1969  SDValue N, SDValue &Base,
1970  SDValue &Scale, SDValue &Index,
1971  SDValue &Disp, SDValue &Segment,
1972  SDValue &PatternNodeWithChain) {
1973  if (!hasSingleUsesFromRoot(Root, Parent))
1974  return false;
1975 
1976  // We can allow a full vector load here since narrowing a load is ok.
1977  if (ISD::isNON_EXTLoad(N.getNode())) {
1978  PatternNodeWithChain = N;
1979  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1980  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
1981  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
1982  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
1983  Segment);
1984  }
1985  }
1986 
1987  // We can also match the special zero extended load opcode.
1988  if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
1989  PatternNodeWithChain = N;
1990  if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
1991  IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
1992  auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
1993  return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
1994  Segment);
1995  }
1996  }
1997 
1998  // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
1999  // once. Otherwise the load might get duplicated and the chain output of the
2000  // duplicate load will not be observed by all dependencies.
2001  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
2002  PatternNodeWithChain = N.getOperand(0);
2003  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2004  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2005  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2006  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2007  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2008  Segment);
2009  }
2010  }
2011 
2012  // Also handle the case where we explicitly require zeros in the top
2013  // elements. This is a vector shuffle from the zero vector.
2014  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
2015  // Check to see if the top elements are all zeros (or bitcast of zeros).
2016  N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
2017  N.getOperand(0).getNode()->hasOneUse()) {
2018  PatternNodeWithChain = N.getOperand(0).getOperand(0);
2019  if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
2020  IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
2021  IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
2022  // Okay, this is a zero extending load. Fold it.
2023  LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
2024  return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
2025  Segment);
2026  }
2027  }
2028 
2029  return false;
2030 }
2031 
2032 
2033 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
2034  if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2035  uint64_t ImmVal = CN->getZExtValue();
2036  if (!isUInt<32>(ImmVal))
2037  return false;
2038 
2039  Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
2040  return true;
2041  }
2042 
2043  // In static codegen with small code model, we can get the address of a label
2044  // into a register with 'movl'
2045  if (N->getOpcode() != X86ISD::Wrapper)
2046  return false;
2047 
2048  N = N.getOperand(0);
2049 
2050  // At least GNU as does not accept 'movl' for TPOFF relocations.
2051  // FIXME: We could use 'movl' when we know we are targeting MC.
2052  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
2053  return false;
2054 
2055  Imm = N;
2056  if (N->getOpcode() != ISD::TargetGlobalAddress)
2057  return TM.getCodeModel() == CodeModel::Small;
2058 
2059  Optional<ConstantRange> CR =
2060  cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
2061  if (!CR)
2062  return TM.getCodeModel() == CodeModel::Small;
2063 
2064  return CR->getUnsignedMax().ult(1ull << 32);
2065 }
2066 
2067 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
2068  SDValue &Scale, SDValue &Index,
2069  SDValue &Disp, SDValue &Segment) {
2070  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
2071  SDLoc DL(N);
2072 
2073  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
2074  return false;
2075 
2076  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
2077  if (RN && RN->getReg() == 0)
2078  Base = CurDAG->getRegister(0, MVT::i64);
2079  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
2080  // Base could already be %rip, particularly in the x32 ABI.
2081  Base = SDValue(CurDAG->getMachineNode(
2082  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
2083  CurDAG->getTargetConstant(0, DL, MVT::i64),
2084  Base,
2085  CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
2086  0);
2087  }
2088 
2089  RN = dyn_cast<RegisterSDNode>(Index);
2090  if (RN && RN->getReg() == 0)
2091  Index = CurDAG->getRegister(0, MVT::i64);
2092  else {
2093  assert(Index.getValueType() == MVT::i32 &&
2094  "Expect to be extending 32-bit registers for use in LEA");
2095  Index = SDValue(CurDAG->getMachineNode(
2096  TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
2097  CurDAG->getTargetConstant(0, DL, MVT::i64),
2098  Index,
2099  CurDAG->getTargetConstant(X86::sub_32bit, DL,
2100  MVT::i32)),
2101  0);
2102  }
2103 
2104  return true;
2105 }
2106 
2107 /// Calls SelectAddr and determines if the maximal addressing
2108 /// mode it matches can be cost effectively emitted as an LEA instruction.
2109 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
2110  SDValue &Base, SDValue &Scale,
2111  SDValue &Index, SDValue &Disp,
2112  SDValue &Segment) {
2113  X86ISelAddressMode AM;
2114 
2115  // Save the DL and VT before calling matchAddress, it can invalidate N.
2116  SDLoc DL(N);
2117  MVT VT = N.getSimpleValueType();
2118 
2119  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
2120  // segments.
2121  SDValue Copy = AM.Segment;
2122  SDValue T = CurDAG->getRegister(0, MVT::i32);
2123  AM.Segment = T;
2124  if (matchAddress(N, AM))
2125  return false;
2126  assert (T == AM.Segment);
2127  AM.Segment = Copy;
2128 
2129  unsigned Complexity = 0;
2130  if (AM.BaseType == X86ISelAddressMode::RegBase)
2131  if (AM.Base_Reg.getNode())
2132  Complexity = 1;
2133  else
2134  AM.Base_Reg = CurDAG->getRegister(0, VT);
2135  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
2136  Complexity = 4;
2137 
2138  if (AM.IndexReg.getNode())
2139  Complexity++;
2140  else
2141  AM.IndexReg = CurDAG->getRegister(0, VT);
2142 
2143  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
2144  // a simple shift.
2145  if (AM.Scale > 1)
2146  Complexity++;
2147 
2148  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
2149  // to a LEA. This is determined with some experimentation but is by no means
2150  // optimal (especially for code size consideration). LEA is nice because of
2151  // its three-address nature. Tweak the cost function again when we can run
2152  // convertToThreeAddress() at register allocation time.
2153  if (AM.hasSymbolicDisplacement()) {
2154  // For X86-64, always use LEA to materialize RIP-relative addresses.
2155  if (Subtarget->is64Bit())
2156  Complexity = 4;
2157  else
2158  Complexity += 2;
2159  }
2160 
2161  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
2162  Complexity++;
2163 
2164  // If it isn't worth using an LEA, reject it.
2165  if (Complexity <= 2)
2166  return false;
2167 
2168  getAddressOperands(AM, DL, Base, Scale, Index, Disp, Segment);
2169  return true;
2170 }
2171 
2172 /// This is only run on TargetGlobalTLSAddress nodes.
2173 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
2174  SDValue &Scale, SDValue &Index,
2175  SDValue &Disp, SDValue &Segment) {
2176  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
2177  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
2178 
2179  X86ISelAddressMode AM;
2180  AM.GV = GA->getGlobal();
2181  AM.Disp += GA->getOffset();
2182  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
2183  AM.SymbolFlags = GA->getTargetFlags();
2184 
2185  if (N.getValueType() == MVT::i32) {
2186  AM.Scale = 1;
2187  AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
2188  } else {
2189  AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
2190  }
2191 
2192  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
2193  return true;
2194 }
2195 
2196 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
2197  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2198  Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
2199  N.getValueType());
2200  return true;
2201  }
2202 
2203  // Keep track of the original value type and whether this value was
2204  // truncated. If we see a truncation from pointer type to VT that truncates
2205  // bits that are known to be zero, we can use a narrow reference.
2206  EVT VT = N.getValueType();
2207  bool WasTruncated = false;
2208  if (N.getOpcode() == ISD::TRUNCATE) {
2209  WasTruncated = true;
2210  N = N.getOperand(0);
2211  }
2212 
2213  if (N.getOpcode() != X86ISD::Wrapper)
2214  return false;
2215 
2216  // We can only use non-GlobalValues as immediates if they were not truncated,
2217  // as we do not have any range information. If we have a GlobalValue and the
2218  // address was not truncated, we can select it as an operand directly.
2219  unsigned Opc = N.getOperand(0)->getOpcode();
2220  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
2221  Op = N.getOperand(0);
2222  // We can only select the operand directly if we didn't have to look past a
2223  // truncate.
2224  return !WasTruncated;
2225  }
2226 
2227  // Check that the global's range fits into VT.
2228  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
2229  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2230  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
2231  return false;
2232 
2233  // Okay, we can use a narrow reference.
2234  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
2235  GA->getOffset(), GA->getTargetFlags());
2236  return true;
2237 }
2238 
2239 bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
2240  SDValue &Base, SDValue &Scale,
2241  SDValue &Index, SDValue &Disp,
2242  SDValue &Segment) {
2243  if (!ISD::isNON_EXTLoad(N.getNode()) ||
2244  !IsProfitableToFold(N, P, Root) ||
2245  !IsLegalToFold(N, P, Root, OptLevel))
2246  return false;
2247 
2248  return selectAddr(N.getNode(),
2249  N.getOperand(1), Base, Scale, Index, Disp, Segment);
2250 }
2251 
2252 /// Return an SDNode that returns the value of the global base register.
2253 /// Output instructions required to initialize the global base register,
2254 /// if necessary.
2255 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
2256  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
2257  auto &DL = MF->getDataLayout();
2258  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
2259 }
2260 
2261 bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
2262  if (N->getOpcode() == ISD::TRUNCATE)
2263  N = N->getOperand(0).getNode();
2264  if (N->getOpcode() != X86ISD::Wrapper)
2265  return false;
2266 
2267  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
2268  if (!GA)
2269  return false;
2270 
2271  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
2272  return CR && CR->getSignedMin().sge(-1ull << Width) &&
2273  CR->getSignedMax().slt(1ull << Width);
2274 }
2275 
2276 static X86::CondCode getCondFromOpc(unsigned Opc) {
2277  X86::CondCode CC = X86::COND_INVALID;
2278  if (CC == X86::COND_INVALID)
2279  CC = X86::getCondFromBranchOpc(Opc);
2280  if (CC == X86::COND_INVALID)
2281  CC = X86::getCondFromSETOpc(Opc);
2282  if (CC == X86::COND_INVALID)
2283  CC = X86::getCondFromCMovOpc(Opc);
2284 
2285  return CC;
2286 }
2287 
2288 /// Test whether the given X86ISD::CMP node has any users that use a flag
2289 /// other than ZF.
2290 bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
2291  // Examine each user of the node.
2292  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2293  UI != UE; ++UI) {
2294  // Only check things that use the flags.
2295  if (UI.getUse().getResNo() != Flags.getResNo())
2296  continue;
2297  // Only examine CopyToReg uses that copy to EFLAGS.
2298  if (UI->getOpcode() != ISD::CopyToReg ||
2299  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2300  return false;
2301  // Examine each user of the CopyToReg use.
2302  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2303  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2304  // Only examine the Flag result.
2305  if (FlagUI.getUse().getResNo() != 1) continue;
2306  // Anything unusual: assume conservatively.
2307  if (!FlagUI->isMachineOpcode()) return false;
2308  // Examine the condition code of the user.
2309  X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
2310 
2311  switch (CC) {
2312  // Comparisons which only use the zero flag.
2313  case X86::COND_E: case X86::COND_NE:
2314  continue;
2315  // Anything else: assume conservatively.
2316  default:
2317  return false;
2318  }
2319  }
2320  }
2321  return true;
2322 }
2323 
2324 /// Test whether the given X86ISD::CMP node has any uses which require the SF
2325 /// flag to be accurate.
2326 bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
2327  // Examine each user of the node.
2328  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2329  UI != UE; ++UI) {
2330  // Only check things that use the flags.
2331  if (UI.getUse().getResNo() != Flags.getResNo())
2332  continue;
2333  // Only examine CopyToReg uses that copy to EFLAGS.
2334  if (UI->getOpcode() != ISD::CopyToReg ||
2335  cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2336  return false;
2337  // Examine each user of the CopyToReg use.
2338  for (SDNode::use_iterator FlagUI = UI->use_begin(),
2339  FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
2340  // Only examine the Flag result.
2341  if (FlagUI.getUse().getResNo() != 1) continue;
2342  // Anything unusual: assume conservatively.
2343  if (!FlagUI->isMachineOpcode()) return false;
2344  // Examine the condition code of the user.
2345  X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
2346 
2347  switch (CC) {
2348  // Comparisons which don't examine the SF flag.
2349  case X86::COND_A: case X86::COND_AE:
2350  case X86::COND_B: case X86::COND_BE:
2351  case X86::COND_E: case X86::COND_NE:
2352  case X86::COND_O: case X86::COND_NO:
2353  case X86::COND_P: case X86::COND_NP:
2354  continue;
2355  // Anything else: assume conservatively.
2356  default:
2357  return false;
2358  }
2359  }
2360  }
2361  return true;
2362 }
2363 
2364 static bool mayUseCarryFlag(X86::CondCode CC) {
2365  switch (CC) {
2366  // Comparisons which don't examine the CF flag.
2367  case X86::COND_O: case X86::COND_NO:
2368  case X86::COND_E: case X86::COND_NE:
2369  case X86::COND_S: case X86::COND_NS:
2370  case X86::COND_P: case X86::COND_NP:
2371  case X86::COND_L: case X86::COND_GE:
2372  case X86::COND_G: case X86::COND_LE:
2373  return false;
2374  // Anything else: assume conservatively.
2375  default:
2376  return true;
2377  }
2378 }
2379 
2380 /// Test whether the given node which sets flags has any uses which require the
2381 /// CF flag to be accurate.
2382 bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
2383  // Examine each user of the node.
2384  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
2385  UI != UE; ++UI) {
2386  // Only check things that use the flags.
2387  if (UI.getUse().getResNo() != Flags.getResNo())
2388  continue;
2389 
2390  unsigned UIOpc = UI->getOpcode();
2391 
2392  if (UIOpc == ISD::CopyToReg) {
2393  // Only examine CopyToReg uses that copy to EFLAGS.
2394  if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
2395  return false;
2396  // Examine each user of the CopyToReg use.
2397  for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
2398  FlagUI != FlagUE; ++FlagUI) {
2399  // Only examine the Flag result.
2400  if (FlagUI.getUse().getResNo() != 1)
2401  continue;
2402  // Anything unusual: assume conservatively.
2403  if (!FlagUI->isMachineOpcode())
2404  return false;
2405  // Examine the condition code of the user.
2406  X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
2407 
2408  if (mayUseCarryFlag(CC))
2409  return false;
2410  }
2411 
2412  // This CopyToReg is ok. Move on to the next user.
2413  continue;
2414  }
2415 
2416  // This might be an unselected node. So look for the pre-isel opcodes that
2417  // use flags.
2418  unsigned CCOpNo;
2419  switch (UIOpc) {
2420  default:
2421  // Something unusual. Be conservative.
2422  return false;
2423  case X86ISD::SETCC: CCOpNo = 0; break;
2424  case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
2425  case X86ISD::CMOV: CCOpNo = 2; break;
2426  case X86ISD::BRCOND: CCOpNo = 2; break;
2427  }
2428 
2429  X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
2430  if (mayUseCarryFlag(CC))
2431  return false;
2432  }
2433  return true;
2434 }
2435 
2436 /// Check whether or not the chain ending in StoreNode is suitable for doing
2437 /// the {load; op; store} to modify transformation.
2438 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
2439  SDValue StoredVal, SelectionDAG *CurDAG,
2440  unsigned LoadOpNo,
2441  LoadSDNode *&LoadNode,
2442  SDValue &InputChain) {
2443  // Is the stored value result 0 of the operation?
2444  if (StoredVal.getResNo() != 0) return false;
2445 
2446  // Are there other uses of the operation other than the store?
2447  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
2448 
2449  // Is the store non-extending and non-indexed?
2450  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
2451  return false;
2452 
2453  SDValue Load = StoredVal->getOperand(LoadOpNo);
2454  // Is the stored value a non-extending and non-indexed load?
2455  if (!ISD::isNormalLoad(Load.getNode())) return false;
2456 
2457  // Return LoadNode by reference.
2458  LoadNode = cast<LoadSDNode>(Load);
2459 
2460  // Is store the only read of the loaded value?
2461  if (!Load.hasOneUse())
2462  return false;
2463 
2464  // Is the address of the store the same as the load?
2465  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
2466  LoadNode->getOffset() != StoreNode->getOffset())
2467  return false;
2468 
2469  bool FoundLoad = false;
2470  SmallVector<SDValue, 4> ChainOps;
2471  SmallVector<const SDNode *, 4> LoopWorklist;
2472  SmallPtrSet<const SDNode *, 16> Visited;
2473  const unsigned int Max = 1024;
2474 
2475  // Visualization of Load-Op-Store fusion:
2476  // -------------------------
2477  // Legend:
2478  // *-lines = Chain operand dependencies.
2479  // |-lines = Normal operand dependencies.
2480  // Dependencies flow down and right. n-suffix references multiple nodes.
2481  //
2482  // C Xn C
2483  // * * *
2484  // * * *
2485  // Xn A-LD Yn TF Yn
2486  // * * \ | * |
2487  // * * \ | * |
2488  // * * \ | => A--LD_OP_ST
2489  // * * \| \
2490  // TF OP \
2491  // * | \ Zn
2492  // * | \
2493  // A-ST Zn
2494  //
2495 
2496  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
2497  // #2: Yn -> LD
2498  // #3: ST -> Zn
2499 
2500  // Ensure the transform is safe by checking for the dual
2501  // dependencies to make sure we do not induce a loop.
2502 
2503  // As LD is a predecessor to both OP and ST we can do this by checking:
2504  // a). if LD is a predecessor to a member of Xn or Yn.
2505  // b). if a Zn is a predecessor to ST.
2506 
2507  // However, (b) can only occur through being a chain predecessor to
2508  // ST, which is the same as Zn being a member or predecessor of Xn,
2509  // which is a subset of LD being a predecessor of Xn. So it's
2510  // subsumed by check (a).
2511 
2512  SDValue Chain = StoreNode->getChain();
2513 
2514  // Gather X elements in ChainOps.
2515  if (Chain == Load.getValue(1)) {
2516  FoundLoad = true;
2517  ChainOps.push_back(Load.getOperand(0));
2518  } else if (Chain.getOpcode() == ISD::TokenFactor) {
2519  for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
2520  SDValue Op = Chain.getOperand(i);
2521  if (Op == Load.getValue(1)) {
2522  FoundLoad = true;
2523  // Drop Load, but keep its chain. No cycle check necessary.
2524  ChainOps.push_back(Load.getOperand(0));
2525  continue;
2526  }
2527  LoopWorklist.push_back(Op.getNode());
2528  ChainOps.push_back(Op);
2529  }
2530  }
2531 
2532  if (!FoundLoad)
2533  return false;
2534 
2535  // Worklist is currently Xn. Add Yn to worklist.
2536  for (SDValue Op : StoredVal->ops())
2537  if (Op.getNode() != LoadNode)
2538  LoopWorklist.push_back(Op.getNode());
2539 
2540  // Check (a) if Load is a predecessor to Xn + Yn
2541  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
2542  true))
2543  return false;
2544 
2545  InputChain =
2546  CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
2547  return true;
2548 }
2549 
2550 // Change a chain of {load; op; store} of the same value into a simple op
2551 // through memory of that value, if the uses of the modified value and its
2552 // address are suitable.
2553 //
2554 // The tablegen memory operand pattern is currently not able to match
2555 // the case where the EFLAGS on the original operation are used.
2556 //
2557 // To move this to tablegen, we'll need to improve tablegen to allow flags to
2558 // be transferred from a node in the pattern to the result node, probably with
2559 // a new keyword. For example, we have this
2560 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2561 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2562 // (implicit EFLAGS)]>;
2563 // but maybe need something like this
2564 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
2565 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
2566 // (transferrable EFLAGS)]>;
2567 //
2568 // Until then, we manually fold these and instruction select the operation
2569 // here.
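// As an illustrative sketch: a DAG doing {load *p; X86ISD::ADD 1; store *p}
// (with p in %rdi, register chosen for exposition) can be selected here as a
// single "incq (%rdi)", provided the flag and chain checks below succeed.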
2570 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2571  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
2572  SDValue StoredVal = StoreNode->getOperand(1);
2573  unsigned Opc = StoredVal->getOpcode();
2574 
2575  // Before we try to select anything, make sure this is memory operand size
2576  // and opcode we can handle. Note that this must match the code below that
2577  // actually lowers the opcodes.
2578  EVT MemVT = StoreNode->getMemoryVT();
2579  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
2580  MemVT != MVT::i8)
2581  return false;
2582 
2583  bool IsCommutable = false;
2584  switch (Opc) {
2585  default:
2586  return false;
2587  case X86ISD::SUB:
2588  case X86ISD::SBB:
2589  break;
2590  case X86ISD::ADD:
2591  case X86ISD::ADC:
2592  case X86ISD::AND:
2593  case X86ISD::OR:
2594  case X86ISD::XOR:
2595  IsCommutable = true;
2596  break;
2597  }
2598 
2599  unsigned LoadOpNo = 0;
2600  LoadSDNode *LoadNode = nullptr;
2601  SDValue InputChain;
2602  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2603  LoadNode, InputChain)) {
2604  if (!IsCommutable)
2605  return false;
2606 
2607  // This operation is commutable, try the other operand.
2608  LoadOpNo = 1;
2609  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
2610  LoadNode, InputChain))
2611  return false;
2612  }
2613 
2614  SDValue Base, Scale, Index, Disp, Segment;
2615  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
2616  Segment))
2617  return false;
2618 
2619  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
2620  unsigned Opc8) {
2621  switch (MemVT.getSimpleVT().SimpleTy) {
2622  case MVT::i64:
2623  return Opc64;
2624  case MVT::i32:
2625  return Opc32;
2626  case MVT::i16:
2627  return Opc16;
2628  case MVT::i8:
2629  return Opc8;
2630  default:
2631  llvm_unreachable("Invalid size!");
2632  }
2633  };
2634 
2635  MachineSDNode *Result;
2636  switch (Opc) {
2637  case X86ISD::ADD:
2638  case X86ISD::SUB:
2639  // Try to match inc/dec.
2640  if (!Subtarget->slowIncDec() ||
2641  CurDAG->getMachineFunction().getFunction().optForSize()) {
2642  bool IsOne = isOneConstant(StoredVal.getOperand(1));
2643  bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
2644  // ADD/SUB by 1/-1 can be selected as INC/DEC when the carry flag is unused.
2645  if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
2646  unsigned NewOpc =
2647  ((Opc == X86ISD::ADD) == IsOne)
2648  ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
2649  : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
2650  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
2651  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
2652  MVT::Other, Ops);
2653  break;
2654  }
2655  }
2656  LLVM_FALLTHROUGH;
2657  case X86ISD::ADC:
2658  case X86ISD::SBB:
2659  case X86ISD::AND:
2660  case X86ISD::OR:
2661  case X86ISD::XOR: {
2662  auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
2663  switch (Opc) {
2664  case X86ISD::ADD:
2665  return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
2666  X86::ADD8mr);
2667  case X86ISD::ADC:
2668  return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
2669  X86::ADC8mr);
2670  case X86ISD::SUB:
2671  return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
2672  X86::SUB8mr);
2673  case X86ISD::SBB:
2674  return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
2675  X86::SBB8mr);
2676  case X86ISD::AND:
2677  return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
2678  X86::AND8mr);
2679  case X86ISD::OR:
2680  return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
2681  case X86ISD::XOR:
2682  return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
2683  X86::XOR8mr);
2684  default:
2685  llvm_unreachable("Invalid opcode!");
2686  }
2687  };
2688  auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
2689  switch (Opc) {
2690  case X86ISD::ADD:
2691  return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
2692  case X86ISD::ADC:
2693  return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
2694  case X86ISD::SUB:
2695  return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
2696  case X86ISD::SBB:
2697  return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
2698  case X86ISD::AND:
2699  return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
2700  case X86ISD::OR:
2701  return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
2702  case X86ISD::XOR:
2703  return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
2704  default:
2705  llvm_unreachable("Invalid opcode!");
2706  }
2707  };
2708  auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
2709  switch (Opc) {
2710  case X86ISD::ADD:
2711  return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
2712  X86::ADD8mi);
2713  case X86ISD::ADC:
2714  return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
2715  X86::ADC8mi);
2716  case X86ISD::SUB:
2717  return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
2718  X86::SUB8mi);
2719  case X86ISD::SBB:
2720  return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
2721  X86::SBB8mi);
2722  case X86ISD::AND:
2723  return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
2724  X86::AND8mi);
2725  case X86ISD::OR:
2726  return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
2727  X86::OR8mi);
2728  case X86ISD::XOR:
2729  return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
2730  X86::XOR8mi);
2731  default:
2732  llvm_unreachable("Invalid opcode!");
2733  }
2734  };
2735 
2736  unsigned NewOpc = SelectRegOpcode(Opc);
2737  SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
2738 
2739  // See if the operand is a constant that we can fold into an immediate
2740  // operand.
2741  if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
2742  auto OperandV = OperandC->getAPIntValue();
2743 
2744  // Check if we can shrink the operand enough to fit in an immediate (or
2745  // fit into a smaller immediate) by negating it and switching the
2746  // operation.
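 // Illustrative example: an ADD of 128 does not fit in an imm8, but the
 // equivalent SUB of -128 does, so we negate and swap ADD<->SUB when the
 // carry flag is not observed.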
2747  if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
2748  ((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 &&
2749  (-OperandV).getMinSignedBits() <= 8) ||
2750  (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
2751  (-OperandV).getMinSignedBits() <= 32)) &&
2752  hasNoCarryFlagUses(StoredVal.getValue(1))) {
2753  OperandV = -OperandV;
2754  Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
2755  }
2756 
2757  // First try to fit this into an Imm8 operand. If it doesn't fit, then try
2758  // the larger immediate operand.
2759  if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) {
2760  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2761  NewOpc = SelectImm8Opcode(Opc);
2762  } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() &&
2763  (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) {
2764  Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
2765  NewOpc = SelectImmOpcode(Opc);
2766  }
2767  }
2768 
2769  if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
2770  SDValue CopyTo =
2771  CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
2772  StoredVal.getOperand(2), SDValue());
2773 
2774  const SDValue Ops[] = {Base, Scale, Index, Disp,
2775  Segment, Operand, CopyTo, CopyTo.getValue(1)};
2776  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2777  Ops);
2778  } else {
2779  const SDValue Ops[] = {Base, Scale, Index, Disp,
2780  Segment, Operand, InputChain};
2781  Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
2782  Ops);
2783  }
2784  break;
2785  }
2786  default:
2787  llvm_unreachable("Invalid opcode!");
2788  }
2789 
2790  MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
2791  LoadNode->getMemOperand()};
2792  CurDAG->setNodeMemRefs(Result, MemOps);
2793 
2794  // Update Load Chain uses as well.
2795  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
2796  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
2797  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
2798  CurDAG->RemoveDeadNode(Node);
2799  return true;
2800 }
2801 
2802 // See if this is an X & Mask that we can match to BEXTR/BZHI.
2803 // Where Mask is one of the following patterns:
2804 // a) x & (1 << nbits) - 1
2805 // b) x & ~(-1 << nbits)
2806 // c) x & (-1 >> (32 - y))
2807 // d) x << (32 - y) >> (32 - y)
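// Illustrative example (nbits value chosen for exposition): for nbits = 5,
// patterns (a)-(c) all evaluate the mask to 0x1F and pattern (d) keeps the
// low 5 bits directly, so every form reduces to "x & 0x1F", which BZHI
// (or a BEXTR with a zero start field) computes in one instruction.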
2808 bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
2809  assert(
2810  (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
2811  "Should be either an and-mask, or right-shift after clearing high bits.");
2812 
2813  // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
2814  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
2815  return false;
2816 
2817  MVT NVT = Node->getSimpleValueType(0);
2818 
2819  // Only supported for 32 and 64 bits.
2820  if (NVT != MVT::i32 && NVT != MVT::i64)
2821  return false;
2822 
2823  unsigned Size = NVT.getSizeInBits();
2824 
2825  SDValue NBits;
2826 
2827  // If we have BMI2's BZHI, we are ok with multi-use patterns.
2828  // Else, if we only have BMI1's BEXTR, we require one-use.
2829  const bool CanHaveExtraUses = Subtarget->hasBMI2();
2830  auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
2831  return CanHaveExtraUses ||
2832  Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
2833  };
2834  auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
2835  auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
2836 
2837  // a) x & ((1 << nbits) + (-1))
2838  auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool {
2839  // Match `add`. Must only have one use!
2840  if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
2841  return false;
2842  // We should be adding all-ones constant (i.e. subtracting one.)
2843  if (!isAllOnesConstant(Mask->getOperand(1)))
2844  return false;
2845  // Match `1 << nbits`. Must only have one use!
2846  SDValue M0 = Mask->getOperand(0);
2847  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
2848  return false;
2849  if (!isOneConstant(M0->getOperand(0)))
2850  return false;
2851  NBits = M0->getOperand(1);
2852  return true;
2853  };
2854 
2855  // b) x & ~(-1 << nbits)
2856  auto matchPatternB = [&checkOneUse, &NBits](SDValue Mask) -> bool {
2857  // Match `~()`. Must only have one use!
2858  if (!isBitwiseNot(Mask) || !checkOneUse(Mask))
2859  return false;
2860  // Match `-1 << nbits`. Must only have one use!
2861  SDValue M0 = Mask->getOperand(0);
2862  if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
2863  return false;
2864  if (!isAllOnesConstant(M0->getOperand(0)))
2865  return false;
2866  NBits = M0->getOperand(1);
2867  return true;
2868  };
2869 
2870  // Match potentially-truncated (bitwidth - y)
2871  auto matchShiftAmt = [checkOneUse, Size, &NBits](SDValue ShiftAmt) {
2872  // Skip over a truncate of the shift amount.
2873  if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
2874  ShiftAmt = ShiftAmt.getOperand(0);
2875  // The trunc should have been the only user of the real shift amount.
2876  if (!checkOneUse(ShiftAmt))
2877  return false;
2878  }
2879  // Match the shift amount as: (bitwidth - y). It should go away, too.
2880  if (ShiftAmt.getOpcode() != ISD::SUB)
2881  return false;
2882  auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
2883  if (!V0 || V0->getZExtValue() != Size)
2884  return false;
2885  NBits = ShiftAmt.getOperand(1);
2886  return true;
2887  };
2888 
2889  // c) x & (-1 >> (32 - y))
2890  auto matchPatternC = [&checkOneUse, matchShiftAmt](SDValue Mask) -> bool {
2891  // Match `l>>`. Must only have one use!
2892  if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
2893  return false;
2894  // We should be shifting all-ones constant.
2895  if (!isAllOnesConstant(Mask.getOperand(0)))
2896  return false;
2897  SDValue M1 = Mask.getOperand(1);
2898  // The shift amount should not be used externally.
2899  if (!checkOneUse(M1))
2900  return false;
2901  return matchShiftAmt(M1);
2902  };
2903 
2904  SDValue X;
2905 
2906  // d) x << (32 - y) >> (32 - y)
2907  auto matchPatternD = [&checkOneUse, &checkTwoUse, matchShiftAmt,
2908  &X](SDNode *Node) -> bool {
2909  if (Node->getOpcode() != ISD::SRL)
2910  return false;
2911  SDValue N0 = Node->getOperand(0);
2912  if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
2913  return false;
2914  SDValue N1 = Node->getOperand(1);
2915  SDValue N01 = N0->getOperand(1);
2916  // Both of the shifts must be by the exact same value.
2917  // There should not be any uses of the shift amount outside of the pattern.
2918  if (N1 != N01 || !checkTwoUse(N1))
2919  return false;
2920  if (!matchShiftAmt(N1))
2921  return false;
2922  X = N0->getOperand(0);
2923  return true;
2924  };
2925 
2926  auto matchLowBitMask = [&matchPatternA, &matchPatternB,
2927  &matchPatternC](SDValue Mask) -> bool {
2928  // FIXME: pattern c.
2929  return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
2930  };
2931 
2932  if (Node->getOpcode() == ISD::AND) {
2933  X = Node->getOperand(0);
2934  SDValue Mask = Node->getOperand(1);
2935 
2936  if (matchLowBitMask(Mask)) {
2937  // Great.
2938  } else {
2939  std::swap(X, Mask);
2940  if (!matchLowBitMask(Mask))
2941  return false;
2942  }
2943  } else if (!matchPatternD(Node))
2944  return false;
2945 
2946  SDLoc DL(Node);
2947 
2948  // If we do *NOT* have BMI2, let's find out if 'X' is *logically*
2949  // shifted (potentially with a one-use trunc in between),
2950  // and if so look past one-use truncation.
2951  MVT XVT = NVT;
2952  if (!Subtarget->hasBMI2() && X.getOpcode() == ISD::TRUNCATE &&
2953  X.hasOneUse() && X.getOperand(0).getOpcode() == ISD::SRL) {
2954  assert(NVT == MVT::i32 && "Expected target valuetype to be i32");
2955  X = X.getOperand(0);
2956  XVT = X.getSimpleValueType();
2957  assert(XVT == MVT::i64 && "Expected truncation from i64");
2958  }
2959 
2960  SDValue OrigNBits = NBits;
2961  // Truncate the shift amount.
2962  NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
2963  insertDAGNode(*CurDAG, OrigNBits, NBits);
2964 
2965  // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
2966  // All the other bits are undefined, we do not care about them.
2967  SDValue ImplDef = SDValue(
2968  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
2969  insertDAGNode(*CurDAG, OrigNBits, ImplDef);
2970  NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef,
2971  NBits);
2972  insertDAGNode(*CurDAG, OrigNBits, NBits);
2973 
2974  if (Subtarget->hasBMI2()) {
2975  // Great, just emit the BZHI.
2976  if (XVT != MVT::i32) {
2977  // But have to place the bit count into the wide-enough register first.
2978  SDValue ImplDef = SDValue(
2979  CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, XVT), 0);
2980  insertDAGNode(*CurDAG, OrigNBits, ImplDef);
2981  NBits = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, XVT, ImplDef,
2982  NBits);
2983  insertDAGNode(*CurDAG, OrigNBits, NBits);
2984  }
2985 
2986  SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, XVT, X, NBits);
2987  ReplaceNode(Node, Extract.getNode());
2988  SelectCode(Extract.getNode());
2989  return true;
2990  }
2991 
2992  // Else, emitting BEXTR requires one more step.
2993  // The 'control' of BEXTR has the pattern of:
2994  // [15...8 bit][ 7...0 bit] location
2995  // [ bit count][ shift] name
2996  // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11
2997 
2998  // Shift NBits left by 8 bits, thus producing 'control'.
2999  // This leaves the low 8 bits zero.
3000  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
3001  SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
3002  insertDAGNode(*CurDAG, OrigNBits, Control);
3003 
3004  // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
3005  if (X.getOpcode() == ISD::SRL) {
3006  SDValue ShiftAmt = X.getOperand(1);
3007  X = X.getOperand(0);
3008 
3009  assert(ShiftAmt.getValueType() == MVT::i8 &&
3010  "Expected shift amount to be i8");
3011 
3012  // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
3013  // We could zext to i16 in some form, but we intentionally don't do that.
3014  SDValue OrigShiftAmt = ShiftAmt;
3015  ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
3016  insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
3017 
3018  // And now 'or' these low 8 bits of shift amount into the 'control'.
3019  Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
3020  insertDAGNode(*CurDAG, OrigNBits, Control);
3021  }
3022 
3023  // But have to place the 'control' into the wide-enough register first.
3024  if (XVT != MVT::i32) {
3025  SDValue ImplDef =
3026  SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, XVT), 0);
3027  insertDAGNode(*CurDAG, OrigNBits, ImplDef);
3028  Control = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, XVT, ImplDef,
3029  Control);
3030  insertDAGNode(*CurDAG, OrigNBits, Control);
3031  }
3032 
3033  // And finally, form the BEXTR itself.
3034  SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
3035 
3036  // The 'X' was originally truncated. Do that now.
3037  if (XVT != NVT) {
3038  insertDAGNode(*CurDAG, OrigNBits, Extract);
3039  Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
3040  }
3041 
3042  ReplaceNode(Node, Extract.getNode());
3043  SelectCode(Extract.getNode());
3044 
3045  return true;
3046 }
3047 
3048 // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
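// Illustrative example: (x >> 4) & 0xFF extracts 8 bits starting at bit 4,
// so the control immediate built below is 4 | (8 << 8) == 0x0804.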
3049 MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
3050  MVT NVT = Node->getSimpleValueType(0);
3051  SDLoc dl(Node);
3052 
3053  SDValue N0 = Node->getOperand(0);
3054  SDValue N1 = Node->getOperand(1);
3055 
3056  // If we have TBM we can use an immediate for the control. If we have BMI
3057  // we should only do this if the BEXTR instruction is implemented well.
3058  // Otherwise moving the control into a register makes this more costly.
3059  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
3060  // hoisting the move immediate would make it worthwhile with a less optimal
3061  // BEXTR?
3062  if (!Subtarget->hasTBM() &&
3063  !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
3064  return nullptr;
3065 
3066  // Must have a shift right.
3067  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
3068  return nullptr;
3069 
3070  // Shift can't have additional users.
3071  if (!N0->hasOneUse())
3072  return nullptr;
3073 
3074  // Only supported for 32 and 64 bits.
3075  if (NVT != MVT::i32 && NVT != MVT::i64)
3076  return nullptr;
3077 
3078  // Shift amount and RHS of and must be constant.
3079  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
3080  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3081  if (!MaskCst || !ShiftCst)
3082  return nullptr;
3083 
3084  // The AND's RHS must also be a mask.
3085  uint64_t Mask = MaskCst->getZExtValue();
3086  if (!isMask_64(Mask))
3087  return nullptr;
3088 
3089  uint64_t Shift = ShiftCst->getZExtValue();
3090  uint64_t MaskSize = countPopulation(Mask);
3091 
3092  // Don't interfere with something that can be handled by extracting AH.
3093  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
3094  if (Shift == 8 && MaskSize == 8)
3095  return nullptr;
3096 
3097  // Make sure we are only using bits that were in the original value, not
3098  // shifted in.
3099  if (Shift + MaskSize > NVT.getSizeInBits())
3100  return nullptr;
3101 
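  // E.g. (x >> 8) & 0xffff has Shift = 8 and MaskSize = 16, giving the
  // BEXTR control value 0x1008.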
3102  SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
3103  unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
3104  unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
3105 
3106  // BMI requires the immediate to be placed in a register.
3107  if (!Subtarget->hasTBM()) {
3108  ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
3109  MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
3110  unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
3111  New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
3112  }
3113 
3114  MachineSDNode *NewNode;
3115  SDValue Input = N0->getOperand(0);
3116  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3117  if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3118  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
3119  SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
3120  NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3121  // Update the chain.
3122  ReplaceUses(Input.getValue(1), SDValue(NewNode, 1));
3123  // Record the mem-refs
3124  CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
3125  } else {
3126  NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New);
3127  }
3128 
3129  return NewNode;
3130 }
3131 
3132 // Emit a PCMPISTR(I/M) instruction.
3133 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
3134  bool MayFoldLoad, const SDLoc &dl,
3135  MVT VT, SDNode *Node) {
3136  SDValue N0 = Node->getOperand(0);
3137  SDValue N1 = Node->getOperand(1);
3138  SDValue Imm = Node->getOperand(2);
3139  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3140  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3141 
3142  // Try to fold a load. No need to check alignment.
3143  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3144  if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3145  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3146  N1.getOperand(0) };
3147  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
3148  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3149  // Update the chain.
3150  ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
3151  // Record the mem-refs
3152  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3153  return CNode;
3154  }
3155 
3156  SDValue Ops[] = { N0, N1, Imm };
3157  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
3158  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3159  return CNode;
3160 }
3161 
3162 // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
3163 // to emit a second instruction after this one. This is needed since we have two
3164 // copyToReg nodes glued before this and we need to continue that glue through.
3165 MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
3166  bool MayFoldLoad, const SDLoc &dl,
3167  MVT VT, SDNode *Node,
3168  SDValue &InFlag) {
3169  SDValue N0 = Node->getOperand(0);
3170  SDValue N2 = Node->getOperand(2);
3171  SDValue Imm = Node->getOperand(4);
3172  const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
3173  Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
3174 
3175  // Try to fold a load. No need to check alignment.
3176  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3177  if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3178  SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3179  N2.getOperand(0), InFlag };
3180  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
3181  MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3182  InFlag = SDValue(CNode, 3);
3183  // Update the chain.
3184  ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
3185  // Record the mem-refs
3186  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
3187  return CNode;
3188  }
3189 
3190  SDValue Ops[] = { N0, N2, Imm, InFlag };
3191  SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
3192  MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
3193  InFlag = SDValue(CNode, 2);
3194  return CNode;
3195 }
3196 
3197 bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3198  EVT VT = N->getValueType(0);
3199 
3200  // Only handle scalar shifts.
3201  if (VT.isVector())
3202  return false;
3203 
3204  // Narrower shifts only mask to 5 bits in hardware.
3205  unsigned Size = VT == MVT::i64 ? 64 : 32;
3206 
3207  SDValue OrigShiftAmt = N->getOperand(1);
3208  SDValue ShiftAmt = OrigShiftAmt;
3209  SDLoc DL(N);
3210 
3211  // Skip over a truncate of the shift amount.
3212  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
3213  ShiftAmt = ShiftAmt->getOperand(0);
3214 
3215  // This function is called after X86DAGToDAGISel::matchBitExtract(),
3216  // so we are not afraid that we might mess up BZHI/BEXTR pattern.
3217 
3218  SDValue NewShiftAmt;
3219  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3220  SDValue Add0 = ShiftAmt->getOperand(0);
3221  SDValue Add1 = ShiftAmt->getOperand(1);
3222  // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3223  // to avoid the ADD/SUB.
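  // E.g. a 32-bit shift by (X + 32) is the same as a shift by X, because the
  // hardware masks the amount to its low 5 bits anyway.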
3224  if (isa<ConstantSDNode>(Add1) &&
3225  cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
3226  NewShiftAmt = Add0;
3227  // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3228  // generate a NEG instead of a SUB of a constant.
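  // E.g. a 32-bit shift by (32 - X) can shift by (0 - X) instead, since both
  // amounts are congruent modulo 32.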
3229  } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3230  isa<ConstantSDNode>(Add0) &&
3231  cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
3232  cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
3233  // Insert a negate op.
3234  // TODO: This isn't guaranteed to replace the sub if there is a logic cone
3235  // that uses it that's not a shift.
3236  EVT SubVT = ShiftAmt.getValueType();
3237  SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
3238  SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
3239  NewShiftAmt = Neg;
3240 
3241  // Insert these operands into a valid topological order so they can
3242  // get selected independently.
3243  insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
3244  insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
3245  } else
3246  return false;
3247  } else
3248  return false;
3249 
3250  if (NewShiftAmt.getValueType() != MVT::i8) {
3251  // Need to truncate the shift amount.
3252  NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
3253  // Add to a correct topological ordering.
3254  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3255  }
3256 
3257  // Insert a new mask to keep the shift amount legal. This should be removed
3258  // by isel patterns.
3259  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
3260  CurDAG->getConstant(Size - 1, DL, MVT::i8));
3261  // Place in a correct topological ordering.
3262  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
3263 
3264  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
3265  NewShiftAmt);
3266  if (UpdatedNode != N) {
3267  // If we found an existing node, we should replace ourselves with that node
3268  // and wait for it to be selected after its other users.
3269  ReplaceNode(N, UpdatedNode);
3270  return true;
3271  }
3272 
3273  // If the original shift amount is now dead, delete it so that we don't run
3274  // it through isel.
3275  if (OrigShiftAmt.getNode()->use_empty())
3276  CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
3277 
3278  // Now that we've optimized the shift amount, defer to normal isel to get
3279  // load folding and legacy vs BMI2 selection without repeating it here.
3280  SelectCode(N);
3281  return true;
3282 }
3283 
3284 /// If the high bits of an 'and' operand are known zero, try setting the
3285 /// high bits of an 'and' constant operand to produce a smaller encoding by
3286 /// creating a small, sign-extended negative immediate rather than a large
3287 /// positive one. This reverses a transform in SimplifyDemandedBits that
3288 /// shrinks mask constants by clearing bits. There is also a possibility that
3289 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
3290 /// case, just replace the 'and'. Return 'true' if the node is replaced.
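/// For example, with the upper 16 bits of the other operand known zero,
/// 'and $0xfff0' (4-byte immediate) can become 'and $-16' (1-byte immediate).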
3291 bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
3292  // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
3293  // have immediate operands.
3294  MVT VT = And->getSimpleValueType(0);
3295  if (VT != MVT::i32 && VT != MVT::i64)
3296  return false;
3297 
3298  auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
3299  if (!And1C)
3300  return false;
3301 
3302  // Bail out if the mask constant is already negative. It can't shrink further.
3303  // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
3304  // patterns to use a 32-bit and instead of a 64-bit and by relying on the
3305  // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
3306  // are negative too.
3307  APInt MaskVal = And1C->getAPIntValue();
3308  unsigned MaskLZ = MaskVal.countLeadingZeros();
3309  if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
3310  return false;
3311 
3312  // Don't extend into the upper 32 bits of a 64 bit mask.
3313  if (VT == MVT::i64 && MaskLZ >= 32) {
3314  MaskLZ -= 32;
3315  MaskVal = MaskVal.trunc(32);
3316  }
3317 
3318  SDValue And0 = And->getOperand(0);
3319  APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
3320  APInt NegMaskVal = MaskVal | HighZeros;
3321 
3322  // If a negative constant would not allow a smaller encoding, there's no need
3323  // to continue. Only change the constant when we know it's a win.
3324  unsigned MinWidth = NegMaskVal.getMinSignedBits();
3325  if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
3326  return false;
3327 
3328  // Extend masks if we truncated above.
3329  if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
3330  NegMaskVal = NegMaskVal.zext(64);
3331  HighZeros = HighZeros.zext(64);
3332  }
3333 
3334  // The variable operand must be all zeros in the top bits to allow using the
3335  // new, negative constant as the mask.
3336  if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
3337  return false;
3338 
3339  // Check if the mask is -1. In that case, this is an unnecessary instruction
3340  // that escaped earlier analysis.
3341  if (NegMaskVal.isAllOnesValue()) {
3342  ReplaceNode(And, And0.getNode());
3343  return true;
3344  }
3345 
3346  // A negative mask allows a smaller encoding. Create a new 'and' node.
3347  SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
3348  SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
3349  ReplaceNode(And, NewAnd.getNode());
3350  SelectCode(NewAnd.getNode());
3351  return true;
3352 }
3353 
3354 void X86DAGToDAGISel::Select(SDNode *Node) {
3355  MVT NVT = Node->getSimpleValueType(0);
3356  unsigned Opcode = Node->getOpcode();
3357  SDLoc dl(Node);
3358 
3359  if (Node->isMachineOpcode()) {
3360  LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
3361  Node->setNodeId(-1);
3362  return; // Already selected.
3363  }
3364 
3365  switch (Opcode) {
3366  default: break;
3367  case ISD::BRIND: {
3368  if (Subtarget->isTargetNaCl())
3369  // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We
3370  // leave the instruction alone.
3371  break;
3372  if (Subtarget->isTarget64BitILP32()) {
3373  // Converts a 32-bit register to a 64-bit, zero-extended version of
3374  // it. This is needed because x86-64 can do many things, but jmp %r32
3375  // ain't one of them.
3376  const SDValue &Target = Node->getOperand(1);
3378  SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
3379  SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
3380  Node->getOperand(0), ZextTarget);
3381  ReplaceNode(Node, Brind.getNode());
3382  SelectCode(ZextTarget.getNode());
3383  SelectCode(Brind.getNode());
3384  return;
3385  }
3386  break;
3387  }
3388  case X86ISD::GlobalBaseReg:
3389  ReplaceNode(Node, getGlobalBaseReg());
3390  return;
3391 
3392  case ISD::BITCAST:
3393  // Just drop all 128/256/512-bit bitcasts.
3394  if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
3395  NVT == MVT::f128) {
3396  ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
3397  CurDAG->RemoveDeadNode(Node);
3398  return;
3399  }
3400  break;
3401 
3402  case ISD::VSELECT: {
3403  // Replace VSELECT with non-mask conditions with BLENDV.
3404  if (Node->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3405  break;
3406 
3407  assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
3408  SDValue Blendv = CurDAG->getNode(
3409  X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
3410  Node->getOperand(1), Node->getOperand(2));
3411  ReplaceNode(Node, Blendv.getNode());
3412  SelectCode(Blendv.getNode());
3413  // We already called ReplaceUses.
3414  return;
3415  }
3416 
3417  case ISD::SRL:
3418  if (matchBitExtract(Node))
3419  return;
3420  LLVM_FALLTHROUGH;
3421  case ISD::SRA:
3422  case ISD::SHL:
3423  if (tryShiftAmountMod(Node))
3424  return;
3425  break;
3426 
3427  case ISD::AND:
3428  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
3429  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
3430  CurDAG->RemoveDeadNode(Node);
3431  return;
3432  }
3433  if (matchBitExtract(Node))
3434  return;
3435  if (AndImmShrink && shrinkAndImmediate(Node))
3436  return;
3437 
3438  LLVM_FALLTHROUGH;
3439  case ISD::OR:
3440  case ISD::XOR: {
3441 
3442  // For operations of the form (x << C1) op C2, check if we can use a smaller
3443  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
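  // E.g. (x << 8) | 0x0f00 becomes ((x | 0x0f) << 8), so the OR can use a
  // one-byte immediate instead of a four-byte one.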
3444  SDValue N0 = Node->getOperand(0);
3445  SDValue N1 = Node->getOperand(1);
3446 
3447  if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
3448  break;
3449 
3450  // i8 is unshrinkable, i16 should be promoted to i32.
3451  if (NVT != MVT::i32 && NVT != MVT::i64)
3452  break;
3453 
3454  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
3455  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
3456  if (!Cst || !ShlCst)
3457  break;
3458 
3459  int64_t Val = Cst->getSExtValue();
3460  uint64_t ShlVal = ShlCst->getZExtValue();
3461 
3462  // Make sure that we don't change the operation by removing bits.
3463  // This only matters for OR and XOR, AND is unaffected.
3464  uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
3465  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
3466  break;
3467 
3468  unsigned ShlOp, AddOp, Op;
3469  MVT CstVT = NVT;
3470 
3471  // Check the minimum bitwidth for the new constant.
3472  // TODO: AND32ri is the same as AND64ri32 with zext imm.
3473  // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
3474  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
3475  if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
3476  CstVT = MVT::i8;
3477  else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
3478  CstVT = MVT::i32;
3479 
3480  // Bail if there is no smaller encoding.
3481  if (NVT == CstVT)
3482  break;
3483 
3484  switch (NVT.SimpleTy) {
3485  default: llvm_unreachable("Unsupported VT!");
3486  case MVT::i32:
3487  assert(CstVT == MVT::i8);
3488  ShlOp = X86::SHL32ri;
3489  AddOp = X86::ADD32rr;
3490 
3491  switch (Opcode) {
3492  default: llvm_unreachable("Impossible opcode");
3493  case ISD::AND: Op = X86::AND32ri8; break;
3494  case ISD::OR: Op = X86::OR32ri8; break;
3495  case ISD::XOR: Op = X86::XOR32ri8; break;
3496  }
3497  break;
3498  case MVT::i64:
3499  assert(CstVT == MVT::i8 || CstVT == MVT::i32);
3500  ShlOp = X86::SHL64ri;
3501  AddOp = X86::ADD64rr;
3502 
3503  switch (Opcode) {
3504  default: llvm_unreachable("Impossible opcode");
3505  case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
3506  case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
3507  case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
3508  }
3509  break;
3510  }
3511 
3512  // Emit the smaller op and the shift.
3513  SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
3514  SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
3515  if (ShlVal == 1)
3516  CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
3517  SDValue(New, 0));
3518  else
3519  CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
3520  getI8Imm(ShlVal, dl));
3521  return;
3522  }
3523  case X86ISD::SMUL:
3524  // i16/i32/i64 are handled with isel patterns.
3525  if (NVT != MVT::i8)
3526  break;
3527  LLVM_FALLTHROUGH;
3528  case X86ISD::UMUL: {
3529  SDValue N0 = Node->getOperand(0);
3530  SDValue N1 = Node->getOperand(1);
3531 
3532  unsigned LoReg, ROpc, MOpc;
3533  switch (NVT.SimpleTy) {
3534  default: llvm_unreachable("Unsupported VT!");
3535  case MVT::i8:
3536  LoReg = X86::AL;
3537  ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
3538  MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
3539  break;
3540  case MVT::i16:
3541  LoReg = X86::AX;
3542  ROpc = X86::MUL16r;
3543  MOpc = X86::MUL16m;
3544  break;
3545  case MVT::i32:
3546  LoReg = X86::EAX;
3547  ROpc = X86::MUL32r;
3548  MOpc = X86::MUL32m;
3549  break;
3550  case MVT::i64:
3551  LoReg = X86::RAX;
3552  ROpc = X86::MUL64r;
3553  MOpc = X86::MUL64m;
3554  break;
3555  }
3556 
3557  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3558  bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3559  // Multiply is commutative.
3560  if (!FoldedLoad) {
3561  FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3562  if (FoldedLoad)
3563  std::swap(N0, N1);
3564  }
3565 
3566  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
3567  N0, SDValue()).getValue(1);
3568 
3569  MachineSDNode *CNode;
3570  if (FoldedLoad) {
3571  // i16/i32/i64 use an instruction that produces a low and high result even
3572  // though only the low result is used.
3573  SDVTList VTs;
3574  if (NVT == MVT::i8)
3575  VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
3576  else
3577  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
3578 
3579  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3580  InFlag };
3581  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3582 
3583  // Update the chain.
3584  ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
3585  // Record the mem-refs
3586  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3587  } else {
3588  // i16/i32/i64 use an instruction that produces a low and high result even
3589  // though only the low result is used.
3590  SDVTList VTs;
3591  if (NVT == MVT::i8)
3592  VTs = CurDAG->getVTList(NVT, MVT::i32);
3593  else
3594  VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
3595 
3596  CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
3597  }
3598 
3599  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
3600  ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
3601  CurDAG->RemoveDeadNode(Node);
3602  return;
3603  }
3604 
3605  case ISD::SMUL_LOHI:
3606  case ISD::UMUL_LOHI: {
3607  SDValue N0 = Node->getOperand(0);
3608  SDValue N1 = Node->getOperand(1);
3609 
3610  unsigned Opc, MOpc;
3611  bool isSigned = Opcode == ISD::SMUL_LOHI;
3612  if (!isSigned) {
3613  switch (NVT.SimpleTy) {
3614  default: llvm_unreachable("Unsupported VT!");
3615  case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
3616  case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
3617  }
3618  } else {
3619  switch (NVT.SimpleTy) {
3620  default: llvm_unreachable("Unsupported VT!");
3621  case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
3622  case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
3623  }
3624  }
3625 
3626  unsigned SrcReg, LoReg, HiReg;
3627  switch (Opc) {
3628  default: llvm_unreachable("Unknown MUL opcode!");
3629  case X86::IMUL32r:
3630  case X86::MUL32r:
3631  SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
3632  break;
3633  case X86::IMUL64r:
3634  case X86::MUL64r:
3635  SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
3636  break;
3637  }
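  // The multiply leaves the low half of the product in EAX/RAX and the high
  // half in EDX/RDX; the CopyFromReg nodes below read whichever halves are used.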
3638 
3639  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3640  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3641  // Multiply is commutative.
3642  if (!foldedLoad) {
3643  foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3644  if (foldedLoad)
3645  std::swap(N0, N1);
3646  }
3647 
3648  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
3649  N0, SDValue()).getValue(1);
3650  if (foldedLoad) {
3651  SDValue Chain;
3652  MachineSDNode *CNode = nullptr;
3653  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3654  InFlag };
3655  SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3656  CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3657  Chain = SDValue(CNode, 0);
3658  InFlag = SDValue(CNode, 1);
3659 
3660  // Update the chain.
3661  ReplaceUses(N1.getValue(1), Chain);
3662  // Record the mem-refs
3663  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3664  } else {
3665  SDValue Ops[] = { N1, InFlag };
3666  SDVTList VTs = CurDAG->getVTList(MVT::Glue);
3667  SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3668  InFlag = SDValue(CNode, 0);
3669  }
3670 
3671  // Copy the low half of the result, if it is needed.
3672  if (!SDValue(Node, 0).use_empty()) {
3673  assert(LoReg && "Register for low half is not defined!");
3674  SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
3675  NVT, InFlag);
3676  InFlag = ResLo.getValue(2);
3677  ReplaceUses(SDValue(Node, 0), ResLo);
3678  LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
3679  dbgs() << '\n');
3680  }
3681  // Copy the high half of the result, if it is needed.
3682  if (!SDValue(Node, 1).use_empty()) {
3683  assert(HiReg && "Register for high half is not defined!");
3684  SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
3685  NVT, InFlag);
3686  InFlag = ResHi.getValue(2);
3687  ReplaceUses(SDValue(Node, 1), ResHi);
3688  LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
3689  dbgs() << '\n');
3690  }
3691 
3692  CurDAG->RemoveDeadNode(Node);
3693  return;
3694  }
3695 
3696  case ISD::SDIVREM:
3697  case ISD::UDIVREM: {
3698  SDValue N0 = Node->getOperand(0);
3699  SDValue N1 = Node->getOperand(1);
3700 
3701  unsigned Opc, MOpc;
3702  bool isSigned = Opcode == ISD::SDIVREM;
3703  if (!isSigned) {
3704  switch (NVT.SimpleTy) {
3705  default: llvm_unreachable("Unsupported VT!");
3706  case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
3707  case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
3708  case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
3709  case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
3710  }
3711  } else {
3712  switch (NVT.SimpleTy) {
3713  default: llvm_unreachable("Unsupported VT!");
3714  case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
3715  case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
3716  case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
3717  case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
3718  }
3719  }
3720 
3721  unsigned LoReg, HiReg, ClrReg;
3722  unsigned SExtOpcode;
3723  switch (NVT.SimpleTy) {
3724  default: llvm_unreachable("Unsupported VT!");
3725  case MVT::i8:
3726  LoReg = X86::AL; ClrReg = HiReg = X86::AH;
3727  SExtOpcode = X86::CBW;
3728  break;
3729  case MVT::i16:
3730  LoReg = X86::AX; HiReg = X86::DX;
3731  ClrReg = X86::DX;
3732  SExtOpcode = X86::CWD;
3733  break;
3734  case MVT::i32:
3735  LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
3736  SExtOpcode = X86::CDQ;
3737  break;
3738  case MVT::i64:
3739  LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
3740  SExtOpcode = X86::CQO;
3741  break;
3742  }
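  // E.g. a 32-bit division takes its dividend in EDX:EAX; CDQ sign-extends
  // EAX into EDX for the signed case, EDX is zeroed otherwise, and the
  // quotient/remainder come back in EAX/EDX.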
3743 
3744  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3745  bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3746  bool signBitIsZero = CurDAG->SignBitIsZero(N0);
3747 
3748  SDValue InFlag;
3749  if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
3750  // Special case for div8, just use a move with zero extension to AX to
3751  // clear the upper 8 bits (AH).
3752  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
3753  MachineSDNode *Move;
3754  if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3755  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
3756  Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
3757  MVT::Other, Ops);
3758  Chain = SDValue(Move, 1);
3759  ReplaceUses(N0.getValue(1), Chain);
3760  // Record the mem-refs
3761  CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
3762  } else {
3763  Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0);
3764  Chain = CurDAG->getEntryNode();
3765  }
3766  Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0),
3767  SDValue());
3768  InFlag = Chain.getValue(1);
3769  } else {
3770  InFlag =
3771  CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
3772  LoReg, N0, SDValue()).getValue(1);
3773  if (isSigned && !signBitIsZero) {
3774  // Sign extend the low part into the high part.
3775  InFlag =
3776  SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
3777  } else {
3778  // Zero out the high part, effectively zero extending the input.
3779  SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
3780  switch (NVT.SimpleTy) {
3781  case MVT::i16:
3782  ClrNode =
3783  SDValue(CurDAG->getMachineNode(
3784  TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
3785  CurDAG->getTargetConstant(X86::sub_16bit, dl,
3786  MVT::i32)),
3787  0);
3788  break;
3789  case MVT::i32:
3790  break;
3791  case MVT::i64:
3792  ClrNode =
3793  SDValue(CurDAG->getMachineNode(
3794  TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
3795  CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
3796  CurDAG->getTargetConstant(X86::sub_32bit, dl,
3797  MVT::i32)),
3798  0);
3799  break;
3800  default:
3801  llvm_unreachable("Unexpected division source");
3802  }
3803 
3804  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
3805  ClrNode, InFlag).getValue(1);
3806  }
3807  }
3808 
3809  if (foldedLoad) {
3810  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3811  InFlag };
3812  MachineSDNode *CNode =
3813  CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
3814  InFlag = SDValue(CNode, 1);
3815  // Update the chain.
3816  ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
3817  // Record the mem-refs
3818  CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3819  } else {
3820  InFlag =
3821  SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
3822  }
3823 
3824  // Prevent use of AH in a REX instruction by explicitly copying it to
3825  // an ABCD_L register.
3826  //
3827  // The current assumption of the register allocator is that isel
3828  // won't generate explicit references to the GR8_ABCD_H registers. If
3829  // the allocator and/or the backend get enhanced to be more robust in
3830  // that regard, this can be, and should be, removed.
3831  if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
3832  SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
3833  unsigned AHExtOpcode =
3834  isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
3835 
3836  SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
3837  MVT::Glue, AHCopy, InFlag);
3838  SDValue Result(RNode, 0);
3839  InFlag = SDValue(RNode, 1);
3840 
3841  Result =
3842  CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
3843 
3844  ReplaceUses(SDValue(Node, 1), Result);
3845  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3846  dbgs() << '\n');
3847  }
3848  // Copy the division (low) result, if it is needed.
3849  if (!SDValue(Node, 0).use_empty()) {
3850  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
3851  LoReg, NVT, InFlag);
3852  InFlag = Result.getValue(2);
3853  ReplaceUses(SDValue(Node, 0), Result);
3854  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3855  dbgs() << '\n');
3856  }
3857  // Copy the remainder (high) result, if it is needed.
3858  if (!SDValue(Node, 1).use_empty()) {
3859  SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
3860  HiReg, NVT, InFlag);
3861  InFlag = Result.getValue(2);
3862  ReplaceUses(SDValue(Node, 1), Result);
3863  LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
3864  dbgs() << '\n');
3865  }
3866  CurDAG->RemoveDeadNode(Node);
3867  return;
3868  }
3869 
3870  case X86ISD::CMP: {
3871  SDValue N0 = Node->getOperand(0);
3872  SDValue N1 = Node->getOperand(1);
3873 
3874  // Optimizations for TEST compares.
3875  if (!isNullConstant(N1))
3876  break;
3877 
3878  // Save the original VT of the compare.
3879  MVT CmpVT = N0.getSimpleValueType();
3880 
3881  // If we are comparing (and (shr X, C, Mask) with 0, emit a BEXTR followed
3882  // by a test instruction. The test should be removed later by
3883  // analyzeCompare if we are using only the zero flag.
3884  // TODO: Should we check the users and use the BEXTR flags directly?
3885  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
3886  if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
3887  unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
3888  : X86::TEST32rr;
3889  SDValue BEXTR = SDValue(NewNode, 0);
3890  NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
3891  ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
3892  CurDAG->RemoveDeadNode(Node);
3893  return;
3894  }
3895  }
3896 
3897  // We can peek through truncates, but we need to be careful below.
3898  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
3899  N0 = N0.getOperand(0);
3900 
3901  // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
3902  // use a smaller encoding.
3903  // Look past the truncate if CMP is the only use of it.
3904  if (N0.getOpcode() == ISD::AND &&
3905  N0.getNode()->hasOneUse() &&
3906  N0.getValueType() != MVT::i8) {
3907  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3908  if (!C) break;
3909  uint64_t Mask = C->getZExtValue();
3910 
3911  // Check if we can replace AND+IMM64 with a shift. This is possible for
3912  // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
3913  // flag.
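  // E.g. '(X & 0xFFFFFFFF00000000) == 0' can be tested as 'shr $32' followed
  // by 'test', avoiding the 64-bit mask immediate.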
3914  if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
3915  onlyUsesZeroFlag(SDValue(Node, 0))) {
3916  if (isMask_64(~Mask)) {
3917  unsigned TrailingZeros = countTrailingZeros(Mask);
3918  SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
3919  SDValue Shift =
3920  SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64,
3921  N0.getOperand(0), Imm), 0);
3922  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
3923  MVT::i32, Shift, Shift);
3924  ReplaceNode(Node, Test);
3925  return;
3926  }
3927  if (isMask_64(Mask)) {
3928  unsigned LeadingZeros = countLeadingZeros(Mask);
3929  SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
3930  SDValue Shift =
3931  SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64,
3932  N0.getOperand(0), Imm), 0);
3933  MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
3934  MVT::i32, Shift, Shift);
3935  ReplaceNode(Node, Test);
3936  return;
3937  }
3938  }
3939 
3940  MVT VT;
3941  int SubRegOp;
3942  unsigned ROpc, MOpc;
3943 
3944  // For each of these checks we need to be careful if the sign flag is
3945  // being used. It is only safe to use the sign flag in two cases:
3946  // either the sign bit in the shrunken mask is zero or the final test
3947  // size is equal to the original compare size.
3948 
3949  if (isUInt<8>(Mask) &&
3950  (!(Mask & 0x80) || CmpVT == MVT::i8 ||
3951  hasNoSignFlagUses(SDValue(Node, 0)))) {
3952  // For example, convert "testl %eax, $8" to "testb %al, $8"
3953  VT = MVT::i8;
3954  SubRegOp = X86::sub_8bit;
3955  ROpc = X86::TEST8ri;
3956  MOpc = X86::TEST8mi;
3957  } else if (OptForMinSize && isUInt<16>(Mask) &&
3958  (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
3959  hasNoSignFlagUses(SDValue(Node, 0)))) {
3960  // For example, "testl %eax, $32776" to "testw %ax, $32776".
3961  // NOTE: We only want to form TESTW instructions if optimizing for
3962  // min size. Otherwise we only save one byte and possibly get a length
3963  // changing prefix penalty in the decoders.
3964  VT = MVT::i16;
3965  SubRegOp = X86::sub_16bit;
3966  ROpc = X86::TEST16ri;
3967  MOpc = X86::TEST16mi;
3968  } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
3969  ((!(Mask & 0x80000000) &&
3970  // Without minsize 16-bit Cmps can get here so we need to
3971  // be sure we calculate the correct sign flag if needed.
3972  (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
3973  CmpVT == MVT::i32 ||
3974  hasNoSignFlagUses(SDValue(Node, 0)))) {
3975  // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
3976  // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
3977  // Otherwise, we find ourselves in a position where we have to do
3978  // promotion. If previous passes did not promote the and, we assume
3979  // they had a good reason not to and do not promote here.
3980  VT = MVT::i32;
3981  SubRegOp = X86::sub_32bit;
3982  ROpc = X86::TEST32ri;
3983  MOpc = X86::TEST32mi;
3984  } else {
3985  // No eligible transformation was found.
3986  break;
3987  }
3988 
3989  // FIXME: We should be able to fold loads here.
3990 
3991  SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
3992  SDValue Reg = N0.getOperand(0);
3993 
3994  // Emit a testl or testw.
3995  MachineSDNode *NewNode;
3996  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3997  if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
3998  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
3999  Reg.getOperand(0) };
4000  NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
4001  // Update the chain.
4002  ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
4003  // Record the mem-refs
4004  CurDAG->setNodeMemRefs(NewNode,
4005  {cast<LoadSDNode>(Reg)->getMemOperand()});
4006  } else {
4007  // Extract the subregister if necessary.
4008  if (N0.getValueType() != VT)
4009  Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
4010 
4011  NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
4012  }
4013  // Replace CMP with TEST.
4014  ReplaceNode(Node, NewNode);
4015  return;
4016  }
4017  break;
4018  }
4019  case X86ISD::PCMPISTR: {
4020  if (!Subtarget->hasSSE42())
4021  break;
4022 
4023  bool NeedIndex = !SDValue(Node, 0).use_empty();
4024  bool NeedMask = !SDValue(Node, 1).use_empty();
4025  // We can't fold a load if we are going to make two instructions.
4026  bool MayFoldLoad = !NeedIndex || !NeedMask;
4027 
4028  MachineSDNode *CNode;
4029  if (NeedMask) {
4030  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
4031  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
4032  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
4033  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4034  }
4035  if (NeedIndex || !NeedMask) {
4036  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
4037  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
4038  CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
4039  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4040  }
4041 
4042  // Connect the flag usage to the last instruction created.
4043  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4044  CurDAG->RemoveDeadNode(Node);
4045  return;
4046  }
4047  case X86ISD::PCMPESTR: {
4048  if (!Subtarget->hasSSE42())
4049  break;
4050 
4051  // Copy the two implicit register inputs.
4052  SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
4053  Node->getOperand(1),
4054  SDValue()).getValue(1);
4055  InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
4056  Node->getOperand(3), InFlag).getValue(1);
4057 
4058  bool NeedIndex = !SDValue(Node, 0).use_empty();
4059  bool NeedMask = !SDValue(Node, 1).use_empty();
4060  // We can't fold a load if we are going to make two instructions.
4061  bool MayFoldLoad = !NeedIndex || !NeedMask;
4062 
4063  MachineSDNode *CNode;
4064  if (NeedMask) {
4065  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
4066  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
4067  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
4068  InFlag);
4069  ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
4070  }
4071  if (NeedIndex || !NeedMask) {
4072  unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
4073  unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
4074  CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
4075  ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
4076  }
4077  // Connect the flag usage to the last instruction created.
4078  ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
4079  CurDAG->RemoveDeadNode(Node);
4080  return;
4081  }
4082 
4083  case ISD::STORE:
4084  if (foldLoadStoreIntoMemOperand(Node))
4085  return;
4086  break;
4087  }
4088 
4089  SelectCode(Node);
4090 }
4091 
4092 bool X86DAGToDAGISel::
4093 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4094  std::vector<SDValue> &OutOps) {
4095  SDValue Op0, Op1, Op2, Op3, Op4;
4096  switch (ConstraintID) {
4097  default:
4098  llvm_unreachable("Unexpected asm memory constraint");
4099  case InlineAsm::Constraint_i:
4100  // FIXME: It seems strange that 'i' is needed here since it's supposed to
4101  // be an immediate and not a memory constraint.
4102  LLVM_FALLTHROUGH;
4103  case InlineAsm::Constraint_o: // offsetable ??
4104  case InlineAsm::Constraint_v: // not offsetable ??
4105  case InlineAsm::Constraint_m: // memory
4106  case InlineAsm::Constraint_X:
4107  if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
4108  return true;
4109  break;
4110  }
4111 
4112  OutOps.push_back(Op0);
4113  OutOps.push_back(Op1);
4114  OutOps.push_back(Op2);
4115  OutOps.push_back(Op3);
4116  OutOps.push_back(Op4);
4117  return false;
4118 }
4119 
4120 /// This pass converts a legalized DAG into a X86-specific DAG,
4121 /// ready for instruction scheduling.
4122  FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
4123  CodeGenOpt::Level OptLevel) {
4124  return new X86DAGToDAGISel(TM, OptLevel);
4125 }
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:570
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:348
X = FP_ROUND(Y, TRUNC) - Rounding &#39;Y&#39; from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:537
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq)
Return true if call address is a load and it can be moved below CALLSEQ_START and the chains leading ...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, SDValue StoredVal, SelectionDAG *CurDAG, unsigned LoadOpNo, LoadSDNode *&LoadNode, SDValue &InputChain)
Check whether or not the chain ending in StoreNode is suitable for doing the {load; op; store} to mod...
EVT getValueType() const
Return the ValueType of the referenced return value.
Vector comparison generating mask bits for fp and integer signed and unsigned data types...
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const SDValue & getOffset() const
bool isUndef() const
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
APInt getSignedMax() const
Return the largest signed value contained in the ConstantRange.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:41
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N)
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget)
Tail call return.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
bool isVector() const
Return true if this is a vector value type.
const SDValue & getBasePtr() const
CondCode getCondFromCMovOpc(unsigned Opc)
Return condition code of a CMov opcode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:857
X86 conditional moves.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1203
unsigned Reg
CondCode getCondFromSETOpc(unsigned Opc)
Return condition code of a SET opcode.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:252
bool hasFastBEXTR() const
Definition: X86Subtarget.h:638
const SDValue & getChain() const
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:302
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:130
unsigned getAlignment() const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:320
bool isBitwiseNot(SDValue V)
Returns true if V is a bitwise not operation.
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:810
STATISTIC(NumFunctions, "Total number of functions")
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
F(f)
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1 at the ...
Definition: ISDOpcodes.h:352
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:188
static bool MayFoldLoad(SDValue Op)
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1508
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:158
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode *> &Visited, SmallVectorImpl< const SDNode *> &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:209
X86 compare and logical compare instructions.
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4342
The address of a basic block.
Definition: Constants.h:839
static cl::opt< bool > AndImmShrink("x86-and-imm-shrink", cl::init(true), cl::desc("Enable setting constant bits to reduce size of mask immediates"), cl::Hidden)
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
Dynamic (non-constant condition) vector blend where only the sign bits of the condition elements are ...
Shift and rotation operations.
Definition: ISDOpcodes.h:409
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:477
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef...
CallLoweringInfo & setChain(SDValue InChain)
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:169
op_iterator op_end() const
uint64_t getConstantOperandVal(unsigned i) const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
CondCode getCondFromBranchOpc(unsigned Opc)
SimpleValueType SimpleTy
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:303
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence, and carry arbitrary information that target might want to know.
Definition: ISDOpcodes.h:717
Position
Position to insert a new instruction relative to an existing instruction.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
This represents a list of ValueType&#39;s that has been intern&#39;d by a SelectionDAG.
bool hasVLX() const
Definition: X86Subtarget.h:656
unsigned getSizeInBits() const
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasExternalLinkage() const
Definition: GlobalValue.h:421
int64_t getSExtValue() const
static void InvalidateNodeId(SDNode *N)
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:291
X86 FP SETCC, similar to above, but with output as an i1 mask and with optional rounding mode...
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:410
#define T
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:428
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
op_iterator op_begin() const
static X86::CondCode getCondFromOpc(unsigned Opc)
ArrayRef< SDUse > ops() const
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
This class is used to represent ISD::STORE nodes.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:635
#define P(N)
const SDValue & getBasePtr() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:165
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:342
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:119
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
Definition: ISDOpcodes.h:80
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM)
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
bool isMachineOpcode() const
static bool mayUseCarryFlag(X86::CondCode CC)
unsigned getScalarSizeInBits() const
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1184
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
const SDValue & getOperand(unsigned Num) const
X86 conditional branches.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const SDValue & getOffset() const
APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:160
This class provides iterator support for SDUse operands that use a specific SDNode.
unsigned getMachineOpcode() const
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:597
void RepositionNode(allnodes_iterator Position, SDNode *N)
Move node N in the AllNodes list to be immediately before the given iterator Position.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using &#39;From&#39; to use &#39;To&#39; instead.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
On Darwin, this node represents the result of the popl at function entry, used for PIC code...
self_iterator getIterator()
Definition: ilist_node.h:81
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM)
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:555
std::vector< ArgListEntry > ArgListTy
Extended Value Type.
Definition: ValueTypes.h:33
This structure contains all information that is necessary for lowering calls.
bool isVolatile() const
APInt getSignedMin() const
Return the smallest signed value contained in the ConstantRange.
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
bool use_empty() const
Return true if there are no uses of this node.
These operations represent an abstract X86 call instruction, which includes a bunch of information...
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:49
void dump() const
Dump this node, for debugging.
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:519
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM, const X86Subtarget &Subtarget)
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:308
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:264
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:221
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:839
const DataFlowGraph & G
Definition: RDFGraph.cpp:210
An SDNode that represents everything that will be needed to construct a MachineInstr.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM)
EVT getMemoryVT() const
Return the type of the in-memory value.
Target - Wrapper for Target specific information.
Class for arbitrary precision integers.
Definition: APInt.h:69
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
Definition: APInt.h:1308
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:467
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:470
XOP - Opcode prefix used by XOP instructions.
Definition: X86BaseInfo.h:524
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:386
static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User)
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:186
int getNodeId() const
Return the unique node id.
static bool isDispSafeForFrameIndex(int64_t Val)
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:422
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:150
const SDValue & getValue() const
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:386
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2*N], and return the full value as two results, each of type iN.
Definition: ISDOpcodes.h:205
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:181
A wrapper node for TargetConstantPool, TargetJumpTable, TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, MCSymbol and TargetBlockAddress.
FunctionPass * createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into a X86-specific DAG, ready for instruction scheduling...
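For context, a simplified sketch of how this selector is typically hooked into the backend pipeline; it assumes the usual TargetPassConfig arrangement (an X86PassConfig subclass with a getX86TargetMachine() accessor) and is not a verbatim quote of X86TargetMachine.cpp.

  // Sketch: inside the X86 TargetPassConfig subclass.
  bool X86PassConfig::addInstSelector() {
    // Install the X86 DAG-to-DAG instruction selector defined in this file.
    addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
    return false;
  }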
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:615
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:594
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
bool is512BitVector() const
Return true if this is a 512-bit vector type.
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
unsigned getOpcode() const
SDValue getValue(unsigned R) const
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:345
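Being constexpr, this too can be demonstrated with compile-time checks; the values are arbitrary examples.

  #include "llvm/Support/MathExtras.h"

  static_assert(llvm::isUInt<16>(65535), "largest value that fits in 16 unsigned bits");
  static_assert(!llvm::isUInt<16>(65536), "one past the top does not fit");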
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defined outside of the scope of this SelectionDAG.
Definition: ISDOpcodes.h:174
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
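A minimal sketch of creating such a node; the helper name and the choice of i32 are illustrative assumptions, not code from this file.

  #include "llvm/CodeGen/SelectionDAG.h"
  #include <cstdint>

  // Wrap an immediate in a ConstantSDNode of type i32 at the given location.
  llvm::SDValue makeImm32(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                          uint64_t Val) {
    return DAG.getConstant(Val, DL, llvm::MVT::i32);
  }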
const MachinePointerInfo & getPointerInfo() const
This class is used to form a handle around another node that is persistent and is updated across invocations of replaceAllUsesWith on its operand.
unsigned getReg() const
unsigned getMinSignedBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1551
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
bool hasBMI() const
Definition: X86Subtarget.h:591
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
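A sketch of typical use; it assumes the caller has already verified that the operand is a ConstantSDNode (for example via isa<ConstantSDNode>), since the accessor asserts otherwise. The helper name is hypothetical.

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  #include <cstdint>

  // Read the immediate shift amount from operand 1 of a shift node.
  uint64_t shiftAmount(const llvm::SDNode *N) {
    return N->getConstantOperandVal(1);
  }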
unsigned getResNo() const
get the index which selects a specific result in the SDNode
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
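Together with isAllOnesConstant above, this supports quick classification of special immediates; the helper below is a hypothetical illustration.

  #include "llvm/CodeGen/SelectionDAGNodes.h"

  // Returns 0 for the constant zero, -1 for the all-ones constant, +1 otherwise.
  int classifySpecialConstant(llvm::SDValue V) {
    if (llvm::isNullConstant(V))
      return 0;
    if (llvm::isAllOnesConstant(V))
      return -1;
    return 1;
  }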
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
bool isNonTemporal() const
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:48
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:442
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1595
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
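A sketch of how known-bits information is typically queried; the helper and the choice of eight bits are illustrative assumptions rather than code from this file.

  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/Support/KnownBits.h"

  // True if the top eight bits of V are provably zero.
  bool topByteKnownZero(llvm::SelectionDAG &DAG, llvm::SDValue V) {
    llvm::KnownBits Known = DAG.computeKnownBits(V);
    return Known.countMinLeadingZeros() >= 8;
  }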
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement=true)
Returns true if the given offset can fit into the displacement field of the instruction.
unsigned getNumOperands() const
const SDValue & getOperand(unsigned i) const
uint64_t getZExtValue() const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:473
#define LLVM_DEBUG(X)
Definition: Debug.h:122
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
bool hasTBM() const
Definition: X86Subtarget.h:584
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:374
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, SDValue Call, SDValue OrigChain)
Replace the original chain operand of the call with load's chain operand and move load below the call...
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:241
Special wrapper used under X86-64 PIC mode for RIP relative displacements.
BRIND - Indirect branch.
Definition: ISDOpcodes.h:635
This class is used to represent ISD::LOAD nodes.
static int getUninvalidatedNodeId(SDNode *N)