//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
    // X86 Specific DAG Nodes
    enum NodeType : unsigned {
      // Start the numbering where the builtin ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// Bit scan forward.
      BSF,
      /// Bit scan reverse.
      BSR,

      /// Double shift instructions. These correspond to
      /// X86::SHLDxx and X86::SHRDxx instructions.
      SHLD,
      SHRD,

      /// Bitwise logical AND of floating point values. This corresponds
      /// to X86::ANDPS or X86::ANDPD.
      FAND,

      /// Bitwise logical OR of floating point values. This corresponds
      /// to X86::ORPS or X86::ORPD.
      FOR,

      /// Bitwise logical XOR of floating point values. This corresponds
      /// to X86::XORPS or X86::XORPD.
      FXOR,

      /// Bitwise logical ANDNOT of floating point values. This
      /// corresponds to X86::ANDNPS or X86::ANDNPD.
      FANDN,

      /// These operations represent an abstract X86 call
      /// instruction, which includes a bunch of information. In particular the
      /// operands of these nodes are:
      ///
      ///     #0 - The incoming token chain
      ///     #1 - The callee
      ///     #2 - The number of arg bytes the caller pushes on the stack.
      ///     #3 - The number of arg bytes the callee pops off the stack.
      ///     #4 - The value to pass in AL/AX/EAX (optional)
      ///     #5 - The value to pass in DL/DX/EDX (optional)
      ///
      /// The result values of these nodes are:
      ///
      ///     #0 - The outgoing token chain
      ///     #1 - The first register result value (optional)
      ///     #2 - The second register result value (optional)
      ///
      CALL,

      /// This operation implements the lowering for readcyclecounter.
      RDTSC_DAG,

      /// X86 Read Time-Stamp Counter and Processor ID.
      RDTSCP_DAG,

      /// X86 Read Performance Monitoring Counters.
      RDPMC_DAG,

      /// X86 compare and logical compare instructions.
      CMP, COMI, UCOMI,

      /// X86 bit-test instructions.
      BT,

      /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
      /// operand, usually produced by a CMP instruction.
      SETCC,

      /// X86 Select
      SELECT, SELECTS,

      // Same as SETCC except it's materialized with a sbb and the value is all
      // ones or all zeros.
      SETCC_CARRY, // R = carry_bit ? ~0 : 0

      /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
      /// Operands are two FP values to compare; result is a mask of
      /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
      FSETCC,

      /// X86 FP SETCC, similar to above, but with output as an i1 mask and
      /// with optional rounding mode.
      FSETCCM, FSETCCM_RND,

      /// X86 conditional moves. Operand 0 and operand 1 are the two values
      /// to select from. Operand 2 is the condition code, and operand 3 is the
      /// flag operand produced by a CMP or TEST instruction. It also writes a
      /// flag result.
      CMOV,

      /// X86 conditional branches. Operand 0 is the chain operand, operand 1
      /// is the block to branch to if the condition is true, operand 2 is the
      /// condition code, and operand 3 is the flag operand produced by a CMP
      /// or TEST instruction.
      BRCOND,

      /// Return with a flag operand. Operand 0 is the chain operand, operand
      /// 1 is the number of bytes of stack to pop.
      RET_FLAG,

      /// Return from interrupt. Operand 0 is the number of bytes to pop.
      IRET,

      /// Repeat fill, corresponds to X86::REP_STOSx.
      REP_STOS,

      /// Repeat move, corresponds to X86::REP_MOVSx.
      REP_MOVS,

      /// On Darwin, this node represents the result of the popl
      /// at function entry, used for PIC code.
      GlobalBaseReg,

      /// A wrapper node for TargetConstantPool, TargetJumpTable,
      /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
      /// MCSymbol and TargetBlockAddress.
      Wrapper,

      /// Special wrapper used under X86-64 PIC mode for RIP
      /// relative displacements.
      WrapperRIP,

      /// Copies a 64-bit value from the low word of an XMM vector
      /// to an MMX vector.
      MOVDQ2Q,

      /// Copies a 32-bit value from the low word of a MMX
      /// vector to a GPR.
      MMX_MOVD2W,

      /// Copies a GPR into the low 32-bit word of a MMX vector
      /// and zeroes out the high word.
      MMX_MOVW2D,

      /// Extract an 8-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRB.
      PEXTRB,

      /// Extract a 16-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRW.
      PEXTRW,

      /// Insert any element of a 4 x float vector into any element
      /// of a destination 4 x float vector.
      INSERTPS,

      /// Insert the lower 8-bits of a 32-bit value to a vector,
      /// corresponds to X86::PINSRB.
      PINSRB,

      /// Insert the lower 16-bits of a 32-bit value to a vector,
      /// corresponds to X86::PINSRW.
      PINSRW,

      /// Shuffle 16 8-bit values within a vector.
      PSHUFB,

      /// Compute Sum of Absolute Differences.
      PSADBW,
      /// Compute Double Block Packed Sum-Absolute-Differences.
      DBPSADBW,

      /// Bitwise Logical AND NOT of Packed FP values.
      ANDNP,

      /// Blend where the selector is an immediate.
      BLENDI,

      /// Dynamic (non-constant condition) vector blend where only the sign bits
      /// of the condition elements are used. This is used to enforce that the
      /// condition mask is not valid for generic VSELECT optimizations.
      SHRUNKBLEND,

      /// Combined add and sub on an FP vector.
      ADDSUB,

      // FP vector ops with rounding mode.
      FADD_RND, FADDS_RND,
      FSUB_RND, FSUBS_RND,
      FMUL_RND, FMULS_RND,
      FDIV_RND, FDIVS_RND,
      FMAX_RND, FMAXS_RND,
      FMIN_RND, FMINS_RND,
      FSQRT_RND, FSQRTS_RND,

      // FP vector get exponent.
      FGETEXP_RND, FGETEXPS_RND,
      // Extract Normalized Mantissas.
      VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
      // FP Scale.
      SCALEF,
      SCALEFS,

      // Integer add/sub with unsigned saturation.
      ADDUS,
      SUBUS,

      // Integer add/sub with signed saturation.
      ADDS,
      SUBS,

      // Unsigned Integer average.
      AVG,

      /// Integer horizontal add/sub.
      HADD,
      HSUB,

      /// Floating point horizontal add/sub.
      FHADD,
      FHSUB,

      // Detect Conflicts Within a Vector.
      CONFLICT,

      /// Floating point max and min.
      FMAX, FMIN,

      /// Commutative FMIN and FMAX.
      FMAXC, FMINC,

      /// Scalar intrinsic floating point max and min.
      FMAXS, FMINS,

      /// Floating point reciprocal-sqrt and reciprocal approximation.
      /// Note that these typically require refinement
      /// in order to obtain suitable precision.
      FRSQRT, FRCP,

      // AVX-512 reciprocal approximations with a little more precision.
      RSQRT14, RSQRT14S, RCP14, RCP14S,

      // Thread Local Storage.
      TLSADDR,

      // Thread Local Storage. A call to get the start address
      // of the TLS block for the current module.
      TLSBASEADDR,

      // Thread Local Storage. When calling to an OS provided
      // thunk at the address from an earlier relocation.
      TLSCALL,

      // Exception Handling helpers.
      EH_RETURN,

      // SjLj exception handling setjmp.
      EH_SJLJ_SETJMP,

      // SjLj exception handling longjmp.
      EH_SJLJ_LONGJMP,

      // SjLj exception handling dispatch.
      EH_SJLJ_SETUP_DISPATCH,

      /// Tail call return. See X86TargetLowering::LowerCall for
      /// the list of operands.
      TC_RETURN,

      // Vector move to low scalar and zero higher vector elements.
      VZEXT_MOVL,

      // Vector integer zero-extend.
      VZEXT,
      // Vector integer signed-extend.
      VSEXT,

      // Vector integer truncate.
      VTRUNC,
      // Vector integer truncate with unsigned/signed saturation.
      VTRUNCUS, VTRUNCS,

      // Vector FP extend.
      VFPEXT,

      // Vector FP round.
      VFPROUND,

      // Convert a vector to mask, set bits based on the MSB.
      CVT2MASK,

      // 128-bit vector logical left / right shift.
      VSHLDQ, VSRLDQ,

      // Vector shift elements.
      VSHL, VSRL, VSRA,

      // Vector variable shift right arithmetic.
      // Unlike ISD::SRA, if the shift count is greater than the element size,
      // the sign bit is used to fill the destination data element.
      VSRAV,

      // Vector shift elements by immediate.
      VSHLI, VSRLI, VSRAI,

      // Shifts of mask registers.
      KSHIFTL, KSHIFTR,

      // Bit rotate by immediate.
      VROTLI, VROTRI,

      // Vector packed double/float comparison.
      CMPP,

      // Vector integer comparisons.
      PCMPEQ, PCMPGT,
      // Vector integer comparisons, the result is in a mask vector.
      PCMPEQM, PCMPGTM,

      MULTISHIFT,

      /// Vector comparison generating mask bits for fp and
      /// integer signed and unsigned data types.
      CMPM,
      CMPMU,
      // Vector comparison with rounding mode for FP values.
      CMPM_RND,

      // Arithmetic operations with FLAGS results.
      ADD, SUB, ADC, SBB, SMUL,
      INC, DEC, OR, XOR, AND,

      // LOW, HI, FLAGS = umul LHS, RHS.
      UMUL,

      // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
      SMUL8, UMUL8,

      // 8-bit divrem that zero-extends the high result (AH).
      UDIVREM8_ZEXT_HREG,
      SDIVREM8_SEXT_HREG,

      // X86-specific multiply by immediate.
      MUL_IMM,

      // Vector sign bit extraction.
      MOVMSK,

      // Vector bitwise comparisons.
      PTEST,

      // Vector packed fp sign bitwise comparisons.
      TESTP,

      // Vector "test" in AVX-512, the result is in a mask vector.
      TESTM,
      TESTNM,

      // OR/AND test for masks.
      KORTEST,
      KTEST,

      // Several flavors of instructions with vector shuffle behaviors.
      // Saturated signed/unsigned packing.
      PACKSS,
      PACKUS,
      // Intra-lane alignr.
      PALIGNR,
      // AVX512 inter-lane alignr.
      VALIGN,
      PSHUFD,
      PSHUFHW,
      PSHUFLW,
      SHUFP,
      // Shuffle Packed Values at 128-bit granularity.
      SHUF128,
      MOVDDUP,
      MOVSHDUP,
      MOVSLDUP,
      MOVLHPS,
      MOVHLPS,
      MOVLPS,
      MOVLPD,
      MOVSD,
      MOVSS,
      UNPCKL,
      UNPCKH,
      VPERMILPV,
      VPERMILPI,
      VPERMI,
      VPERM2X128,

      // Variable Permute (VPERM).
      // Res = VPERMV MaskV, V0
      VPERMV,

      // 3-op Variable Permute (VPERMT2).
      // Res = VPERMV3 V0, MaskV, V1
      VPERMV3,

      // 3-op Variable Permute overwriting the index (VPERMI2).
      // Res = VPERMIV3 V0, MaskV, V1
      VPERMIV3,

      // Bitwise ternary logic.
      VPTERNLOG,
      // Fix Up Special Packed Float32/64 values.
      VFIXUPIMM,
      VFIXUPIMMS,
      // Range Restriction Calculation For Packed Pairs of Float32/64 values.
      VRANGE,
      // Reduce - Perform Reduction Transformation on scalar/packed FP.
      VREDUCE,
      // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
      // Also used by the legacy (V)ROUND intrinsics where we mask out the
      // scaling part of the immediate.
      VRNDSCALE,
      // Tests Types Of FP Values for packed types.
      VFPCLASS,
      // Tests Types Of FP Values for scalar types.
      VFPCLASSS,

      // Broadcast scalar to vector.
      VBROADCAST,
      // Broadcast mask to vector.
      VBROADCASTM,
      // Broadcast subvector to vector.
      SUBV_BROADCAST,

      // Extract vector element.
      VEXTRACT,

      /// SSE4A Extraction and Insertion.
      EXTRQI, INSERTQI,

      // XOP arithmetic/logical shifts.
      VPSHA, VPSHL,
      // XOP signed/unsigned integer comparisons.
      VPCOM, VPCOMU,
      // XOP packed permute bytes.
      VPPERM,
      // XOP two source permutation.
      VPERMIL2,

      // Vector multiply packed unsigned doubleword integers.
      PMULUDQ,
      // Vector multiply packed signed doubleword integers.
      PMULDQ,
      // Vector Multiply Packed Unsigned Integers with Round and Scale.
      MULHRS,

      // Multiply and Add Packed Integers.
      VPMADDUBSW, VPMADDWD,

      // AVX512IFMA multiply and add.
      // NOTE: These are different from the instruction and perform
      // op0 x op1 + op2.
      VPMADD52L, VPMADD52H,

      // FMA nodes.
      // We use the target independent ISD::FMA for the non-inverted case.
      FNMADD,
      FMSUB,
      FNMSUB,
      FMADDSUB,
      FMSUBADD,

      // FMA with rounding mode.
      FMADD_RND,
      FNMADD_RND,
      FMSUB_RND,
      FNMSUB_RND,
      FMADDSUB_RND,
      FMSUBADD_RND,

      // Scalar intrinsic FMA.
      FMADDS1, FMADDS3,
      FNMADDS1, FNMADDS3,
      FMSUBS1, FMSUBS3,
      FNMSUBS1, FNMSUBS3,

      // Scalar intrinsic FMA with rounding mode.
      // Two versions, passthru bits on op1 or op3.
      FMADDS1_RND, FMADDS3_RND,
      FNMADDS1_RND, FNMADDS3_RND,
      FMSUBS1_RND, FMSUBS3_RND,
      FNMSUBS1_RND, FNMSUBS3_RND,

      // Compress and expand.
      COMPRESS,
      EXPAND,

      // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
      SINT_TO_FP_RND, UINT_TO_FP_RND,
      SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,

      // Vector float/double to signed/unsigned integer.
      CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
      // Scalar float/double to signed/unsigned integer.
      CVTS2SI_RND, CVTS2UI_RND,

      // Vector float/double to signed/unsigned integer with truncation.
      CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
      // Scalar float/double to signed/unsigned integer with truncation.
      CVTTS2SI_RND, CVTTS2UI_RND,

      // Vector signed/unsigned integer to float/double.
      CVTSI2P, CVTUI2P,

      // Save xmm argument registers to the stack, according to %al. An operator
      // is needed so that this can be expanded with control flow.
      VASTART_SAVE_XMM_REGS,

      // Windows' _chkstk call to do stack probing.
      WIN_ALLOCA,

      // For allocating variable amounts of stack space when using
      // segmented stacks. Checks if the current stacklet has enough space, and
      // falls back to heap allocation if not.
      SEG_ALLOCA,

      // Memory barriers.
      MEMBARRIER,
      MFENCE,

      // Store FP status word into i16 register.
      FNSTSW16r,

      // Store contents of %ah into %eflags.
      SAHF,

      // Get a random integer and indicate whether it is valid in CF.
      RDRAND,

      // Get a NIST SP800-90B & C compliant random integer and
      // indicate whether it is valid in CF.
      RDSEED,

      // SSE42 string comparisons.
      PCMPISTRI,
      PCMPESTRI,

      // Test if in transactional execution.
      XTEST,

      // ERI instructions.
      RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,

      // Conversions between float and half-float.
      CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,

      // LWP insert record.
      LWPINS,

      // Compare and swap.
      LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
      LCMPXCHG8_DAG,
      LCMPXCHG16_DAG,
      LCMPXCHG8_SAVE_EBX_DAG,
      LCMPXCHG16_SAVE_RBX_DAG,

      /// LOCK-prefixed arithmetic read-modify-write instructions.
      /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
      LADD, LSUB, LOR, LXOR, LAND,

      // Load, scalar_to_vector, and zero extend.
      VZEXT_LOAD,

      // Store FP control word into i16 memory.
      FNSTCW16m,

      /// This instruction implements FP_TO_SINT with the
      /// integer destination in memory and a FP reg source. This corresponds
      /// to the X86::FIST*m instructions and the rounding mode change stuff. It
      /// has two inputs (token chain and address) and two outputs (int value
      /// and token chain).
      FP_TO_INT16_IN_MEM,
      FP_TO_INT32_IN_MEM,
      FP_TO_INT64_IN_MEM,

      /// This instruction implements SINT_TO_FP with the
      /// integer source in memory and FP reg result. This corresponds to the
      /// X86::FILD*m instructions. It has three inputs (token chain, address,
      /// and source type) and two outputs (FP value and token chain). FILD_FLAG
      /// also produces a flag.
      FILD,
      FILD_FLAG,

      /// This instruction implements an extending load to FP stack slots.
      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
      /// operand, ptr to load from, and a ValueType node indicating the type
      /// to load to.
      FLD,

      /// This instruction implements a truncating store to FP stack
      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
      /// chain operand, value to store, address, and a ValueType to store it
      /// as.
      FST,

      /// This instruction grabs the address of the next argument
      /// from a va_list. (reads and modifies the va_list in memory)
      VAARG_64,

      // Vector truncating store with unsigned/signed saturation.
      VTRUNCSTOREUS, VTRUNCSTORES,
      // Vector truncating masked store with unsigned/signed saturation.
      VMTRUNCSTOREUS, VMTRUNCSTORES,

      // X86 specific gather.
      MGATHER

      // WARNING: Do not add anything in the end unless you want the node to
      // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
      // opcodes will be treated as target memory ops!
    };
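
    // These opcodes are created through the normal SelectionDAG interface;
    // an illustrative sketch (mirroring what EmitCmp below produces):
    //   SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Op0, Op1);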
  } // end namespace X86ISD

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset can fit into the displacement field
    /// of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement = true);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

  } // end namespace X86

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;

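    // x86 variable shift counts are taken in CL (the low 8 bits of ECX/RCX),
    // so an i8 shift-amount type suffices for all operand widths.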
    MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    unsigned getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    /// Returns the target specific optimal type for load
    /// and store operations as a result of memset, memcpy, and memmove
    /// lowering. If DstAlign is zero, the destination alignment can satisfy
    /// any constraint. Similarly, if SrcAlign is zero there is no need to
    /// check it against the alignment requirement, probably because the
    /// source does not need to be loaded. If 'IsMemset' is true, that means
    /// it's expanding a memset. If 'ZeroMemset' is true, that means it's a
    /// memset of zero. 'MemcpyStrSrc' indicates whether the memcpy source is
    /// constant so it does not need to be loaded.
    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                            bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                            MachineFunction &MF) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2, f64 load / store is done with fldl / fstpl, which
    /// also does type conversion. Note the specified type doesn't have to be
    /// legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                        bool *Fast) const override;

    /// Provide custom lowering hooks for some operations.
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Places new result values for the node in Results (their number
    /// and types must exactly match those of the original return values of
    /// the node), or leaves Results empty, which indicates that the node is
    /// not to be custom lowered after all.
    void LowerOperationWrapper(SDNode *N,
                               SmallVectorImpl<SDValue> &Results,
                               SelectionDAG &DAG) const override;

    /// Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    // Return true if it is profitable to combine a BUILD_VECTOR with a
    // stride-pattern to a shuffle and a truncate.
    // Example of such a combine:
    // v4i32 build_vector((extract_elt V, 1),
    //                    (extract_elt V, 3),
    //                    (extract_elt V, 5),
    //                    (extract_elt V, 7))
    //  -->
    // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
    //                 v4i64)
    bool isDesirableToCombineBuildVectorToShuffleTruncate(
        ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
    /// instruction encodings are longer and some i16 instructions are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
    /// i16 is legal, but undesirable since i16 instruction encodings are longer
    /// and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    bool mergeStoresAfterLegalization() const override { return true; }

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const SelectionDAG &DAG) const override;

    bool isCheapToSpeculateCttz() const override;

    bool isCheapToSpeculateCtlz() const override;

    bool isCtlzFast() const override;

    bool hasBitPreservingFPLogic(EVT VT) const override {
      return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
    }

    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // increase one store instruction. There is potentially a more
      // significant benefit because it avoids the float->int domain switch
      // for the input value, so it is more likely to be a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions and increase one store instruction (costing one more
      // store buffer). Since the benefit is less clear, we leave such pairs
      // out until we have a test case proving it is a win.
      return false;
    }
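
    // Illustrative IR (not from this file): storing a {float, i32} pair as a
    // single merged i64 costs a float->int domain switch plus bit math:
    //   %b  = bitcast float %f to i32
    //   %lo = zext i32 %b to i64
    //   %wi = zext i32 %i to i64
    //   %hi = shl i64 %wi, 32
    //   %v  = or i64 %lo, %hi
    //   store i64 %v, i64* %p
    // Two plain scalar stores avoid all of this, which is why the mixed
    // float/int case above returns true.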

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    SDValue unwrapAddress(SDValue N) const override;

    bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
                        int64_t &Offset) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
    getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                   const char *constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops. If hasMemory is true it means one of the asm
    /// constraints of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    unsigned
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "i")
        return InlineAsm::Constraint_i;
      else if (ConstraintCode == "o")
        return InlineAsm::Constraint_o;
      else if (ConstraintCode == "v")
        return InlineAsm::Constraint_v;
      else if (ConstraintCode == "X")
        return InlineAsm::Constraint_X;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for the
    /// register. This should only be used for C_Register constraints. On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is, the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal
    /// add immediate, that is, the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// \brief Return the cost of the scaling factor used in the addressing
    /// mode represented by AM for this target, for a load/store
    /// of the specified type.
    /// If the AM is supported, the return value must be >= 0.
    /// If the AM is not supported, it returns a negative value.
    int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS) const override;

    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
    /// register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

    /// Return true if it's profitable to narrow
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
    /// from i32 to i8 but not from i32 to i16.
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

    /// Given an intrinsic, checks if on the target the intrinsic will need to
    /// map to a MemIntrinsicNode (touches memory). If this is the case, it
    /// returns true and stores the intrinsic information into the IntrinsicInfo
    /// that was passed to the function.
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
    /// be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if
    /// there is a suitable VECTOR_SHUFFLE that can be used to replace a VAND
    /// with a constant pool entry.
    bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
                                EVT VT) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override {
      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
      // expensive than a straight movsd. On the other hand, it's important to
      // shrink long double fp constant since fldt is very slow.
      return !X86ScalarSSEf64 || VT == MVT::f80;
    }

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;

    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const {
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
    }

    /// \brief Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                      unsigned AddrSpace) const override {
      // If we can replace more than 2 scalar stores, there will be a reduction
      // in instructions even after we add a vector constant load.
      return NumElem > 2;
    }

    /// Intel processors have a unified instruction and data cache.
    const char *getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }

    unsigned getRegisterByName(const char *RegName, EVT VT,
                               SelectionDAG &DAG) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    unsigned
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    unsigned
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    bool needsFixedCatchObjects() const override;

    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// If the target has a standard location for the stack protector cookie,
    /// returns the address of that location. Otherwise, returns nullptr.
    Value *getIRStackGuard(IRBuilder<> &IRB) const override;

    bool useLoadStackGuardNode() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Value *getSSPStackGuardCheck(const Module &M) const override;

    /// Return true if the target stores SafeStack pointer at a fixed offset in
    /// some non-standard address space, and populates the address space and
    /// offset as appropriate.
    Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

    SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                      SelectionDAG &DAG) const;

    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;

    /// \brief Customize the preferred legalization strategy for certain types.
    LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;

    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

    bool supportSwiftError() const override;

    StringRef getStackProbeSymbolName(MachineFunction &MF) const override;

    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

    /// \brief Lower interleaved load(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;

    /// \brief Lower interleaved store(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    void finalizeLowering(MachineFunction &MF) const override;

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Keep a reference to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget &Subtarget;

    /// Select between SSE or x87 floating point ops.
    /// When SSE is available, use it for f32 operations.
    /// When SSE2 is available, use it for f64 operations.
    bool X86ScalarSSEf32;
    bool X86ScalarSSEf64;

    /// A list of legal FP immediates.
    std::vector<APFloat> LegalFPImmediates;

    /// Indicate that this x86 target can instruction
    /// select the specified FP immediate natively.
    void addLegalFPImmediate(const APFloat &Imm) {
      LegalFPImmediates.push_back(Imm);
    }
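
    // For example, the lowering setup registers +0.0f as natively
    // materializable (via xorps); a sketch of such a call:
    //   addLegalFPImmediate(APFloat(+0.0f)); // xorps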

    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. Targets
    /// that want to do tail call optimization should implement this function.
    bool IsEligibleForTailCallOptimization(SDValue Callee,
                                           CallingConv::ID CalleeCC,
                                           bool isVarArg,
                                           bool isCalleeStructRet,
                                           bool isCallerStructRet,
                                           Type *RetTy,
                                           const SmallVectorImpl<ISD::OutputArg> &Outs,
                                           const SmallVectorImpl<SDValue> &OutVals,
                                           const SmallVectorImpl<ISD::InputArg> &Ins,
                                           SelectionDAG &DAG) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace() const;

    std::pair<SDValue, SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                                bool isSigned,
                                                bool isReplace) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const;
    SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
                               int64_t Offset, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *
    EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                             MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit nodes that will be selected as "test Op0,Op0", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
                     SelectionDAG &DAG) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // Base class for all X86 non-masked store operations.
  class X86StoreSDNode : public MemSDNode {
  public:
    X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
                   SDVTList VTs, EVT MemVT,
                   MachineMemOperand *MMO)
        : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
    const SDValue &getValue() const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES ||
             N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // Base class for all X86 masked store operations.
  // The class has the same order of operands as MaskedStoreSDNode for
  // convenience.
  class X86MaskedStoreSDNode : public MemSDNode {
  public:
    X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                         MachineMemOperand *MMO)
        : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getBasePtr() const { return getOperand(1); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getValue() const { return getOperand(3); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
             N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 Truncating Store with Signed saturation.
  class TruncSStoreSDNode : public X86StoreSDNode {
  public:
    TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
                      SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
        : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES;
    }
  };

  // X86 Truncating Store with Unsigned saturation.
  class TruncUSStoreSDNode : public X86StoreSDNode {
  public:
    TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
        : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // X86 Truncating Masked Store with Signed saturation.
  class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncSStoreSDNode(unsigned Order,
                            const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                            MachineMemOperand *MMO)
        : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT,
                               MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES;
    }
  };

  // X86 Truncating Masked Store with Unsigned saturation.
  class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncUSStoreSDNode(unsigned Order,
                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                             MachineMemOperand *MMO)
        : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT,
                               MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 specific Gather node.
  // The class has the same order of operands as MaskedGatherSDNode for
  // convenience.
  class X86MaskedGatherSDNode : public MemSDNode {
  public:
    X86MaskedGatherSDNode(unsigned Order,
                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                          MachineMemOperand *MMO)
        : MemSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex() const { return getOperand(4); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };
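
  // Because classof is defined, LLVM's usual cast machinery works on these
  // nodes; a minimal sketch of how client code might use it:
  //   if (auto *Gather = dyn_cast<X86MaskedGatherSDNode>(N)) {
  //     SDValue Index = Gather->getIndex();
  //     ...
  //   }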

  /// Generate unpacklo/unpackhi shuffle mask.
  template <typename T = int>
  void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
                               bool Unary) {
    assert(Mask.empty() && "Expected an empty shuffle mask vector");
    int NumElts = VT.getVectorNumElements();
    int NumEltsInLane = 128 / VT.getScalarSizeInBits();
    for (int i = 0; i < NumElts; ++i) {
      unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
      int Pos = (i % NumEltsInLane) / 2 + LaneStart;
      Pos += (Unary ? 0 : NumElts * (i % 2));
      Pos += (Lo ? 0 : NumEltsInLane / 2);
      Mask.push_back(Pos);
    }
  }
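
  // Worked example (derived from the loop above): for MVT::v4i32, NumElts and
  // NumEltsInLane are both 4, so Lo=true/Unary=false yields the mask
  // <0,4,1,5> (the classic unpcklps pattern), while Lo=true/Unary=true
  // yields <0,0,1,1>.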

  /// Helper function to scale a shuffle or target shuffle mask, replacing each
  /// mask index with the scaled sequential indices for an equivalent narrowed
  /// mask. This is the reverse process to canWidenShuffleElements, but can
  /// always succeed.
  template <typename T>
  void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
                        SmallVectorImpl<T> &ScaledMask) {
    assert(0 < Scale && "Unexpected scaling factor");
    int NumElts = Mask.size();
    ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);

    for (int i = 0; i != NumElts; ++i) {
      int M = Mask[i];

      // Repeat sentinel values in every mask element.
      if (M < 0) {
        for (int s = 0; s != Scale; ++s)
          ScaledMask[(Scale * i) + s] = M;
        continue;
      }

      // Scale mask element and increment across each mask element.
      for (int s = 0; s != Scale; ++s)
        ScaledMask[(Scale * i) + s] = (Scale * M) + s;
    }
  }
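
  // Worked example (derived from the loop above): scaling the mask <0,-1,3>
  // by Scale = 2 gives <0,1,-1,-1,6,7>; sentinels are repeated and each
  // in-range index M becomes the pair (2*M, 2*M+1).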
} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG)
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
Double shift instructions.
static SDValue LowerCallResult(SDValue Chain, SDValue InFlag, const SmallVectorImpl< CCValAssign > &RVLocs, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals)
LowerCallResult - Lower the result values of a call into the appropriate copies out of appropriate ph...
TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:834
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
Vector comparison generating mask bits for fp and integer signed and unsigned data types...
Repeat move, corresponds to X86::REP_MOVSx.
void createUnpackShuffleMask(MVT VT, SmallVectorImpl< T > &Mask, bool Lo, bool Unary)
Generate unpacklo/unpackhi shuffle mask.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVMContext & Context
static bool classof(const SDNode *N)
Return with a flag operand.
const SDValue & getBasePtr() const
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Tail call return.
Compute Double Block Packed Sum-Absolute-Differences.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
bool mergeStoresAfterLegalization() const override
Allow store merging after legalization in addition to before legalization.
static bool classof(const SDNode *N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls...
bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
const SDValue & getValue() const
X86 conditional moves.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:146
This class represents a function call, abstracting a target machine&#39;s calling convention.
unsigned getVectorNumElements() const
Function Alias Analysis Results
This instruction constructs a fixed permutation of two input vectors.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:262
A debug info location.
Definition: DebugLoc.h:34
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
static bool classof(const SDNode *N)
SSE4A Extraction and Insertion.
static bool classof(const SDNode *N)
An instruction for reading from memory.
Definition: Instructions.h:164
an instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Definition: Instructions.h:677
Bitwise logical ANDNOT of floating point values.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
This operation implements the lowering for readcyclecounter.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:39
X86 compare and logical compare instructions.
MaskedTruncUSStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:136
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
Extract an 8-bit value from a vector and zero extend it to i32, corresponds to X86::PEXTRB.
A description of a memory reference used in the backend.
X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
Bitwise Logical AND NOT of Packed FP values.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:36
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:668
This instruction implements SINT_TO_FP with the integer source in memory and FP reg result...
static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG, const SparcTargetLowering &TLI, const SparcSubtarget *Subtarget)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Integer horizontal add/sub.
This represents a list of ValueType&#39;s that has been intern&#39;d by a SelectionDAG.
Copies a 64-bit value from the low word of an XMM vector to an MMX vector.
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:427
Context object for machine code objects.
Definition: MCContext.h:59
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG)
Copies a 32-bit value from the low word of a MMX vector to a GPR.
This is a fast-path instruction selection class that generates poor code and doesn&#39;t support illegal ...
Definition: FastISel.h:67
X86 FP SETCC, similar to above, but with output as an i1 mask and with optional rounding mode...
Return from interrupt. Operand 0 is the number of bytes to pop.
This contains information for each constraint that we are lowering.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
const SDValue & getIndex() const
const SDValue & getBasePtr() const
An instruction for storing to memory.
Definition: Instructions.h:306
static bool classof(const SDNode *N)
X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
const SDValue & getBasePtr() const
virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const
Floating point horizontal add/sub.
unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override
amdgpu Simplify well known AMD library false Value * Callee
Bitwise logical XOR of floating point values.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
static bool classof(const SDNode *N)
const SDValue & getMask() const
Machine Value Type.
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This instruction implements an extending load to FP stack slots.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Insert any element of a 4 x float vector into any element of a destination 4 x float vector.
unsigned getScalarSizeInBits() const
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
This is an important base class in LLVM.
Definition: Constant.h:42
Repeat fill, corresponds to X86::REP_STOSx.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:891
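As a sketch, the variants can be distinguished with a switch; NON_EXTLOAD is the plain case, and the three extending forms differ in how the high bits of the result are produced:

    #include "llvm/CodeGen/ISDOpcodes.h"
    #include "llvm/Support/ErrorHandling.h"

    const char *describeExt(llvm::ISD::LoadExtType ExtTy) {
      switch (ExtTy) {
      case llvm::ISD::NON_EXTLOAD: return "plain load";
      case llvm::ISD::EXTLOAD:     return "any-extending load";
      case llvm::ISD::SEXTLOAD:    return "sign-extending load";
      case llvm::ISD::ZEXTLOAD:    return "zero-extending load";
      }
      llvm_unreachable("unknown LoadExtType");
    }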
bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AddrSpace) const override
Return true if it is expected to be cheaper to do a store of a non-zero vector constant with the give...
X86 conditional branches.
const SDValue & getMask() const
Insert the lower 16-bits of a 32-bit value to a vector, corresponds to X86::PINSRW.
Commutative FMIN and FMAX.
On Darwin, this node represents the result of the popl at function entry, used for PIC code...
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override
Use bitwise logic to make pairs of compares more efficient.
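The fold this hook gates can be sketched in pseudo-IR; when the target returns true for VT, a pair of equality compares may be merged into bitwise logic plus a single compare against zero (a sketch of the intent, not the exact in-tree pattern):

    // (icmp eq A, B) & (icmp eq C, D)
    //   --> icmp eq ((A xor B) | (C xor D)), 0
    // One comparison plus cheap bitwise ops instead of two comparisons.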
const SDValue & getValue() const
std::vector< ArgListEntry > ArgListTy
Extended Value Type.
Definition: ValueTypes.h:34
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
This structure contains all information that is necessary for lowering calls.
These operations represent an abstract X86 call instruction, which includes a bunch of information...
Floating point max and min.
TruncSStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
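The check amounts to roughly the following (a sketch; X86ScalarSSEf32/X86ScalarSSEf64 are assumed to be the subtarget's SSE1/SSE2 availability flags):

    bool isScalarFPTypeInSSEReg(EVT VT) const {
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 needs SSE2
             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 needs SSE1
    }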
Copies a GPR into the low 32-bit word of a MMX vector and zero out the high word. ...
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
Definition: Function.h:194
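A small sketch querying a function's calling convention (usesStdCall is a hypothetical helper name):

    #include "llvm/IR/Function.h"

    // Hypothetical helper: true when F uses the 32-bit x86 stdcall convention.
    static bool usesStdCall(const llvm::Function &F) {
      return F.getCallingConv() == llvm::CallingConv::X86_StdCall;
    }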
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:209
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
Provides information about what library functions are available for the current target.
X86 Read Time-Stamp Counter and Processor ID.
CCValAssign - Represent assignment of one arg/retval to a location.
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Bit scan reverse.
Floating point reciprocal-sqrt and reciprocal approximation.
static const int FIRST_TARGET_MEMORY_OPCODE
FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations which do not reference a specific me...
Definition: ISDOpcodes.h:841
const SDValue & getValue() const
Represents one node in the SelectionDAG.
X86 bit-test instructions.
MaskedTruncSStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
Class for arbitrary precision integers.
Definition: APInt.h:69
static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG)
This instruction implements FP_TO_SINT with the integer destination in memory and a FP reg source...
Bit scan forward.
const char * getClearCacheBuiltinName() const override
Intel processors have a unified instruction and data cache.
bool ShouldShrinkFPConstant(EVT VT) const override
If true, then instruction selection should seek to shrink the FP constant of the specified type to a ...
Representation of each machine instruction.
Definition: MachineInstr.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
Insert the lower 8-bits of a 32-bit value to a vector, corresponds to X86::PINSRB.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
A wrapper node for TargetConstantPool, TargetJumpTable, TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, MCSymbol and TargetBlockAddress.
Bitwise logical AND of floating point values.
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
static bool classof(const SDNode *N)
X86MaskedStoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
LOCK-prefixed arithmetic read-modify-write instructions.
Extract a 16-bit value from a vector and zero extend it to i32, corresponds to X86::PEXTRW.
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override
EVT is not used in-tree, but is used by out-of-tree targets.
Blend where the selector is an immediate.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
This instruction implements a truncating store to FP stack slots.
Combined add and sub on an FP vector.
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
This instruction grabs the address of the next argument from a va_list.
LLVM Value Representation.
Definition: Value.h:73
Bitwise logical OR of floating point values.
Dynamic (non-constant condition) vector blend where only the sign bits of the condition elements are ...
X86 Read Performance Monitoring Counters.
bool isZeroNode(SDValue Elt)
Returns true if Elt is a constant zero or floating point constant +0.0.
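A hedged sketch of a typical use: counting zero lanes while scanning the operands of a BUILD_VECTOR node (illustrative only; countZeroLanes is a hypothetical helper):

    // Sketch: count the zero lanes among the operands of node N.
    unsigned countZeroLanes(const llvm::SDNode *N) {
      unsigned ZeroLanes = 0;
      for (const llvm::SDValue &Op : N->op_values())
        if (llvm::X86::isZeroNode(Op))
          ++ZeroLanes; // lane is integer 0 or FP +0.0
      return ZeroLanes;
    }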
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement=true)
Returns true if the given offset can fit into the displacement field of the instruction.
bool hasBitPreservingFPLogic(EVT VT) const override
Return true if it is safe to transform an integer-domain bitwise operation into the equivalent floati...
Compute Sum of Absolute Differences.
Scalar intrinsic floating point max and min.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
void scaleShuffleMask(int Scale, ArrayRef< T > Mask, SmallVectorImpl< T > &ScaledMask)
Helper function to scale a shuffle or target shuffle mask, replacing each mask index with the scaled ...
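A worked sketch: scaling a two-lane mask by 2 replaces each index i with the consecutive pair 2*i, 2*i+1 (scaleShuffleMask here is the template declared just above):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"

    void scaleExample() {
      int Mask[] = {1, 0};
      llvm::SmallVector<int, 8> Scaled;
      llvm::scaleShuffleMask(2, llvm::makeArrayRef(Mask), Scaled);
      // Scaled now holds {2, 3, 0, 1}.
    }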
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
Shuffle 16 8-bit values within a vector.
This file describes how to lower LLVM code to machine code.
Special wrapper used under X86-64 PIC mode for RIP relative displacements.