//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
    // X86 Specific DAG Nodes
    enum NodeType : unsigned {
      // Start the numbering where the builtin ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// Bit scan forward.
      BSF,
      /// Bit scan reverse.
      BSR,

      /// Double shift instructions. These correspond to
      /// X86::SHLDxx and X86::SHRDxx instructions.
      SHLD,
      SHRD,

      /// Bitwise logical AND of floating point values. This corresponds
      /// to X86::ANDPS or X86::ANDPD.
      FAND,

      /// Bitwise logical OR of floating point values. This corresponds
      /// to X86::ORPS or X86::ORPD.
      FOR,

      /// Bitwise logical XOR of floating point values. This corresponds
      /// to X86::XORPS or X86::XORPD.
      FXOR,

      /// Bitwise logical ANDNOT of floating point values. This
      /// corresponds to X86::ANDNPS or X86::ANDNPD.
      FANDN,

      /// These operations represent an abstract X86 call
      /// instruction, which includes a bunch of information. In particular the
      /// operands of these nodes are:
      ///
      ///   #0 - The incoming token chain
      ///   #1 - The callee
      ///   #2 - The number of arg bytes the caller pushes on the stack.
      ///   #3 - The number of arg bytes the callee pops off the stack.
      ///   #4 - The value to pass in AL/AX/EAX (optional)
      ///   #5 - The value to pass in DL/DX/EDX (optional)
      ///
      /// The result values of these nodes are:
      ///
      ///   #0 - The outgoing token chain
      ///   #1 - The first register result value (optional)
      ///   #2 - The second register result value (optional)
      ///
      CALL,

      /// Same as call except it adds the NoTrack prefix.
      NT_CALL,

      /// This operation implements the lowering for readcyclecounter.
      RDTSC_DAG,

      /// X86 Read Time-Stamp Counter and Processor ID.
      RDTSCP_DAG,

      /// X86 Read Performance Monitoring Counters.
      RDPMC_DAG,

      /// X86 compare and logical compare instructions.
      CMP, COMI, UCOMI,

      /// X86 bit-test instructions.
      BT,

      /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
      /// operand, usually produced by a CMP instruction.
      SETCC,

      /// X86 Select
      SELECT, SELECTS,

      // Same as SETCC except it's materialized with a sbb and the value is all
      // ones or all zeros.
      SETCC_CARRY, // R = carry_bit ? ~0 : 0

      /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
      /// Operands are two FP values to compare; result is a mask of
      /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
      FSETCC,

      /// X86 FP SETCC, similar to above, but with output as an i1 mask and
      /// with optional rounding mode.
      FSETCCM, FSETCCM_RND,

      /// X86 conditional moves. Operand 0 and operand 1 are the two values
      /// to select from. Operand 2 is the condition code, and operand 3 is the
      /// flag operand produced by a CMP or TEST instruction. It also writes a
      /// flag result.
      CMOV,

      /// X86 conditional branches. Operand 0 is the chain operand, operand 1
      /// is the block to branch to if the condition is true, operand 2 is the
      /// condition code, and operand 3 is the flag operand produced by a CMP
      /// or TEST instruction.
      BRCOND,

      /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
      /// operand 1 is the target address.
      NT_BRIND,

      /// Return with a flag operand. Operand 0 is the chain operand, operand
      /// 1 is the number of bytes of stack to pop.
      RET_FLAG,

      /// Return from interrupt. Operand 0 is the number of bytes to pop.
      IRET,

      /// Repeat fill, corresponds to X86::REP_STOSx.
      REP_STOS,

      /// Repeat move, corresponds to X86::REP_MOVSx.
      REP_MOVS,

      /// On Darwin, this node represents the result of the popl
      /// at function entry, used for PIC code.
      GlobalBaseReg,

      /// A wrapper node for TargetConstantPool, TargetJumpTable,
      /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
      /// MCSymbol and TargetBlockAddress.
      Wrapper,

      /// Special wrapper used under X86-64 PIC mode for RIP
      /// relative displacements.
      WrapperRIP,

      /// Copies a 64-bit value from the low word of an XMM vector
      /// to an MMX vector.
      MOVDQ2Q,

      /// Copies a 32-bit value from the low word of a MMX
      /// vector to a GPR.
      MMX_MOVD2W,

      /// Copies a GPR into the low 32-bit word of a MMX vector
      /// and zeroes out the high word.
      MMX_MOVW2D,

      /// Extract an 8-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRB.
      PEXTRB,

      /// Extract a 16-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRW.
      PEXTRW,

      /// Insert any element of a 4 x float vector into any element
      /// of a destination 4 x float vector.
      INSERTPS,

      /// Insert the lower 8-bits of a 32-bit value into a vector,
      /// corresponds to X86::PINSRB.
      PINSRB,

      /// Insert the lower 16-bits of a 32-bit value into a vector,
      /// corresponds to X86::PINSRW.
      PINSRW,

      /// Shuffle 16 8-bit values within a vector.
      PSHUFB,

      /// Compute Sum of Absolute Differences.
      PSADBW,
      /// Compute Double Block Packed Sum-Absolute-Differences.
      DBPSADBW,

      /// Bitwise Logical AND NOT of Packed FP values.
      ANDNP,

      /// Blend where the selector is an immediate.
      BLENDI,

      /// Dynamic (non-constant condition) vector blend where only the sign bits
      /// of the condition elements are used. This is used to enforce that the
      /// condition mask is not valid for generic VSELECT optimizations.
      SHRUNKBLEND,

      /// Combined add and sub on an FP vector.
      ADDSUB,

      // FP vector ops with rounding mode.
      FADD_RND, FADDS_RND,
      FSUB_RND, FSUBS_RND,
      FMUL_RND, FMULS_RND,
      FDIV_RND, FDIVS_RND,
      FMAX_RND, FMAXS_RND,
      FMIN_RND, FMINS_RND,
      FSQRT_RND, FSQRTS_RND,

      // FP vector get exponent.
      FGETEXP_RND, FGETEXPS_RND,
      // Extract Normalized Mantissas.
      VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
      // FP Scale.
      SCALEF,
      SCALEFS,

      // Integer add/sub with unsigned saturation.
      ADDUS,
      SUBUS,

      // Integer add/sub with signed saturation.
      ADDS,
      SUBS,

      // Unsigned Integer average.
      AVG,

      /// Integer horizontal add/sub.
      HADD,
      HSUB,

      /// Floating point horizontal add/sub.
      FHADD,
      FHSUB,

      // Detect Conflicts Within a Vector.
      CONFLICT,

      /// Floating point max and min.
      FMAX, FMIN,

      /// Commutative FMIN and FMAX.
      FMAXC, FMINC,

      /// Scalar intrinsic floating point max and min.
      FMAXS, FMINS,

      /// Floating point reciprocal-sqrt and reciprocal approximation.
      /// Note that these typically require refinement
      /// in order to obtain suitable precision.
      FRSQRT, FRCP,

      // AVX-512 reciprocal approximations with a little more precision.
      RSQRT14, RSQRT14S, RCP14, RCP14S,

      // Thread Local Storage.
      TLSADDR,

      // Thread Local Storage. A call to get the start address
      // of the TLS block for the current module.
      TLSBASEADDR,

      // Thread Local Storage. When calling to an OS provided
      // thunk at the address from an earlier relocation.
      TLSCALL,

      // Exception Handling helpers.
      EH_RETURN,

      // SjLj exception handling setjmp.
      EH_SJLJ_SETJMP,

      // SjLj exception handling longjmp.
      EH_SJLJ_LONGJMP,

      // SjLj exception handling dispatch.
      EH_SJLJ_SETUP_DISPATCH,

      /// Tail call return. See X86TargetLowering::LowerCall for
      /// the list of operands.
      TC_RETURN,

      // Vector move to low scalar and zero higher vector elements.
      VZEXT_MOVL,

      // Vector integer zero-extend.
      VZEXT,
      // Vector integer signed-extend.
      VSEXT,

      // Vector integer truncate.
      VTRUNC,
      // Vector integer truncate with unsigned/signed saturation.
      VTRUNCUS, VTRUNCS,

      // Vector FP extend.
      VFPEXT, VFPEXT_RND, VFPEXTS_RND,

      // Vector FP round.
      VFPROUND, VFPROUND_RND, VFPROUNDS_RND,

      // 128-bit vector logical left / right shift.
      VSHLDQ, VSRLDQ,

      // Vector shift elements.
      VSHL, VSRL, VSRA,

      // Vector variable shift right arithmetic.
      // Unlike ISD::SRA, if the shift count is greater than the element size,
      // the sign bit is used to fill the destination data element.
      VSRAV,

      // Vector shift elements by immediate.
      VSHLI, VSRLI, VSRAI,

      // Shifts of mask registers.
      KSHIFTL, KSHIFTR,

      // Bit rotate by immediate.
      VROTLI, VROTRI,

      // Vector packed double/float comparison.
      CMPP,

      // Vector integer comparisons.
      PCMPEQ, PCMPGT,

      // v8i16 Horizontal minimum and position.
      PHMINPOS,

      MULTISHIFT,

      /// Vector comparison generating mask bits for fp and
      /// integer signed and unsigned data types.
      CMPM,
      // Vector comparison with rounding mode for FP values.
      CMPM_RND,

      // Arithmetic operations with FLAGS results.
      ADD, SUB, ADC, SBB, SMUL,
      INC, DEC, OR, XOR, AND,

      // Bit field extract.
      BEXTR,

      // LOW, HI, FLAGS = umul LHS, RHS.
      UMUL,

      // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
      SMUL8, UMUL8,

      // 8-bit divrem that zero-extends the high result (AH).
      UDIVREM8_ZEXT_HREG,
      SDIVREM8_SEXT_HREG,

      // X86-specific multiply by immediate.
      MUL_IMM,

      // Vector sign bit extraction.
      MOVMSK,

      // Vector bitwise comparisons.
      PTEST,

      // Vector packed fp sign bitwise comparisons.
      TESTP,

      // OR/AND test for masks.
      KORTEST,
      KTEST,

      // ADD for masks.
      KADD,

      // Several flavors of instructions with vector shuffle behaviors.
      // Saturated signed/unsigned packing.
      PACKSS,
      PACKUS,
      // Intra-lane alignr.
      PALIGNR,
      // AVX512 inter-lane alignr.
      VALIGN,
      PSHUFD,
      PSHUFHW,
      PSHUFLW,
      SHUFP,
      // VBMI2 Concat & Shift.
      VSHLD,
      VSHRD,
      VSHLDV,
      VSHRDV,
      // Shuffle Packed Values at 128-bit granularity.
      SHUF128,
      MOVDDUP,
      MOVSHDUP,
      MOVSLDUP,
      MOVLHPS,
      MOVHLPS,
      MOVSD,
      MOVSS,
      UNPCKL,
      UNPCKH,
      VPERMILPV,
      VPERMILPI,
      VPERMI,
      VPERM2X128,

      // Variable Permute (VPERM).
      // Res = VPERMV MaskV, V0
      VPERMV,

      // 3-op Variable Permute (VPERMT2).
      // Res = VPERMV3 V0, MaskV, V1
      VPERMV3,

      // Bitwise ternary logic.
      VPTERNLOG,
      // Fix Up Special Packed Float32/64 values.
      VFIXUPIMM,
      VFIXUPIMMS,
      // Range Restriction Calculation For Packed Pairs of Float32/64 values.
      VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
      // Reduce - Perform Reduction Transformation on scalar/packed FP.
      VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
      // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
      // Also used by the legacy (V)ROUND intrinsics where we mask out the
      // scaling part of the immediate.
      VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
      // Tests types of FP values for packed types.
      VFPCLASS,
      // Tests types of FP values for scalar types.
      VFPCLASSS,

      // Broadcast scalar to vector.
      VBROADCAST,
      // Broadcast mask to vector.
      VBROADCASTM,
      // Broadcast subvector to vector.
      SUBV_BROADCAST,

      /// SSE4A Extraction and Insertion.
      EXTRQI, INSERTQI,

      // XOP arithmetic/logical shifts.
      VPSHA, VPSHL,
      // XOP signed/unsigned integer comparisons.
      VPCOM, VPCOMU,
      // XOP packed permute bytes.
      VPPERM,
      // XOP two source permutation.
      VPERMIL2,

      // Vector multiply packed unsigned doubleword integers.
      PMULUDQ,
      // Vector multiply packed signed doubleword integers.
      PMULDQ,
      // Vector Multiply Packed Unsigned Integers with Round and Scale.
      MULHRS,

      // Multiply and Add Packed Integers.
      VPMADDUBSW, VPMADDWD,

      // AVX512IFMA multiply and add.
      // NOTE: These are different than the instruction and perform
      // op0 x op1 + op2.
      VPMADD52L, VPMADD52H,

      // VNNI
      VPDPBUSD,
      VPDPBUSDS,
      VPDPWSSD,
      VPDPWSSDS,

      // FMA nodes.
      // We use the target independent ISD::FMA for the non-inverted case.
      FNMADD,
      FMSUB,
      FNMSUB,
      FMADDSUB,
      FMSUBADD,

      // FMA with rounding mode.
      FMADD_RND,
      FNMADD_RND,
      FMSUB_RND,
      FNMSUB_RND,
      FMADDSUB_RND,
      FMSUBADD_RND,

      // Compress and expand.
      COMPRESS,
      EXPAND,

      // Bits shuffle.
      VPSHUFBITQMB,

      // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
      SINT_TO_FP_RND, UINT_TO_FP_RND,
      SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,

      // Vector float/double to signed/unsigned integer.
      CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
      // Scalar float/double to signed/unsigned integer.
      CVTS2SI_RND, CVTS2UI_RND,

      // Vector float/double to signed/unsigned integer with truncation.
      CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
      // Scalar float/double to signed/unsigned integer with truncation.
      CVTTS2SI_RND, CVTTS2UI_RND,

      // Vector signed/unsigned integer to float/double.
      CVTSI2P, CVTUI2P,

      // Save xmm argument registers to the stack, according to %al. An operator
      // is needed so that this can be expanded with control flow.
      VASTART_SAVE_XMM_REGS,

      // Windows's _chkstk call to do stack probing.
      WIN_ALLOCA,

      // For allocating variable amounts of stack space when using
      // segmented stacks. Checks if the current stacklet has enough space, and
      // falls back to heap allocation if not.
      SEG_ALLOCA,

      // Memory barriers.
      MEMBARRIER,
      MFENCE,

      // Store FP status word into i16 register.
      FNSTSW16r,

      // Store contents of %ah into %eflags.
      SAHF,

      // Get a random integer and indicate whether it is valid in CF.
      RDRAND,

      // Get a NIST SP800-90B & C compliant random integer and
      // indicate whether it is valid in CF.
      RDSEED,

      // SSE42 string comparisons.
      // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
      // will emit one or two instructions based on which results are used. If
      // flags and index/mask are both used, this allows us to use a single
      // instruction since we won't have to pick an opcode for flags. Instead we
      // can rely on the DAG to CSE everything and decide at isel.
      PCMPISTR,
      PCMPESTR,

      // Test if in transactional execution.
      XTEST,

      // ERI instructions.
      RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,

      // Conversions between float and half-float.
      CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,

      // Galois Field Arithmetic Instructions
      GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,

      // LWP insert record.
      LWPINS,

      // User level wait
      UMWAIT, TPAUSE,

      // Compare and swap.
      LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
      LCMPXCHG8_DAG,
      LCMPXCHG16_DAG,
      LCMPXCHG8_SAVE_EBX_DAG,
      LCMPXCHG16_SAVE_RBX_DAG,

      /// LOCK-prefixed arithmetic read-modify-write instructions.
      /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
      LADD, LSUB, LOR, LXOR, LAND,

      // Load, scalar_to_vector, and zero extend.
      VZEXT_LOAD,

      // Store FP control word into i16 memory.
      FNSTCW16m,

      /// This instruction implements FP_TO_SINT with the
      /// integer destination in memory and a FP reg source. This corresponds
      /// to the X86::FIST*m instructions and the rounding mode change stuff. It
      /// has two inputs (token chain and address) and two outputs (int value
      /// and token chain).
      FP_TO_INT16_IN_MEM,
      FP_TO_INT32_IN_MEM,
      FP_TO_INT64_IN_MEM,

      /// This instruction implements SINT_TO_FP with the
      /// integer source in memory and FP reg result. This corresponds to the
      /// X86::FILD*m instructions. It has three inputs (token chain, address,
      /// and source type) and two outputs (FP value and token chain). FILD_FLAG
      /// also produces a flag.
      FILD,
      FILD_FLAG,

      /// This instruction implements an extending load to FP stack slots.
      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
      /// operand, a ptr to load from, and a ValueType node indicating the type
      /// to load to.
      FLD,

      /// This instruction implements a truncating store to FP stack
      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
      /// chain operand, value to store, address, and a ValueType to store it
      /// as.
      FST,

      /// This instruction grabs the address of the next argument
      /// from a va_list. (reads and modifies the va_list in memory)
      VAARG_64,

      // Vector truncating store with unsigned/signed saturation.
      VTRUNCSTOREUS, VTRUNCSTORES,
      // Vector truncating masked store with unsigned/signed saturation.
      VMTRUNCSTOREUS, VMTRUNCSTORES,

      // X86 specific gather and scatter.
      MGATHER, MSCATTER,

      // WARNING: Do not add anything at the end unless you want the node to
      // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
      // opcodes will be treated as target memory ops!
    };
  } // end namespace X86ISD

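  // Illustrative sketch (not part of the original header): X86ISD opcodes are
  // built through the normal SelectionDAG interface like any other
  // target-specific node. For example, an EFLAGS-producing compare is created
  // as (LHS/RHS are assumed operands; X86 models EFLAGS as MVT::i32):
  //
  //   SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, LHS, RHS);
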
  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset can fit into the displacement field
    /// of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement = true);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

  } // end namespace X86

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    unsigned getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    /// Returns the target specific optimal type for load
    /// and store operations as a result of memset, memcpy, and memmove
    /// lowering. If DstAlign is zero, that means the destination alignment can
    /// satisfy any constraint. Similarly, if SrcAlign is zero it means there
    /// isn't a need to check it against the alignment requirement, probably
    /// because the source does not need to be loaded. If 'IsMemset' is true,
    /// that means it's expanding a memset. If 'ZeroMemset' is true, that means
    /// it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
    /// source is constant so it does not need to be loaded.
    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                            bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                            MachineFunction &MF) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
    /// also does type conversion. Note the specified type doesn't have to be
    /// legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                        bool *Fast) const override;

    /// Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Places new result values for the node in Results (their number
    /// and types must exactly match those of the original return values of
    /// the node), or leaves Results empty, which indicates that the node is not
    /// to be custom lowered after all.
    void LowerOperationWrapper(SDNode *N,
                               SmallVectorImpl<SDValue> &Results,
                               SelectionDAG &DAG) const override;

    /// Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    // Return true if it is profitable to combine a BUILD_VECTOR with a
    // stride-pattern to a shuffle and a truncate.
    // Example of such a combine:
    // v4i32 build_vector((extract_elt V, 1),
    //                    (extract_elt V, 3),
    //                    (extract_elt V, 5),
    //                    (extract_elt V, 7))
    //  -->
    // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
    // v4i64)
    bool isDesirableToCombineBuildVectorToShuffleTruncate(
        ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
    /// instruction encodings are longer and some i16 instructions are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
    /// i16 is legal, but undesirable since i16 instruction encodings are longer
    /// and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    bool mergeStoresAfterLegalization() const override { return true; }

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const SelectionDAG &DAG) const override;

    bool isCheapToSpeculateCttz() const override;

    bool isCheapToSpeculateCtlz() const override;

    bool isCtlzFast() const override;

    bool hasBitPreservingFPLogic(EVT VT) const override {
      return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
    }

    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // add one store instruction. There is potentially a more significant
      // benefit because it avoids the float->int domain switch for the input
      // value. So it is more likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions and add one store instruction (costing one more store
      // buffer). Since the benefit is less clear, we leave such pairs out
      // until we have a test case proving it is a win.
      return false;
    }

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool hasAndNot(SDValue Y) const override;

    bool preferShiftsToClearExtremeBits(SDValue Y) const override;

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Allow multiple load pairs per block for smaller and faster code.
    unsigned getMemcmpEqZeroLoadsPerBlock() const override {
      return 2;
    }

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    SDValue unwrapAddress(SDValue N) const override;

    bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
                        int64_t &Offset) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
      getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                     const char *constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops. If hasMemory is true it means one of the asm
    /// constraints of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    unsigned
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "i")
        return InlineAsm::Constraint_i;
      else if (ConstraintCode == "o")
        return InlineAsm::Constraint_o;
      else if (ConstraintCode == "v")
        return InlineAsm::Constraint_v;
      else if (ConstraintCode == "X")
        return InlineAsm::Constraint_X;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for the
    /// register. This should only be used for C_Register constraints. On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// Return true if the specified immediate is legal
    /// icmp immediate, that is the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// Return the cost of the scaling factor used in the addressing
    /// mode represented by AM for this target, for a load/store
    /// of the specified type.
    /// If the AM is supported, the return value must be >= 0.
    /// If the AM is not supported, it returns a negative value.
    int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS) const override;

    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
    /// register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

    /// Return true if it's profitable to narrow
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
    /// from i32 to i8 but not from i32 to i16.
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

    /// Given an intrinsic, checks if on the target the intrinsic will need to map
    /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
    /// true and stores the intrinsic information into the IntrinsicInfo that was
    /// passed to the function.
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
    /// be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
    /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
    /// constant pool entry.
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Returns true if lowering to a jump table is allowed.
    bool areJTsAllowed(const Function *Fn) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override {
      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
      // expensive than a straight movsd. On the other hand, it's important to
      // shrink long double fp constant since fldt is very slow.
      return !X86ScalarSSEf64 || VT == MVT::f80;
    }

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;

    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const {
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
    }

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                      unsigned AddrSpace) const override {
      // If we can replace more than 2 scalar stores, there will be a reduction
      // in instructions even after we add a vector constant load.
      return NumElem > 2;
    }

    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;

    /// Intel processors have a unified instruction and data cache.
    const char * getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }

1035 
1036  unsigned getRegisterByName(const char* RegName, EVT VT,
1037  SelectionDAG &DAG) const override;
1038 
1039  /// If a physical register, this returns the register that receives the
1040  /// exception address on entry to an EH pad.
1041  unsigned
1042  getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1043 
1044  /// If a physical register, this returns the register that receives the
1045  /// exception typeid on entry to a landing pad.
1046  unsigned
1047  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1048 
1049  virtual bool needsFixedCatchObjects() const override;
1050 
    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// If the target has a standard location for the stack protector cookie,
    /// returns the address of that location. Otherwise, returns nullptr.
    Value *getIRStackGuard(IRBuilder<> &IRB) const override;

    bool useLoadStackGuardNode() const override;
    bool useStackGuardXorFP() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Value *getSSPStackGuardCheck(const Module &M) const override;
    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                const SDLoc &DL) const override;

    /// Return true if the target stores SafeStack pointer at a fixed offset in
    /// some non-standard address space, and populates the address space and
    /// offset as appropriate.
    Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

    SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                      SelectionDAG &DAG) const;

    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;

    /// Customize the preferred legalization strategy for certain types.
    LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;

    MVT getRegisterTypeForCallingConv(LLVMContext &Context,
                                      EVT VT) const override;

    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                           EVT VT) const override;

    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

    bool supportSwiftError() const override;

    StringRef getStackProbeSymbolName(MachineFunction &MF) const override;

    bool hasVectorBlend() const override { return true; }

    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

    /// Lower interleaved load(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;

    /// Lower interleaved store(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                   SDValue Addr, SelectionDAG &DAG)
                                   const override;

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Keep a reference to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget &Subtarget;

    /// Select between SSE or x87 floating point ops.
    /// When SSE is available, use it for f32 operations.
    /// When SSE2 is available, use it for f64 operations.
    bool X86ScalarSSEf32;
    bool X86ScalarSSEf64;

    /// A list of legal FP immediates.
    std::vector<APFloat> LegalFPImmediates;

    /// Indicate that this x86 target can instruction
    /// select the specified FP immediate natively.
    void addLegalFPImmediate(const APFloat& Imm) {
      LegalFPImmediates.push_back(Imm);
    }

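    // Illustrative sketch (not part of the original header): the constructor
    // registers immediates the ISA can materialize directly, e.g.
    //
    //   addLegalFPImmediate(APFloat(+0.0)); // xorps/fldz zero idiom
    //   addLegalFPImmediate(APFloat(+1.0)); // fld1
    //
    // so that isFPImmLegal can simply scan LegalFPImmediates.
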
    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. Targets
    /// that want to do tail call optimization should implement this function.
    bool IsEligibleForTailCallOptimization(SDValue Callee,
                                           CallingConv::ID CalleeCC,
                                           bool isVarArg,
                                           bool isCalleeStructRet,
                                           bool isCallerStructRet,
                                           Type *RetTy,
                                           const SmallVectorImpl<ISD::OutputArg> &Outs,
                                           const SmallVectorImpl<SDValue> &OutVals,
                                           const SmallVectorImpl<ISD::InputArg> &Ins,
                                           SelectionDAG &DAG) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace(void) const;

    std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                               bool isSigned,
                                               bool isReplace) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                  const unsigned char OpFlags = 0) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
                               int64_t Offset, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
          MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *
    EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                             MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit nodes that will be selected as "test Op0,Op0", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
                     SelectionDAG &DAG) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // Base class for all X86 non-masked store operations.
  class X86StoreSDNode : public MemSDNode {
  public:
    X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
                   SDVTList VTs, EVT MemVT,
                   MachineMemOperand *MMO)
      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
    const SDValue &getValue() const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES ||
             N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // Base class for all X86 masked store operations.
  // The class has the same order of operands as MaskedStoreSDNode for
  // convenience.
  class X86MaskedStoreSDNode : public MemSDNode {
  public:
    X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                         MachineMemOperand *MMO)
      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getBasePtr() const { return getOperand(1); }
    const SDValue &getMask() const    { return getOperand(2); }
    const SDValue &getValue() const   { return getOperand(3); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
             N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 Truncating Store with Signed saturation.
  class TruncSStoreSDNode : public X86StoreSDNode {
  public:
    TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
                      SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
      : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES;
    }
  };

  // X86 Truncating Store with Unsigned saturation.
  class TruncUSStoreSDNode : public X86StoreSDNode {
  public:
    TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
      : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // X86 Truncating Masked Store with Signed saturation.
  class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncSStoreSDNode(unsigned Order,
                            const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                            MachineMemOperand *MMO)
      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES;
    }
  };

  // X86 Truncating Masked Store with Unsigned saturation.
  class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncUSStoreSDNode(unsigned Order,
                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                             MachineMemOperand *MMO)
      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemSDNode {
  public:
    X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
                                 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                                 MachineMemOperand *MMO)
      : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex()   const { return getOperand(4); }
    const SDValue &getMask()    const { return getOperand(2); }
    const SDValue &getValue()   const { return getOperand(1); }
    const SDValue &getScale()   const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                          EVT MemVT, MachineMemOperand *MMO)
      : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
                                     MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                           EVT MemVT, MachineMemOperand *MMO)
      : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
                                     MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

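  // Illustrative sketch (not part of the original header): the classof hooks
  // above let the generic SelectionDAG casting utilities dispatch on these
  // node classes, e.g.:
  //
  //   if (auto *Gather = dyn_cast<X86MaskedGatherSDNode>(N)) {
  //     SDValue Index = Gather->getIndex(); // operand 4
  //     SDValue Mask  = Gather->getMask();  // operand 2
  //   }
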
  /// Generate unpacklo/unpackhi shuffle mask.
  template <typename T = int>
  void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
                               bool Unary) {
    assert(Mask.empty() && "Expected an empty shuffle mask vector");
    int NumElts = VT.getVectorNumElements();
    int NumEltsInLane = 128 / VT.getScalarSizeInBits();
    for (int i = 0; i < NumElts; ++i) {
      unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
      int Pos = (i % NumEltsInLane) / 2 + LaneStart;
      Pos += (Unary ? 0 : NumElts * (i % 2));
      Pos += (Lo ? 0 : NumEltsInLane / 2);
      Mask.push_back(Pos);
    }
  }

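  // Illustrative sketch (not part of the original header): for a binary
  // v4i32 unpacklo the helper produces the familiar <0, 4, 1, 5> mask:
  //
  //   SmallVector<int, 4> Mask;
  //   createUnpackShuffleMask(MVT::v4i32, Mask, /*Lo=*/true, /*Unary=*/false);
  //   // Mask == {0, 4, 1, 5}
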
  /// Helper function to scale a shuffle or target shuffle mask, replacing each
  /// mask index with the scaled sequential indices for an equivalent narrowed
  /// mask. This is the reverse process to canWidenShuffleElements, but can
  /// always succeed.
  template <typename T>
  void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
                        SmallVectorImpl<T> &ScaledMask) {
    assert(0 < Scale && "Unexpected scaling factor");
    int NumElts = Mask.size();
    ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);

    for (int i = 0; i != NumElts; ++i) {
      int M = Mask[i];

      // Repeat sentinel values in every mask element.
      if (M < 0) {
        for (int s = 0; s != Scale; ++s)
          ScaledMask[(Scale * i) + s] = M;
        continue;
      }

      // Scale mask element and increment across each mask element.
      for (int s = 0; s != Scale; ++s)
        ScaledMask[(Scale * i) + s] = (Scale * M) + s;
    }
  }
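
  // Illustrative sketch (not part of the original header): scaling the
  // two-element mask <1, -1> by 2 yields <2, 3, -1, -1>:
  //
  //   SmallVector<int, 4> ScaledMask;
  //   scaleShuffleMask<int>(2, {1, -1}, ScaledMask);
  //   // ScaledMask == {2, 3, -1, -1}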
} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
Commutative FMIN and FMAX.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
On Darwin, this node represents the result of the popl at function entry, used for PIC code...
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override
Use bitwise logic to make pairs of compares more efficient.
static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG, const SparcTargetLowering &TLI, bool hasHardQuad)
const SDValue & getValue() const
static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
lazy value info
std::vector< ArgListEntry > ArgListTy
Extended Value Type.
Definition: ValueTypes.h:34
const AMDGPUAS & AS
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
This structure contains all information that is necessary for lowering calls.
These operations represent an abstract X86 call instruction, which includes a bunch of information...
Floating point max and min.
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
TruncSStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
Copies a GPR into the low 32-bit word of a MMX vector and zero out the high word. ...
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:199
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
Provides information about what library functions are available for the current target.
X86 Read Time-Stamp Counter and Processor ID.
CCValAssign - Represent assignment of one arg/retval to a location.
AddressSpace
Definition: NVPTXBaseInfo.h:22
unsigned getMemcmpEqZeroLoadsPerBlock() const override
Allow multiple load pairs per block for smaller and faster code.
X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Bit scan reverse.
Floating point reciprocal-sqrt and reciprocal approximation.
static const int FIRST_TARGET_MEMORY_OPCODE
FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations which do not reference a specific me...
Definition: ISDOpcodes.h:836
const SDValue & getValue() const
Represents one node in the SelectionDAG.
X86 bit-test instructions.
static bool Enabled
Definition: Statistic.cpp:51
const Function & getFunction() const
Return the LLVM function that this machine code represents.
static bool classof(const SDNode *N)
MaskedTruncSStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
Class for arbitrary precision integers.
Definition: APInt.h:69
static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG)
static bool classof(const SDNode *N)
This instruction implements FP_TO_SINT with the integer destination in memory and a FP reg source...
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
Bit scan forward.
const char * getClearCacheBuiltinName() const override
Intel processors have a unified instruction and data cache.
amdgpu Simplify well known AMD library false Value Value * Arg
bool ShouldShrinkFPConstant(EVT VT) const override
If true, then instruction selection should seek to shrink the FP constant of the specified type to a ...
Representation of each machine instruction.
Definition: MachineInstr.h:60
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
Insert the lower 8-bits of a 32-bit value to a vector, corresponds to X86::PINSRB.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:62
A wrapper node for TargetConstantPool, TargetJumpTable, TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, MCSymbol and TargetBlockAddress.
Bitwise logical AND of floating point values.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
static bool classof(const SDNode *N)
X86MaskedStoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
LOCK-prefixed arithmetic read-modify-write instructions.
Extract a 16-bit value from a vector and zero extend it to i32, corresponds to X86::PEXTRW.
bool hasVectorBlend() const override
Return true if the target has a vector blend instruction.
Blend where the selector is an immediate.
X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
This instruction implements a truncating store to FP stack slots.
Combined add and sub on an FP vector.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
This instruction grabs the address of the next argument from a va_list.
LLVM Value Representation.
Definition: Value.h:73
Bitwise logical OR of floating point values.
Dynamic (non-constant condition) vector blend where only the sign bits of the condition elements are ...
X86 Read Performance Monitoring Counters.
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
const SDValue & getBasePtr() const
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG, const SparcTargetLowering &TLI, bool hasHardQuad)
IRTranslator LLVM IR MI
bool isZeroNode(SDValue Elt)
Returns true if Elt is a constant zero or floating point constant +0.0.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement=true)
Returns true of the given offset can be fit into displacement field of the instruction.
bool hasBitPreservingFPLogic(EVT VT) const override
Return true if it is safe to transform an integer-domain bitwise operation into the equivalent floati...
Compute Sum of Absolute Differences.
Scalar intrinsic floating point max and min.
MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override
EVT is not used in-tree, but is used by out-of-tree target.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
void scaleShuffleMask(int Scale, ArrayRef< T > Mask, SmallVectorImpl< T > &ScaledMask)
Helper function to scale a shuffle or target shuffle mask, replacing each mask index with the scaled ...
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
BRIND node with NoTrack prefix.
Shuffle 16 8-bit values within a vector.
This file describes how to lower LLVM code to machine code.
Special wrapper used under X86-64 PIC mode for RIP relative displacements.