1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the AArch64TargetLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64ISelLowering.h"
17 #include "AArch64PerfectShuffle.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
22 #include "llvm/ADT/Statistic.h"
27 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/Type.h"
32 #include "llvm/Support/Debug.h"
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "aarch64-lower"
39 
40 STATISTIC(NumTailCalls, "Number of tail calls");
41 STATISTIC(NumShiftInserts, "Number of vector shift inserts");
42 
43 namespace {
44 enum AlignMode {
45  StrictAlign,
46  NoStrictAlign
47 };
48 }
49 
50 static cl::opt<AlignMode>
51 Align(cl::desc("Load/store alignment support"),
52  cl::Hidden, cl::init(NoStrictAlign),
53  cl::values(
54  clEnumValN(StrictAlign, "aarch64-strict-align",
55  "Disallow all unaligned memory accesses"),
56  clEnumValN(NoStrictAlign, "aarch64-no-strict-align",
57  "Allow unaligned memory accesses"),
58  clEnumValEnd));
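// A rough usage sketch (assuming the stock llc driver; the triple is only an
// example):
//   llc -mtriple=aarch64-none-linux-gnu -aarch64-strict-align foo.ll
// With StrictAlign selected, RequireStrictAlign (set in the constructor below)
// becomes true and unaligned accesses are expanded rather than emitted as-is.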
59 
60 // Placeholder until extr generation is tested fully.
61 static cl::opt<bool>
62 EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
63  cl::desc("Allow AArch64 (or (shift)(shift))->extract"),
64  cl::init(true));
65 
66 static cl::opt<bool>
67 EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
68  cl::desc("Allow AArch64 SLI/SRI formation"),
69  cl::init(false));
70 
71 // FIXME: The necessary dtprel relocations don't seem to be supported
72 // well in the GNU bfd and gold linkers at the moment. Therefore, by
73 // default, for now, fall back to GeneralDynamic code generation.
75  "aarch64-elf-ldtls-generation", cl::Hidden,
76  cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
77  cl::init(false));
78 
80  const AArch64Subtarget &STI)
81  : TargetLowering(TM), Subtarget(&STI) {
82 
83  // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
84  // we have to make something up. Arbitrarily, choose ZeroOrOne.
86  // When comparing vectors the result sets the different elements in the
87  // vector to all-one or all-zero.
89 
90  // Set up the register classes.
91  addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
92  addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
93 
94  if (Subtarget->hasFPARMv8()) {
95  addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
96  addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
97  addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
98  addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
99  }
100 
101  if (Subtarget->hasNEON()) {
102  addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
103  addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
104  // Someone set us up the NEON.
105  addDRTypeForNEON(MVT::v2f32);
106  addDRTypeForNEON(MVT::v8i8);
107  addDRTypeForNEON(MVT::v4i16);
108  addDRTypeForNEON(MVT::v2i32);
109  addDRTypeForNEON(MVT::v1i64);
110  addDRTypeForNEON(MVT::v1f64);
111  addDRTypeForNEON(MVT::v4f16);
112 
113  addQRTypeForNEON(MVT::v4f32);
114  addQRTypeForNEON(MVT::v2f64);
115  addQRTypeForNEON(MVT::v16i8);
116  addQRTypeForNEON(MVT::v8i16);
117  addQRTypeForNEON(MVT::v4i32);
118  addQRTypeForNEON(MVT::v2i64);
119  addQRTypeForNEON(MVT::v8f16);
120  }
121 
122  // Compute derived properties from the register classes
124 
125  // Provide all sorts of operation actions
147 
151 
155 
156  // Custom lowering hooks are needed for XOR
157  // to fold it into CSINC/CSINV.
160 
161  // Virtually no operation on f128 is legal, but LLVM can't expand them when
162  // there's a valid register class, so we need custom operations in most cases.
184 
185  // Lowering for many of the conversions is actually specified by the non-f128
186  // type. The LowerXXX function will be trivial when f128 isn't involved.
201 
202  // Variable arguments.
207 
208  // Variable-sized objects.
212 
213  // Exception handling.
214  // FIXME: These are guesses. Has this been defined yet?
215  setExceptionPointerRegister(AArch64::X0);
216  setExceptionSelectorRegister(AArch64::X1);
217 
218  // Constant pool entries
220 
221  // BlockAddress
223 
224  // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
233 
234  // AArch64 lacks both left-rotate and popcount instructions.
237 
238  // AArch64 doesn't have {U|S}MUL_LOHI.
241 
242 
243  // Expand the undefined-at-zero variants of cttz/ctlz to their defined-at-zero
244  // counterparts, which AArch64 supports directly.
249 
252 
261 
262  // Custom lower Add/Sub/Mul with overflow.
275 
284 
285  // f16 is a storage-only type, always promote it to f32.
318 
319  // v4f16 is also a storage-only type, so promote it to v4f32 when that is
320  // known to be safe.
333 
334  // Expand all other v4f16 operations.
335  // FIXME: We could generate better code by promoting some operations to
336  // a pair of v4f32s
363 
364 
365  // v8f16 is also a storage-only type, so expand it.
397 
398  // AArch64 has implementations of a lot of rounding-like FP operations.
399  for (MVT Ty : {MVT::f32, MVT::f64}) {
406  }
407 
409 
410  if (Subtarget->isTargetMachO()) {
411  // For iOS, we don't want the normal expansion of a libcall to
412  // sincos. We want to issue a libcall to __sincos_stret to avoid memory
413  // traffic.
416  } else {
419  }
420 
421  // Make floating-point constants legal for the large code model, so they don't
422  // become loads from the constant pool.
423  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
426  }
427 
428  // AArch64 does not have floating-point extending loads, i1 sign-extending
429  // loads, floating-point truncating stores, or v2i32->v2i16 truncating stores.
430  for (MVT VT : MVT::fp_valuetypes()) {
435  }
436  for (MVT VT : MVT::integer_valuetypes())
438 
446 
449 
450  // Indexed loads and stores are supported.
451  for (unsigned im = (unsigned)ISD::PRE_INC;
465  }
466 
467  // Trap.
469 
470  // We combine OR nodes for bitfield operations.
472 
473  // Vector add and sub nodes may conceal a high-half opportunity.
474  // Also, try to fold ADD into CSINC/CSINV.
477 
481 
483 
490 
492 
496 
500 
504 
506 
508 
509  // Enable TBZ/TBNZ
511  EnableExtLdPromotion = true;
512 
514 
515  RequireStrictAlign = (Align == StrictAlign);
516 
517  setHasExtractBitsInsn(true);
518 
519  if (Subtarget->hasNEON()) {
520  // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
521  // silliness like this:
547 
553 
555 
556  // AArch64 doesn't have direct vector ->f32 conversion instructions for
557  // elements smaller than i32, so promote the input to i32 first.
562  // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
563  // -> v8f16 conversions.
568  // Similarly, there is no direct i32 -> f64 vector conversion instruction.
573  // Nor is there a direct i32 -> f16 vector conversion; set it to Custom so the
574  // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
577 
578  // AArch64 doesn't have MUL.2d:
580  // Custom handling for some quad-vector types to detect MULL.
584 
587  // Likewise, narrowing and extending vector loads/stores aren't handled
588  // directly.
589  for (MVT VT : MVT::vector_valuetypes()) {
591 
596 
598 
599  for (MVT InnerVT : MVT::vector_valuetypes()) {
600  setTruncStoreAction(VT, InnerVT, Expand);
601  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
602  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
603  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
604  }
605  }
606 
607  // AArch64 has implementations of a lot of rounding-like FP operations.
608  for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
615  }
616  }
617 
618  // Prefer likely predicted branches to selects on out-of-order cores.
619  if (Subtarget->isCortexA57())
621 }
622 
623 void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
624  if (VT == MVT::v2f32 || VT == MVT::v4f16) {
627 
630  } else if (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16) {
633 
636  }
637 
638  // Mark vector float intrinsics as expand.
639  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
649  }
650 
663 
667  for (MVT InnerVT : MVT::all_valuetypes())
669 
670  // CNT supports only B element sizes.
671  if (VT != MVT::v8i8 && VT != MVT::v16i8)
673 
679 
682 
683  // [SU][MIN|MAX] are available for all NEON types apart from i64.
684  if (!VT.isFloatingPoint() &&
685  VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
686  for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
687  setOperationAction(Opcode, VT.getSimpleVT(), Legal);
688 
689  if (Subtarget->isLittleEndian()) {
690  for (unsigned im = (unsigned)ISD::PRE_INC;
694  }
695  }
696 }
697 
698 void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
699  addRegisterClass(VT, &AArch64::FPR64RegClass);
700  addTypeForNEON(VT, MVT::v2i32);
701 }
702 
703 void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
704  addRegisterClass(VT, &AArch64::FPR128RegClass);
705  addTypeForNEON(VT, MVT::v4i32);
706 }
707 
709  EVT VT) const {
710  if (!VT.isVector())
711  return MVT::i32;
713 }
714 
715 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
716 /// Mask are known to be either zero or one and return them in the
717 /// KnownZero/KnownOne bitsets.
719  const SDValue Op, APInt &KnownZero, APInt &KnownOne,
720  const SelectionDAG &DAG, unsigned Depth) const {
721  switch (Op.getOpcode()) {
722  default:
723  break;
724  case AArch64ISD::CSEL: {
725  APInt KnownZero2, KnownOne2;
726  DAG.computeKnownBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1);
727  DAG.computeKnownBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1);
728  KnownZero &= KnownZero2;
729  KnownOne &= KnownOne2;
730  break;
731  }
732  case ISD::INTRINSIC_W_CHAIN: {
733  ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
734  Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
735  switch (IntID) {
736  default: return;
737  case Intrinsic::aarch64_ldaxr:
738  case Intrinsic::aarch64_ldxr: {
739  unsigned BitWidth = KnownOne.getBitWidth();
740  EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
741  unsigned MemBits = VT.getScalarType().getSizeInBits();
742  KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
743  return;
744  }
745  }
746  break;
747  }
749  case ISD::INTRINSIC_VOID: {
750  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
751  switch (IntNo) {
752  default:
753  break;
754  case Intrinsic::aarch64_neon_umaxv:
755  case Intrinsic::aarch64_neon_uminv: {
756  // Figure out the datatype of the vector operand. The UMINV instruction
757  // will zero extend the result, so we can mark as known zero all the
758  // bits larger than the element datatype. 32-bit or larger doesn't need
759  // this as those are legal types and will be handled by isel directly.
760  MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
761  unsigned BitWidth = KnownZero.getBitWidth();
762  if (VT == MVT::v8i8 || VT == MVT::v16i8) {
763  assert(BitWidth >= 8 && "Unexpected width!");
764  APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
765  KnownZero |= Mask;
766  } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
767  assert(BitWidth >= 16 && "Unexpected width!");
768  APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
769  KnownZero |= Mask;
770  }
771  break;
772  } break;
773  }
774  }
775  }
776 }
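// For instance (a sketch, not an exhaustive description): an i8 load through
// @llvm.aarch64.ldxr comes back in a 64-bit register, so MemBits == 8 and the
// mask above records the top 56 bits of the result as known zero.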
777 
779  EVT) const {
780  return MVT::i64;
781 }
782 
783 FastISel *
785  const TargetLibraryInfo *libInfo) const {
786  return AArch64::createFastISel(funcInfo, libInfo);
787 }
788 
789 const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
790  switch ((AArch64ISD::NodeType)Opcode) {
791  case AArch64ISD::FIRST_NUMBER: break;
792  case AArch64ISD::CALL: return "AArch64ISD::CALL";
793  case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
794  case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
795  case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
796  case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
797  case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
798  case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
799  case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
800  case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
801  case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
802  case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
803  case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
804  case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
805  case AArch64ISD::ADC: return "AArch64ISD::ADC";
806  case AArch64ISD::SBC: return "AArch64ISD::SBC";
807  case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
808  case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
809  case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
810  case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
811  case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
812  case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
813  case AArch64ISD::FMIN: return "AArch64ISD::FMIN";
814  case AArch64ISD::FMAX: return "AArch64ISD::FMAX";
815  case AArch64ISD::DUP: return "AArch64ISD::DUP";
816  case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
817  case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
818  case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
819  case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
820  case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
821  case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
822  case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
823  case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
824  case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
825  case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
826  case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
827  case AArch64ISD::BICi: return "AArch64ISD::BICi";
828  case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
829  case AArch64ISD::BSL: return "AArch64ISD::BSL";
830  case AArch64ISD::NEG: return "AArch64ISD::NEG";
831  case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
832  case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
833  case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
834  case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
835  case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
836  case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
837  case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
838  case AArch64ISD::REV16: return "AArch64ISD::REV16";
839  case AArch64ISD::REV32: return "AArch64ISD::REV32";
840  case AArch64ISD::REV64: return "AArch64ISD::REV64";
841  case AArch64ISD::EXT: return "AArch64ISD::EXT";
842  case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
843  case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
844  case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
845  case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
846  case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
847  case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
848  case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
849  case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
850  case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
851  case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
852  case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
853  case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
854  case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
855  case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
856  case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
857  case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
858  case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
859  case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
860  case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
861  case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
862  case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
863  case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
864  case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
865  case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
866  case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
867  case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
868  case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
869  case AArch64ISD::NOT: return "AArch64ISD::NOT";
870  case AArch64ISD::BIT: return "AArch64ISD::BIT";
871  case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
872  case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
873  case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
874  case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
875  case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
876  case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
877  case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
878  case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
879  case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
880  case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
881  case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
882  case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
883  case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
884  case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
885  case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
886  case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
887  case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
888  case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
889  case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
890  case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
891  case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
892  case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
893  case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
894  case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
895  case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
896  case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
897  case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
898  case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
899  case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
900  case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
901  case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
902  case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
903  case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
904  case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
905  case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
906  case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
907  case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
908  case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
909  case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
910  case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
911  }
912  return nullptr;
913 }
914 
917  MachineBasicBlock *MBB) const {
918  // We materialise the F128CSEL pseudo-instruction as some control flow and a
919  // phi node:
920 
921  // OrigBB:
922  // [... previous instrs leading to comparison ...]
923  // b.ne TrueBB
924  // b EndBB
925  // TrueBB:
926  // ; Fallthrough
927  // EndBB:
928  // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
929 
930  MachineFunction *MF = MBB->getParent();
931  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
932  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
933  DebugLoc DL = MI->getDebugLoc();
934  MachineFunction::iterator It = MBB;
935  ++It;
936 
937  unsigned DestReg = MI->getOperand(0).getReg();
938  unsigned IfTrueReg = MI->getOperand(1).getReg();
939  unsigned IfFalseReg = MI->getOperand(2).getReg();
940  unsigned CondCode = MI->getOperand(3).getImm();
941  bool NZCVKilled = MI->getOperand(4).isKill();
942 
943  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
944  MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
945  MF->insert(It, TrueBB);
946  MF->insert(It, EndBB);
947 
948  // Transfer rest of current basic-block to EndBB
949  EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
950  MBB->end());
952 
953  BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
954  BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
955  MBB->addSuccessor(TrueBB);
956  MBB->addSuccessor(EndBB);
957 
958  // TrueBB falls through to the end.
959  TrueBB->addSuccessor(EndBB);
960 
961  if (!NZCVKilled) {
962  TrueBB->addLiveIn(AArch64::NZCV);
963  EndBB->addLiveIn(AArch64::NZCV);
964  }
965 
966  BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
967  .addReg(IfTrueReg)
968  .addMBB(TrueBB)
969  .addReg(IfFalseReg)
970  .addMBB(MBB);
971 
972  MI->eraseFromParent();
973  return EndBB;
974 }
975 
978  MachineBasicBlock *BB) const {
979  switch (MI->getOpcode()) {
980  default:
981 #ifndef NDEBUG
982  MI->dump();
983 #endif
984  llvm_unreachable("Unexpected instruction for custom inserter!");
985 
986  case AArch64::F128CSEL:
987  return EmitF128CSEL(MI, BB);
988 
991  return emitPatchPoint(MI, BB);
992  }
993 }
994 
995 //===----------------------------------------------------------------------===//
996 // AArch64 Lowering private implementation.
997 //===----------------------------------------------------------------------===//
998 
999 //===----------------------------------------------------------------------===//
1000 // Lowering Code
1001 //===----------------------------------------------------------------------===//
1002 
1003 /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1004 /// CC
1006  switch (CC) {
1007  default:
1008  llvm_unreachable("Unknown condition code!");
1009  case ISD::SETNE:
1010  return AArch64CC::NE;
1011  case ISD::SETEQ:
1012  return AArch64CC::EQ;
1013  case ISD::SETGT:
1014  return AArch64CC::GT;
1015  case ISD::SETGE:
1016  return AArch64CC::GE;
1017  case ISD::SETLT:
1018  return AArch64CC::LT;
1019  case ISD::SETLE:
1020  return AArch64CC::LE;
1021  case ISD::SETUGT:
1022  return AArch64CC::HI;
1023  case ISD::SETUGE:
1024  return AArch64CC::HS;
1025  case ISD::SETULT:
1026  return AArch64CC::LO;
1027  case ISD::SETULE:
1028  return AArch64CC::LS;
1029  }
1030 }
1031 
1032 /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1035  AArch64CC::CondCode &CondCode2) {
1036  CondCode2 = AArch64CC::AL;
1037  switch (CC) {
1038  default:
1039  llvm_unreachable("Unknown FP condition!");
1040  case ISD::SETEQ:
1041  case ISD::SETOEQ:
1042  CondCode = AArch64CC::EQ;
1043  break;
1044  case ISD::SETGT:
1045  case ISD::SETOGT:
1046  CondCode = AArch64CC::GT;
1047  break;
1048  case ISD::SETGE:
1049  case ISD::SETOGE:
1050  CondCode = AArch64CC::GE;
1051  break;
1052  case ISD::SETOLT:
1053  CondCode = AArch64CC::MI;
1054  break;
1055  case ISD::SETOLE:
1056  CondCode = AArch64CC::LS;
1057  break;
1058  case ISD::SETONE:
1059  CondCode = AArch64CC::MI;
1060  CondCode2 = AArch64CC::GT;
1061  break;
1062  case ISD::SETO:
1063  CondCode = AArch64CC::VC;
1064  break;
1065  case ISD::SETUO:
1066  CondCode = AArch64CC::VS;
1067  break;
1068  case ISD::SETUEQ:
1069  CondCode = AArch64CC::EQ;
1070  CondCode2 = AArch64CC::VS;
1071  break;
1072  case ISD::SETUGT:
1073  CondCode = AArch64CC::HI;
1074  break;
1075  case ISD::SETUGE:
1076  CondCode = AArch64CC::PL;
1077  break;
1078  case ISD::SETLT:
1079  case ISD::SETULT:
1080  CondCode = AArch64CC::LT;
1081  break;
1082  case ISD::SETLE:
1083  case ISD::SETULE:
1084  CondCode = AArch64CC::LE;
1085  break;
1086  case ISD::SETNE:
1087  case ISD::SETUNE:
1088  CondCode = AArch64CC::NE;
1089  break;
1090  }
1091 }
1092 
1093 /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1094 /// CC usable with the vector instructions. Fewer operations are available
1095 /// without a real NZCV register, so we have to use less efficient combinations
1096 /// to get the same effect.
1099  AArch64CC::CondCode &CondCode2,
1100  bool &Invert) {
1101  Invert = false;
1102  switch (CC) {
1103  default:
1104  // Mostly the scalar mappings work fine.
1105  changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1106  break;
1107  case ISD::SETUO:
1108  Invert = true; // Fallthrough
1109  case ISD::SETO:
1110  CondCode = AArch64CC::MI;
1111  CondCode2 = AArch64CC::GE;
1112  break;
1113  case ISD::SETUEQ:
1114  case ISD::SETULT:
1115  case ISD::SETULE:
1116  case ISD::SETUGT:
1117  case ISD::SETUGE:
1118  // All of the compare-mask comparisons are ordered, but we can switch
1119  // between the two by a double inversion. E.g. ULE == !OGT.
1120  Invert = true;
1121  changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
1122  break;
1123  }
1124 }
1125 
1126 static bool isLegalArithImmed(uint64_t C) {
1127  // Matches AArch64DAGToDAGISel::SelectArithImmed().
1128  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1129 }
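// A couple of illustrative values (sketch): 0xFFF and 0xFFF000 both pass the
// check (a 12-bit immediate, optionally shifted left by 12), while 0x1001
// fails both clauses and would have to be materialized into a register first.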
1130 
1132  SDLoc dl, SelectionDAG &DAG) {
1133  EVT VT = LHS.getValueType();
1134 
1135  if (VT.isFloatingPoint())
1136  return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1137 
1138  // The CMP instruction is just an alias for SUBS, and representing it as
1139  // SUBS means that it's possible to get CSE with subtract operations.
1140  // A later phase can perform the optimization of setting the destination
1141  // register to WZR/XZR if it ends up being unused.
1142  unsigned Opcode = AArch64ISD::SUBS;
1143 
1144  if (RHS.getOpcode() == ISD::SUB && isa<ConstantSDNode>(RHS.getOperand(0)) &&
1145  cast<ConstantSDNode>(RHS.getOperand(0))->getZExtValue() == 0 &&
1146  (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1147  // We'd like to combine a (CMP op1, (sub 0, op2)) into a CMN instruction on
1148  // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
1149  // can be set differently by this operation. It comes down to whether
1150  // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1151  // everything is fine. If not then the optimization is wrong. Thus general
1152  // comparisons are only valid if op2 != 0.
1153 
1154  // So, finally, the only LLVM-native comparisons that don't mention C and V
1155  // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1156  // the absence of information about op2.
1157  Opcode = AArch64ISD::ADDS;
1158  RHS = RHS.getOperand(1);
1159  } else if (LHS.getOpcode() == ISD::AND && isa<ConstantSDNode>(RHS) &&
1160  cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
1161  !isUnsignedIntSetCC(CC)) {
1162  // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1163  // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1164  // of the signed comparisons.
1165  Opcode = AArch64ISD::ANDS;
1166  RHS = LHS.getOperand(1);
1167  LHS = LHS.getOperand(0);
1168  }
1169 
1170  return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS)
1171  .getValue(1);
1172 }
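// e.g. (hedged sketch): for "icmp eq i32 %a, (sub i32 0, %b)" the code above
// switches SUBS to ADDS, i.e. the CMN alias:
//   cmn w0, w1
// This is only safe because EQ/NE depend on the Z flag alone, so the C/V
// differences between a - (-b) and a + b do not matter.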
1173 
1175  SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) {
1176  SDValue Cmp;
1177  AArch64CC::CondCode AArch64CC;
1178  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1179  EVT VT = RHS.getValueType();
1180  uint64_t C = RHSC->getZExtValue();
1181  if (!isLegalArithImmed(C)) {
1182  // Constant does not fit, try adjusting it by one?
1183  switch (CC) {
1184  default:
1185  break;
1186  case ISD::SETLT:
1187  case ISD::SETGE:
1188  if ((VT == MVT::i32 && C != 0x80000000 &&
1189  isLegalArithImmed((uint32_t)(C - 1))) ||
1190  (VT == MVT::i64 && C != 0x80000000ULL &&
1191  isLegalArithImmed(C - 1ULL))) {
1192  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1193  C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1194  RHS = DAG.getConstant(C, dl, VT);
1195  }
1196  break;
1197  case ISD::SETULT:
1198  case ISD::SETUGE:
1199  if ((VT == MVT::i32 && C != 0 &&
1200  isLegalArithImmed((uint32_t)(C - 1))) ||
1201  (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
1202  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1203  C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1204  RHS = DAG.getConstant(C, dl, VT);
1205  }
1206  break;
1207  case ISD::SETLE:
1208  case ISD::SETGT:
1209  if ((VT == MVT::i32 && C != INT32_MAX &&
1210  isLegalArithImmed((uint32_t)(C + 1))) ||
1211  (VT == MVT::i64 && C != INT64_MAX &&
1212  isLegalArithImmed(C + 1ULL))) {
1213  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1214  C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1215  RHS = DAG.getConstant(C, dl, VT);
1216  }
1217  break;
1218  case ISD::SETULE:
1219  case ISD::SETUGT:
1220  if ((VT == MVT::i32 && C != UINT32_MAX &&
1221  isLegalArithImmed((uint32_t)(C + 1))) ||
1222  (VT == MVT::i64 && C != UINT64_MAX &&
1223  isLegalArithImmed(C + 1ULL))) {
1224  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1225  C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1226  RHS = DAG.getConstant(C, dl, VT);
1227  }
1228  break;
1229  }
1230  }
1231  }
1232  // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
1233  // For the i8 operand, the largest immediate is 255, so this can be easily
1234  // encoded in the compare instruction. For the i16 operand, however, the
1235  // largest immediate cannot be encoded in the compare.
1236  // Therefore, use a sign extending load and cmn to avoid materializing the -1
1237  // constant. For example,
1238  // movz w1, #65535
1239  // ldrh w0, [x0, #0]
1240  // cmp w0, w1
1241  // >
1242  // ldrsh w0, [x0, #0]
1243  // cmn w0, #1
1244  // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
1245  // if and only if (sext LHS) == (sext RHS). The checks are in place to ensure
1246  // both the LHS and RHS are truly zero extended and to make sure the
1247  // transformation is profitable.
1248  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
1249  if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) &&
1250  isa<LoadSDNode>(LHS)) {
1251  if (cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
1252  cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
1253  LHS.getNode()->hasNUsesOfValue(1, 0)) {
1254  int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
1255  if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
1256  SDValue SExt =
1257  DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
1258  DAG.getValueType(MVT::i16));
1259  Cmp = emitComparison(SExt,
1260  DAG.getConstant(ValueofRHS, dl,
1261  RHS.getValueType()),
1262  CC, dl, DAG);
1263  AArch64CC = changeIntCCToAArch64CC(CC);
1264  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
1265  return Cmp;
1266  }
1267  }
1268  }
1269  }
1270  Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
1271  AArch64CC = changeIntCCToAArch64CC(CC);
1272  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
1273  return Cmp;
1274 }
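// Worked example (sketch): "icmp slt i32 %x, 4097" uses a constant that is not
// a legal arithmetic immediate (4097 == 0x1001), so the adjustment above turns
// it into "%x s<= 4096"; 4096 == 0x1000 encodes as a shifted 12-bit immediate.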
1275 
1276 static std::pair<SDValue, SDValue>
1278  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
1279  "Unsupported value type");
1280  SDValue Value, Overflow;
1281  SDLoc DL(Op);
1282  SDValue LHS = Op.getOperand(0);
1283  SDValue RHS = Op.getOperand(1);
1284  unsigned Opc = 0;
1285  switch (Op.getOpcode()) {
1286  default:
1287  llvm_unreachable("Unknown overflow instruction!");
1288  case ISD::SADDO:
1289  Opc = AArch64ISD::ADDS;
1290  CC = AArch64CC::VS;
1291  break;
1292  case ISD::UADDO:
1293  Opc = AArch64ISD::ADDS;
1294  CC = AArch64CC::HS;
1295  break;
1296  case ISD::SSUBO:
1297  Opc = AArch64ISD::SUBS;
1298  CC = AArch64CC::VS;
1299  break;
1300  case ISD::USUBO:
1301  Opc = AArch64ISD::SUBS;
1302  CC = AArch64CC::LO;
1303  break;
1304  // Multiply needs a little extra work.
1305  case ISD::SMULO:
1306  case ISD::UMULO: {
1307  CC = AArch64CC::NE;
1308  bool IsSigned = Op.getOpcode() == ISD::SMULO;
1309  if (Op.getValueType() == MVT::i32) {
1310  unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1311  // For a 32 bit multiply with overflow check we want the instruction
1312  // selector to generate a widening multiply (SMADDL/UMADDL). For that we
1313  // need to generate the following pattern:
1314  // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
1315  LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
1316  RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
1317  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1318  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
1319  DAG.getConstant(0, DL, MVT::i64));
1320  // On AArch64 the upper 32 bits are always zero extended for a 32 bit
1321  // operation. We need to clear out the upper 32 bits, because we used a
1322  // widening multiply that wrote all 64 bits. In the end this should be a
1323  // noop.
1324  Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
1325  if (IsSigned) {
1326  // The signed overflow check requires more than just a simple check for
1327  // any bit set in the upper 32 bits of the result. These bits could be
1328  // just the sign bits of a negative number. To perform the overflow
1329  // check we arithmetic-shift the lower 32 bits of the result right by 31,
1330  // replicating the sign bit, and compare that against the upper 32 bits.
1331  SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
1332  DAG.getConstant(32, DL, MVT::i64));
1333  UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
1334  SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
1335  DAG.getConstant(31, DL, MVT::i64));
1336  // It is important that LowerBits is last, otherwise the arithmetic
1337  // shift will not be folded into the compare (SUBS).
1338  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
1339  Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1340  .getValue(1);
1341  } else {
1342  // The overflow check for unsigned multiply is easy. We only need to
1343  // check if any of the upper 32 bits are set. This can be done with a
1344  // CMP (shifted register). For that we need to generate the following
1345  // pattern:
1346  // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
1347  SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
1348  DAG.getConstant(32, DL, MVT::i64));
1349  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1350  Overflow =
1351  DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1352  DAG.getConstant(0, DL, MVT::i64),
1353  UpperBits).getValue(1);
1354  }
1355  break;
1356  }
1357  assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
1358  // For the 64 bit multiply
1359  Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1360  if (IsSigned) {
1361  SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
1362  SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
1363  DAG.getConstant(63, DL, MVT::i64));
1364  // It is important that LowerBits is last, otherwise the arithmetic
1365  // shift will not be folded into the compare (SUBS).
1366  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1367  Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1368  .getValue(1);
1369  } else {
1370  SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
1371  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1372  Overflow =
1373  DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1374  DAG.getConstant(0, DL, MVT::i64),
1375  UpperBits).getValue(1);
1376  }
1377  break;
1378  }
1379  } // switch (...)
1380 
1381  if (Opc) {
1382  SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
1383 
1384  // Emit the AArch64 operation with overflow check.
1385  Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
1386  Overflow = Value.getValue(1);
1387  }
1388  return std::make_pair(Value, Overflow);
1389 }
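// For reference, the IR that reaches the SMULO/UMULO path above looks like
// (sketch):
//   %r = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
// The mul-plus-zero i64 shape is chosen deliberately so that the selector can
// match a widening SMADDL/UMADDL.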
1390 
1391 SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
1392  RTLIB::Libcall Call) const {
1393  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
1394  return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false,
1395  SDLoc(Op)).first;
1396 }
1397 
1399  SDValue Sel = Op.getOperand(0);
1400  SDValue Other = Op.getOperand(1);
1401 
1402  // If neither operand is a SELECT_CC, give up.
1403  if (Sel.getOpcode() != ISD::SELECT_CC)
1404  std::swap(Sel, Other);
1405  if (Sel.getOpcode() != ISD::SELECT_CC)
1406  return Op;
1407 
1408  // The folding we want to perform is:
1409  // (xor x, (select_cc a, b, cc, 0, -1) )
1410  // -->
1411  // (csel x, (xor x, -1), cc ...)
1412  //
1413  // The latter will get matched to a CSINV instruction.
1414 
1415  ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
1416  SDValue LHS = Sel.getOperand(0);
1417  SDValue RHS = Sel.getOperand(1);
1418  SDValue TVal = Sel.getOperand(2);
1419  SDValue FVal = Sel.getOperand(3);
1420  SDLoc dl(Sel);
1421 
1422  // FIXME: This could be generalized to non-integer comparisons.
1423  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
1424  return Op;
1425 
1426  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
1427  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
1428 
1429  // The values aren't constants, this isn't the pattern we're looking for.
1430  if (!CFVal || !CTVal)
1431  return Op;
1432 
1433  // We can commute the SELECT_CC by inverting the condition. This
1434  // might be needed to make this fit into a CSINV pattern.
1435  if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
1436  std::swap(TVal, FVal);
1437  std::swap(CTVal, CFVal);
1438  CC = ISD::getSetCCInverse(CC, true);
1439  }
1440 
1441  // If the constants line up, perform the transform!
1442  if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
1443  SDValue CCVal;
1444  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
1445 
1446  FVal = Other;
1447  TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
1448  DAG.getConstant(-1ULL, dl, Other.getValueType()));
1449 
1450  return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
1451  CCVal, Cmp);
1452  }
1453 
1454  return Op;
1455 }
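// e.g. (hedged): "xor i32 %x, (select (icmp eq i32 %a, %b), i32 0, i32 -1)"
// should end up as a compare plus a single CSINV, roughly:
//   cmp   w1, w2
//   csinv w0, w0, w0, eq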
1456 
1458  EVT VT = Op.getValueType();
1459 
1460  // Let legalize expand this if it isn't a legal type yet.
1461  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
1462  return SDValue();
1463 
1464  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
1465 
1466  unsigned Opc;
1467  bool ExtraOp = false;
1468  switch (Op.getOpcode()) {
1469  default:
1470  llvm_unreachable("Invalid code");
1471  case ISD::ADDC:
1472  Opc = AArch64ISD::ADDS;
1473  break;
1474  case ISD::SUBC:
1475  Opc = AArch64ISD::SUBS;
1476  break;
1477  case ISD::ADDE:
1478  Opc = AArch64ISD::ADCS;
1479  ExtraOp = true;
1480  break;
1481  case ISD::SUBE:
1482  Opc = AArch64ISD::SBCS;
1483  ExtraOp = true;
1484  break;
1485  }
1486 
1487  if (!ExtraOp)
1488  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
1489  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
1490  Op.getOperand(2));
1491 }
1492 
1494  // Let legalize expand this if it isn't a legal type yet.
1496  return SDValue();
1497 
1498  SDLoc dl(Op);
1500  // The actual operation that sets the overflow or carry flag.
1501  SDValue Value, Overflow;
1502  std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
1503 
1504  // We use 0 and 1 as false and true values.
1505  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
1506  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
1507 
1508  // We use an inverted condition, because the conditional select is inverted
1509  // too. This will allow it to be selected to a single instruction:
1510  // CSINC Wd, WZR, WZR, invert(cond).
1511  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
1512  Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
1513  CCVal, Overflow);
1514 
1515  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
1516  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
1517 }
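// e.g. (sketch): @llvm.uadd.with.overflow.i32 should come out roughly as
//   adds w0, w0, w1
//   cset w1, hs
// where the CSET is the "CSINC Wd, WZR, WZR, invert(cond)" form noted above.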
1518 
1519 // Prefetch operands are:
1520 // 1: Address to prefetch
1521 // 2: bool isWrite
1522 // 3: int locality (0 = no locality ... 3 = extreme locality)
1523 // 4: bool isDataCache
1525  SDLoc DL(Op);
1526  unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
1527  unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
1528  unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
1529 
1530  bool IsStream = !Locality;
1531  // When the locality number is set
1532  if (Locality) {
1533  // The front-end should have filtered out the out-of-range values
1534  assert(Locality <= 3 && "Prefetch locality out-of-range");
1535  // The locality degree is the opposite of the cache speed.
1536  // Put the number the other way around.
1537  // The encoding starts at 0 for level 1
1538  Locality = 3 - Locality;
1539  }
1540 
1541  // Build the mask value encoding the expected behavior.
1542  unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1543  (!IsData << 3) | // IsDataCache bit
1544  (Locality << 1) | // Cache level bits
1545  (unsigned)IsStream; // Stream bit
1546  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
1547  DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
1548 }
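// Worked example (sketch): @llvm.prefetch(%p, /*rw=*/0, /*locality=*/3,
// /*cache type=*/1) gives IsWrite = 0, IsData = 1, IsStream = 0 and Locality
// remapped to 0, so PrfOp == 0b00000, i.e. a PLDL1KEEP hint.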
1549 
1550 SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
1551  SelectionDAG &DAG) const {
1552  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
1553 
1554  RTLIB::Libcall LC;
1556 
1557  return LowerF128Call(Op, DAG, LC);
1558 }
1559 
1560 SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
1561  SelectionDAG &DAG) const {
1562  if (Op.getOperand(0).getValueType() != MVT::f128) {
1563  // It's legal except when f128 is involved
1564  return Op;
1565  }
1566 
1567  RTLIB::Libcall LC;
1569 
1570  // FP_ROUND node has a second operand indicating whether it is known to be
1571  // precise. That doesn't take part in the LibCall so we can't directly use
1572  // LowerF128Call.
1573  SDValue SrcVal = Op.getOperand(0);
1574  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
1575  /*isSigned*/ false, SDLoc(Op)).first;
1576 }
1577 
1579  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
1580  // Any additional optimization in this function should be recorded
1581  // in the cost tables.
1582  EVT InVT = Op.getOperand(0).getValueType();
1583  EVT VT = Op.getValueType();
1584 
1585  if (VT.getSizeInBits() < InVT.getSizeInBits()) {
1586  SDLoc dl(Op);
1587  SDValue Cv =
1588  DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
1589  Op.getOperand(0));
1590  return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
1591  }
1592 
1593  if (VT.getSizeInBits() > InVT.getSizeInBits()) {
1594  SDLoc dl(Op);
1595  MVT ExtVT =
1597  VT.getVectorNumElements());
1598  SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
1599  return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
1600  }
1601 
1602  // Type changing conversions are illegal.
1603  return Op;
1604 }
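// e.g. (sketch): "fptosi <2 x double> %v to <2 x i32>" takes the first branch
// above: convert to <2 x i64> (an fcvtzs on the .2d form) and then truncate
// the result back down to <2 x i32>.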
1605 
1606 SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
1607  SelectionDAG &DAG) const {
1608  if (Op.getOperand(0).getValueType().isVector())
1609  return LowerVectorFP_TO_INT(Op, DAG);
1610 
1611  // f16 conversions are promoted to f32.
1612  if (Op.getOperand(0).getValueType() == MVT::f16) {
1613  SDLoc dl(Op);
1614  return DAG.getNode(
1615  Op.getOpcode(), dl, Op.getValueType(),
1616  DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
1617  }
1618 
1619  if (Op.getOperand(0).getValueType() != MVT::f128) {
1620  // It's legal except when f128 is involved
1621  return Op;
1622  }
1623 
1624  RTLIB::Libcall LC;
1625  if (Op.getOpcode() == ISD::FP_TO_SINT)
1627  else
1629 
1630  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
1631  return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false,
1632  SDLoc(Op)).first;
1633 }
1634 
1636  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
1637  // Any additional optimization in this function should be recorded
1638  // in the cost tables.
1639  EVT VT = Op.getValueType();
1640  SDLoc dl(Op);
1641  SDValue In = Op.getOperand(0);
1642  EVT InVT = In.getValueType();
1643 
1644  if (VT.getSizeInBits() < InVT.getSizeInBits()) {
1645  MVT CastVT =
1647  InVT.getVectorNumElements());
1648  In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
1649  return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
1650  }
1651 
1652  if (VT.getSizeInBits() > InVT.getSizeInBits()) {
1653  unsigned CastOpc =
1655  EVT CastVT = VT.changeVectorElementTypeToInteger();
1656  In = DAG.getNode(CastOpc, dl, CastVT, In);
1657  return DAG.getNode(Op.getOpcode(), dl, VT, In);
1658  }
1659 
1660  return Op;
1661 }
1662 
1663 SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
1664  SelectionDAG &DAG) const {
1665  if (Op.getValueType().isVector())
1666  return LowerVectorINT_TO_FP(Op, DAG);
1667 
1668  // f16 conversions are promoted to f32.
1669  if (Op.getValueType() == MVT::f16) {
1670  SDLoc dl(Op);
1671  return DAG.getNode(
1672  ISD::FP_ROUND, dl, MVT::f16,
1673  DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
1674  DAG.getIntPtrConstant(0, dl));
1675  }
1676 
1677  // i128 conversions are libcalls.
1678  if (Op.getOperand(0).getValueType() == MVT::i128)
1679  return SDValue();
1680 
1681  // Other conversions are legal, unless it's to the completely software-based
1682  // fp128.
1683  if (Op.getValueType() != MVT::f128)
1684  return Op;
1685 
1686  RTLIB::Libcall LC;
1687  if (Op.getOpcode() == ISD::SINT_TO_FP)
1689  else
1691 
1692  return LowerF128Call(Op, DAG, LC);
1693 }
1694 
1695 SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
1696  SelectionDAG &DAG) const {
1697  // For iOS, we want to call an alternative entry point: __sincos_stret,
1698  // which returns the values in two S / D registers.
1699  SDLoc dl(Op);
1700  SDValue Arg = Op.getOperand(0);
1701  EVT ArgVT = Arg.getValueType();
1702  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
1703 
1704  ArgListTy Args;
1705  ArgListEntry Entry;
1706 
1707  Entry.Node = Arg;
1708  Entry.Ty = ArgTy;
1709  Entry.isSExt = false;
1710  Entry.isZExt = false;
1711  Args.push_back(Entry);
1712 
1713  const char *LibcallName =
1714  (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
1715  SDValue Callee =
1716  DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
1717 
1718  StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
1720  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
1721  .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args), 0);
1722 
1723  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
1724  return CallResult.first;
1725 }
1726 
1728  if (Op.getValueType() != MVT::f16)
1729  return SDValue();
1730 
1731  assert(Op.getOperand(0).getValueType() == MVT::i16);
1732  SDLoc DL(Op);
1733 
1734  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
1735  Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
1736  return SDValue(
1738  DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
1739  0);
1740 }
1741 
1742 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
1743  if (OrigVT.getSizeInBits() >= 64)
1744  return OrigVT;
1745 
1746  assert(OrigVT.isSimple() && "Expecting a simple value type");
1747 
1748  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
1749  switch (OrigSimpleTy) {
1750  default: llvm_unreachable("Unexpected Vector Type");
1751  case MVT::v2i8:
1752  case MVT::v2i16:
1753  return MVT::v2i32;
1754  case MVT::v4i8:
1755  return MVT::v4i16;
1756  }
1757 }
1758 
1760  const EVT &OrigTy,
1761  const EVT &ExtTy,
1762  unsigned ExtOpcode) {
1763  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
1764  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
1765  // 64-bits we need to insert a new extension so that it will be 64-bits.
1766  assert(ExtTy.is128BitVector() && "Unexpected extension size");
1767  if (OrigTy.getSizeInBits() >= 64)
1768  return N;
1769 
1770  // Must extend size to at least 64 bits to be used as an operand for VMULL.
1771  EVT NewVT = getExtensionTo64Bits(OrigTy);
1772 
1773  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
1774 }
1775 
1777  bool isSigned) {
1778  EVT VT = N->getValueType(0);
1779 
1780  if (N->getOpcode() != ISD::BUILD_VECTOR)
1781  return false;
1782 
1783  for (const SDValue &Elt : N->op_values()) {
1784  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1785  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1786  unsigned HalfSize = EltSize / 2;
1787  if (isSigned) {
1788  if (!isIntN(HalfSize, C->getSExtValue()))
1789  return false;
1790  } else {
1791  if (!isUIntN(HalfSize, C->getZExtValue()))
1792  return false;
1793  }
1794  continue;
1795  }
1796  return false;
1797  }
1798 
1799  return true;
1800 }
1801 
1803  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
1805  N->getOperand(0)->getValueType(0),
1806  N->getValueType(0),
1807  N->getOpcode());
1808 
1809  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
1810  EVT VT = N->getValueType(0);
1811  SDLoc dl(N);
1812  unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
1813  unsigned NumElts = VT.getVectorNumElements();
1814  MVT TruncVT = MVT::getIntegerVT(EltSize);
1816  for (unsigned i = 0; i != NumElts; ++i) {
1817  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
1818  const APInt &CInt = C->getAPIntValue();
1819  // Element types smaller than 32 bits are not legal, so use i32 elements.
1820  // The values are implicitly truncated so sext vs. zext doesn't matter.
1821  Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
1822  }
1823  return DAG.getNode(ISD::BUILD_VECTOR, dl,
1824  MVT::getVectorVT(TruncVT, NumElts), Ops);
1825 }
1826 
1827 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
1828  if (N->getOpcode() == ISD::SIGN_EXTEND)
1829  return true;
1830  if (isExtendedBUILD_VECTOR(N, DAG, true))
1831  return true;
1832  return false;
1833 }
1834 
1835 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
1836  if (N->getOpcode() == ISD::ZERO_EXTEND)
1837  return true;
1838  if (isExtendedBUILD_VECTOR(N, DAG, false))
1839  return true;
1840  return false;
1841 }
1842 
1843 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
1844  unsigned Opcode = N->getOpcode();
1845  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
1846  SDNode *N0 = N->getOperand(0).getNode();
1847  SDNode *N1 = N->getOperand(1).getNode();
1848  return N0->hasOneUse() && N1->hasOneUse() &&
1849  isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
1850  }
1851  return false;
1852 }
1853 
1854 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
1855  unsigned Opcode = N->getOpcode();
1856  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
1857  SDNode *N0 = N->getOperand(0).getNode();
1858  SDNode *N1 = N->getOperand(1).getNode();
1859  return N0->hasOneUse() && N1->hasOneUse() &&
1860  isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
1861  }
1862  return false;
1863 }
1864 
1866  // Multiplications are only custom-lowered for 128-bit vectors so that
1867  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
1868  EVT VT = Op.getValueType();
1869  assert(VT.is128BitVector() && VT.isInteger() &&
1870  "unexpected type for custom-lowering ISD::MUL");
1871  SDNode *N0 = Op.getOperand(0).getNode();
1872  SDNode *N1 = Op.getOperand(1).getNode();
1873  unsigned NewOpc = 0;
1874  bool isMLA = false;
1875  bool isN0SExt = isSignExtended(N0, DAG);
1876  bool isN1SExt = isSignExtended(N1, DAG);
1877  if (isN0SExt && isN1SExt)
1878  NewOpc = AArch64ISD::SMULL;
1879  else {
1880  bool isN0ZExt = isZeroExtended(N0, DAG);
1881  bool isN1ZExt = isZeroExtended(N1, DAG);
1882  if (isN0ZExt && isN1ZExt)
1883  NewOpc = AArch64ISD::UMULL;
1884  else if (isN1SExt || isN1ZExt) {
1885  // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
1886  // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
1887  if (isN1SExt && isAddSubSExt(N0, DAG)) {
1888  NewOpc = AArch64ISD::SMULL;
1889  isMLA = true;
1890  } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
1891  NewOpc = AArch64ISD::UMULL;
1892  isMLA = true;
1893  } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
1894  std::swap(N0, N1);
1895  NewOpc = AArch64ISD::UMULL;
1896  isMLA = true;
1897  }
1898  }
1899 
1900  if (!NewOpc) {
1901  if (VT == MVT::v2i64)
1902  // Fall through to expand this. It is not legal.
1903  return SDValue();
1904  else
1905  // Other vector multiplications are legal.
1906  return Op;
1907  }
1908  }
1909 
1910  // Legalize to an S/UMULL instruction
1911  SDLoc DL(Op);
1912  SDValue Op0;
1913  SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
1914  if (!isMLA) {
1915  Op0 = skipExtensionForVectorMULL(N0, DAG);
1916  assert(Op0.getValueType().is64BitVector() &&
1917  Op1.getValueType().is64BitVector() &&
1918  "unexpected types for extended operands to VMULL");
1919  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
1920  }
1921  // Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
1922  // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
1923  // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
1924  SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
1925  SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
1926  EVT Op1VT = Op1.getValueType();
1927  return DAG.getNode(N0->getOpcode(), DL, VT,
1928  DAG.getNode(NewOpc, DL, VT,
1929  DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
1930  DAG.getNode(NewOpc, DL, VT,
1931  DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
1932 }
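// e.g. (hedged sketch): "mul <2 x i64> (sext <2 x i32> %a to <2 x i64>),
// (sext <2 x i32> %b to <2 x i64>)" is the shape that becomes
// AArch64ISD::SMULL here, so a single smull v0.2d, v1.2s, v2.2s can be
// selected instead of expanding an otherwise-illegal v2i64 multiply.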
1933 
1935  SelectionDAG &DAG) const {
1936  switch (Op.getOpcode()) {
1937  default:
1938  llvm_unreachable("unimplemented operand");
1939  return SDValue();
1940  case ISD::BITCAST:
1941  return LowerBITCAST(Op, DAG);
1942  case ISD::GlobalAddress:
1943  return LowerGlobalAddress(Op, DAG);
1944  case ISD::GlobalTLSAddress:
1945  return LowerGlobalTLSAddress(Op, DAG);
1946  case ISD::SETCC:
1947  return LowerSETCC(Op, DAG);
1948  case ISD::BR_CC:
1949  return LowerBR_CC(Op, DAG);
1950  case ISD::SELECT:
1951  return LowerSELECT(Op, DAG);
1952  case ISD::SELECT_CC:
1953  return LowerSELECT_CC(Op, DAG);
1954  case ISD::JumpTable:
1955  return LowerJumpTable(Op, DAG);
1956  case ISD::ConstantPool:
1957  return LowerConstantPool(Op, DAG);
1958  case ISD::BlockAddress:
1959  return LowerBlockAddress(Op, DAG);
1960  case ISD::VASTART:
1961  return LowerVASTART(Op, DAG);
1962  case ISD::VACOPY:
1963  return LowerVACOPY(Op, DAG);
1964  case ISD::VAARG:
1965  return LowerVAARG(Op, DAG);
1966  case ISD::ADDC:
1967  case ISD::ADDE:
1968  case ISD::SUBC:
1969  case ISD::SUBE:
1970  return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
1971  case ISD::SADDO:
1972  case ISD::UADDO:
1973  case ISD::SSUBO:
1974  case ISD::USUBO:
1975  case ISD::SMULO:
1976  case ISD::UMULO:
1977  return LowerXALUO(Op, DAG);
1978  case ISD::FADD:
1979  return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
1980  case ISD::FSUB:
1981  return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
1982  case ISD::FMUL:
1983  return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
1984  case ISD::FDIV:
1985  return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
1986  case ISD::FP_ROUND:
1987  return LowerFP_ROUND(Op, DAG);
1988  case ISD::FP_EXTEND:
1989  return LowerFP_EXTEND(Op, DAG);
1990  case ISD::FRAMEADDR:
1991  return LowerFRAMEADDR(Op, DAG);
1992  case ISD::RETURNADDR:
1993  return LowerRETURNADDR(Op, DAG);
1995  return LowerINSERT_VECTOR_ELT(Op, DAG);
1997  return LowerEXTRACT_VECTOR_ELT(Op, DAG);
1998  case ISD::BUILD_VECTOR:
1999  return LowerBUILD_VECTOR(Op, DAG);
2000  case ISD::VECTOR_SHUFFLE:
2001  return LowerVECTOR_SHUFFLE(Op, DAG);
2003  return LowerEXTRACT_SUBVECTOR(Op, DAG);
2004  case ISD::SRA:
2005  case ISD::SRL:
2006  case ISD::SHL:
2007  return LowerVectorSRA_SRL_SHL(Op, DAG);
2008  case ISD::SHL_PARTS:
2009  return LowerShiftLeftParts(Op, DAG);
2010  case ISD::SRL_PARTS:
2011  case ISD::SRA_PARTS:
2012  return LowerShiftRightParts(Op, DAG);
2013  case ISD::CTPOP:
2014  return LowerCTPOP(Op, DAG);
2015  case ISD::FCOPYSIGN:
2016  return LowerFCOPYSIGN(Op, DAG);
2017  case ISD::AND:
2018  return LowerVectorAND(Op, DAG);
2019  case ISD::OR:
2020  return LowerVectorOR(Op, DAG);
2021  case ISD::XOR:
2022  return LowerXOR(Op, DAG);
2023  case ISD::PREFETCH:
2024  return LowerPREFETCH(Op, DAG);
2025  case ISD::SINT_TO_FP:
2026  case ISD::UINT_TO_FP:
2027  return LowerINT_TO_FP(Op, DAG);
2028  case ISD::FP_TO_SINT:
2029  case ISD::FP_TO_UINT:
2030  return LowerFP_TO_INT(Op, DAG);
2031  case ISD::FSINCOS:
2032  return LowerFSINCOS(Op, DAG);
2033  case ISD::MUL:
2034  return LowerMUL(Op, DAG);
2035  }
2036 }
2037 
2038 /// getFunctionAlignment - Return the Log2 alignment of this function.
2039 unsigned AArch64TargetLowering::getFunctionAlignment(const Function *F) const {
2040  return 2;
2041 }
2042 
2043 //===----------------------------------------------------------------------===//
2044 // Calling Convention Implementation
2045 //===----------------------------------------------------------------------===//
2046 
2047 #include "AArch64GenCallingConv.inc"
2048 
2049 /// Selects the correct CCAssignFn for a given CallingConvention value.
2050 CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2051  bool IsVarArg) const {
2052  switch (CC) {
2053  default:
2054  llvm_unreachable("Unsupported calling convention.");
2055  case CallingConv::WebKit_JS:
2056  return CC_AArch64_WebKit_JS;
2057  case CallingConv::GHC:
2058  return CC_AArch64_GHC;
2059  case CallingConv::C:
2060  case CallingConv::Fast:
2061  if (!Subtarget->isTargetDarwin())
2062  return CC_AArch64_AAPCS;
2063  return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
2064  }
2065 }
2066 
2067 SDValue AArch64TargetLowering::LowerFormalArguments(
2068  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2069  const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
2070  SmallVectorImpl<SDValue> &InVals) const {
2071  MachineFunction &MF = DAG.getMachineFunction();
2072  MachineFrameInfo *MFI = MF.getFrameInfo();
2073 
2074  // Assign locations to all of the incoming arguments.
2075  SmallVector<CCValAssign, 16> ArgLocs;
2076  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2077  *DAG.getContext());
2078 
2079  // At this point, Ins[].VT may already be promoted to i32. To correctly
2080  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
2081  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
2082  // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
2083  // we use a special version of AnalyzeFormalArguments to pass in ValVT and
2084  // LocVT.
2085  unsigned NumArgs = Ins.size();
2086  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
2087  unsigned CurArgIdx = 0;
2088  for (unsigned i = 0; i != NumArgs; ++i) {
2089  MVT ValVT = Ins[i].VT;
2090  if (Ins[i].isOrigArg()) {
2091  std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
2092  CurArgIdx = Ins[i].getOrigArgIndex();
2093 
2094  // Get type of the original argument.
2095  EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
2096  /*AllowUnknown*/ true);
2097  MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
2098  // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
2099  if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
2100  ValVT = MVT::i8;
2101  else if (ActualMVT == MVT::i16)
2102  ValVT = MVT::i16;
2103  }
2104  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
2105  bool Res =
2106  AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
2107  assert(!Res && "Call operand has unhandled type");
2108  (void)Res;
2109  }
2110  assert(ArgLocs.size() == Ins.size());
2111  SmallVector<SDValue, 16> ArgValues;
2112  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2113  CCValAssign &VA = ArgLocs[i];
2114 
2115  if (Ins[i].Flags.isByVal()) {
2116  // Byval is used for HFAs in the PCS, but the system should work in a
2117  // non-compliant manner for larger structs.
2118  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2119  int Size = Ins[i].Flags.getByValSize();
2120  unsigned NumRegs = (Size + 7) / 8;
2121 
2122  // FIXME: This works on big-endian for composite byvals, which are the common
2123  // case. It should work for fundamental types too.
2124  unsigned FrameIdx =
2125  MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
2126  SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
2127  InVals.push_back(FrameIdxN);
2128 
2129  continue;
2130  }
2131 
2132  if (VA.isRegLoc()) {
2133  // Arguments stored in registers.
2134  EVT RegVT = VA.getLocVT();
2135 
2136  SDValue ArgValue;
2137  const TargetRegisterClass *RC;
2138 
2139  if (RegVT == MVT::i32)
2140  RC = &AArch64::GPR32RegClass;
2141  else if (RegVT == MVT::i64)
2142  RC = &AArch64::GPR64RegClass;
2143  else if (RegVT == MVT::f16)
2144  RC = &AArch64::FPR16RegClass;
2145  else if (RegVT == MVT::f32)
2146  RC = &AArch64::FPR32RegClass;
2147  else if (RegVT == MVT::f64 || RegVT.is64BitVector())
2148  RC = &AArch64::FPR64RegClass;
2149  else if (RegVT == MVT::f128 || RegVT.is128BitVector())
2150  RC = &AArch64::FPR128RegClass;
2151  else
2152  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2153 
2154  // Transform the arguments in physical registers into virtual ones.
2155  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2156  ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
2157 
2158  // If this is an 8, 16 or 32-bit value, it is really passed promoted
2159  // to 64 bits. Insert an assert[sz]ext to capture this, then
2160  // truncate to the right size.
2161  switch (VA.getLocInfo()) {
2162  default:
2163  llvm_unreachable("Unknown loc info!");
2164  case CCValAssign::Full:
2165  break;
2166  case CCValAssign::BCvt:
2167  ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
2168  break;
2169  case CCValAssign::AExt:
2170  case CCValAssign::SExt:
2171  case CCValAssign::ZExt:
2172  // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
2173  // nodes after our lowering.
2174  assert(RegVT == Ins[i].VT && "incorrect register location selected");
2175  break;
2176  }
2177 
2178  InVals.push_back(ArgValue);
2179 
2180  } else { // VA.isRegLoc()
2181  assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
2182  unsigned ArgOffset = VA.getLocMemOffset();
2183  unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
2184 
2185  uint32_t BEAlign = 0;
2186  if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
2187  !Ins[i].Flags.isInConsecutiveRegs())
2188  BEAlign = 8 - ArgSize;
2189 
2190  int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
2191 
2192  // Create load nodes to retrieve arguments from the stack.
2193  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2194  SDValue ArgValue;
2195 
2196  // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
2197  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
2198  MVT MemVT = VA.getValVT();
2199 
2200  switch (VA.getLocInfo()) {
2201  default:
2202  break;
2203  case CCValAssign::BCvt:
2204  MemVT = VA.getLocVT();
2205  break;
2206  case CCValAssign::SExt:
2207  ExtType = ISD::SEXTLOAD;
2208  break;
2209  case CCValAssign::ZExt:
2210  ExtType = ISD::ZEXTLOAD;
2211  break;
2212  case CCValAssign::AExt:
2213  ExtType = ISD::EXTLOAD;
2214  break;
2215  }
2216 
2217  ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
2218  MachinePointerInfo::getFixedStack(FI),
2219  MemVT, false, false, false, 0);
2220 
2221  InVals.push_back(ArgValue);
2222  }
2223  }
2224 
2225  // varargs
2226  if (isVarArg) {
2227  if (!Subtarget->isTargetDarwin()) {
2228  // The AAPCS variadic function ABI is identical to the non-variadic
2229  // one. As a result there may be more arguments in registers and we should
2230  // save them for future reference.
2231  saveVarArgRegisters(CCInfo, DAG, DL, Chain);
2232  }
2233 
2234  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2235  // This will point to the next argument passed via stack.
2236  unsigned StackOffset = CCInfo.getNextStackOffset();
2237  // We currently pass all varargs at 8-byte alignment.
2238  StackOffset = ((StackOffset + 7) & ~7);
2239  AFI->setVarArgsStackIndex(MFI->CreateFixedObject(4, StackOffset, true));
2240  }
2241 
2242  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2243  unsigned StackArgSize = CCInfo.getNextStackOffset();
2244  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
2245  if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
2246  // This is a non-standard ABI so by fiat I say we're allowed to make full
2247  // use of the stack area to be popped, which must be aligned to 16 bytes in
2248  // any case:
2249  StackArgSize = RoundUpToAlignment(StackArgSize, 16);
2250 
2251  // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
2252  // a multiple of 16.
2253  FuncInfo->setArgumentStackToRestore(StackArgSize);
2254 
2255  // This realignment carries over to the available bytes below. Our own
2256  // callers will guarantee the space is free by giving an aligned value to
2257  // CALLSEQ_START.
2258  }
2259  // Even if we're not expected to free up the space, it's useful to know how
2260  // much is there while considering tail calls (because we can reuse it).
2261  FuncInfo->setBytesInStackArgArea(StackArgSize);
2262 
2263  return Chain;
2264 }
2265 
2266 void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
2267  SelectionDAG &DAG, SDLoc DL,
2268  SDValue &Chain) const {
2269  MachineFunction &MF = DAG.getMachineFunction();
2270  MachineFrameInfo *MFI = MF.getFrameInfo();
2271  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2272  auto PtrVT = getPointerTy(DAG.getDataLayout());
2273 
2274  SmallVector<SDValue, 8> MemOps;
2275 
2276  static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
2277  AArch64::X3, AArch64::X4, AArch64::X5,
2278  AArch64::X6, AArch64::X7 };
2279  static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
2280  unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
2281 
2282  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
2283  int GPRIdx = 0;
2284  if (GPRSaveSize != 0) {
2285  GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
2286 
2287  SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
2288 
2289  for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
2290  unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
2291  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
2292  SDValue Store =
2293  DAG.getStore(Val.getValue(1), DL, Val, FIN,
2294  MachinePointerInfo::getStack(i * 8), false, false, 0);
2295  MemOps.push_back(Store);
2296  FIN =
2297  DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
2298  }
2299  }
2300  FuncInfo->setVarArgsGPRIndex(GPRIdx);
2301  FuncInfo->setVarArgsGPRSize(GPRSaveSize);
2302 
2303  if (Subtarget->hasFPARMv8()) {
2304  static const MCPhysReg FPRArgRegs[] = {
2305  AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
2306  AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
2307  static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
2308  unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
2309 
2310  unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
2311  int FPRIdx = 0;
2312  if (FPRSaveSize != 0) {
2313  FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
2314 
2315  SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
2316 
2317  for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
2318  unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
2319  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
2320 
2321  SDValue Store =
2322  DAG.getStore(Val.getValue(1), DL, Val, FIN,
2323  MachinePointerInfo::getStack(i * 16), false, false, 0);
2324  MemOps.push_back(Store);
2325  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2326  DAG.getConstant(16, DL, PtrVT));
2327  }
2328  }
2329  FuncInfo->setVarArgsFPRIndex(FPRIdx);
2330  FuncInfo->setVarArgsFPRSize(FPRSaveSize);
2331  }
2332 
2333  if (!MemOps.empty()) {
2334  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
2335  }
2336 }
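// For reference, a sketch of what this save area feeds (not code from this
// file): the AAPCS64 va_list that va_start/va_arg expect looks like
//
//   typedef struct {
//     void *__stack;   // next stacked (memory) argument
//     void *__gr_top;  // one past the end of the X0-X7 save area above
//     void *__vr_top;  // one past the end of the Q0-Q7 save area above
//     int   __gr_offs; // negative offset from __gr_top to the next GPR slot
//     int   __vr_offs; // negative offset from __vr_top to the next FPR slot
//   } va_list;
//
// va_arg consumes the register save areas first and falls back to __stack
// once the corresponding offset reaches zero, which is why every unallocated
// argument register is spilled here even if it is never read.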
2337 
2338 /// LowerCallResult - Lower the result values of a call into the
2339 /// appropriate copies out of appropriate physical registers.
2340 SDValue AArch64TargetLowering::LowerCallResult(
2341  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2342  const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
2343  SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2344  SDValue ThisVal) const {
2345  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
2346  ? RetCC_AArch64_WebKit_JS
2347  : RetCC_AArch64_AAPCS;
2348  // Assign locations to each value returned by this call.
2349  SmallVector<CCValAssign, 16> RVLocs;
2350  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2351  *DAG.getContext());
2352  CCInfo.AnalyzeCallResult(Ins, RetCC);
2353 
2354  // Copy all of the result registers out of their specified physreg.
2355  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2356  CCValAssign VA = RVLocs[i];
2357 
2358  // Pass 'this' value directly from the argument to return value, to avoid
2359  // reg unit interference
2360  if (i == 0 && isThisReturn) {
2361  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
2362  "unexpected return calling convention register assignment");
2363  InVals.push_back(ThisVal);
2364  continue;
2365  }
2366 
2367  SDValue Val =
2368  DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
2369  Chain = Val.getValue(1);
2370  InFlag = Val.getValue(2);
2371 
2372  switch (VA.getLocInfo()) {
2373  default:
2374  llvm_unreachable("Unknown loc info!");
2375  case CCValAssign::Full:
2376  break;
2377  case CCValAssign::BCvt:
2378  Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
2379  break;
2380  }
2381 
2382  InVals.push_back(Val);
2383  }
2384 
2385  return Chain;
2386 }
2387 
2388 bool AArch64TargetLowering::isEligibleForTailCallOptimization(
2389  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2390  bool isCalleeStructRet, bool isCallerStructRet,
2391  const SmallVectorImpl<ISD::OutputArg> &Outs,
2392  const SmallVectorImpl<SDValue> &OutVals,
2393  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
2394  // For CallingConv::C this function knows whether the ABI needs
2395  // changing. That's not true for other conventions so they will have to opt in
2396  // manually.
2397  if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
2398  return false;
2399 
2400  const MachineFunction &MF = DAG.getMachineFunction();
2401  const Function *CallerF = MF.getFunction();
2402  CallingConv::ID CallerCC = CallerF->getCallingConv();
2403  bool CCMatch = CallerCC == CalleeCC;
2404 
2405  // Byval parameters hand the function a pointer directly into the stack area
2406  // we want to reuse during a tail call. Working around this *is* possible (see
2407  // X86) but less efficient and uglier in LowerCall.
2408  for (Function::const_arg_iterator i = CallerF->arg_begin(),
2409  e = CallerF->arg_end();
2410  i != e; ++i)
2411  if (i->hasByValAttr())
2412  return false;
2413 
2414  if (getTargetMachine().Options.GuaranteedTailCallOpt) {
2415  if (IsTailCallConvention(CalleeCC) && CCMatch)
2416  return true;
2417  return false;
2418  }
2419 
2420  // Externally-defined functions with weak linkage should not be
2421  // tail-called on AArch64 when the OS does not support dynamic
2422  // pre-emption of symbols, as the AAELF spec requires normal calls
2423  // to undefined weak functions to be replaced with a NOP or jump to the
2424  // next instruction. The behaviour of branch instructions in this
2425  // situation (as used for tail calls) is implementation-defined, so we
2426  // cannot rely on the linker replacing the tail call with a return.
2427  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2428  const GlobalValue *GV = G->getGlobal();
2429  const Triple &TT = getTargetMachine().getTargetTriple();
2430  if (GV->hasExternalWeakLinkage() &&
2431  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2432  return false;
2433  }
2434 
2435  // Now we search for cases where we can use a tail call without changing the
2436  // ABI. Sibcall is used in some places (particularly gcc) to refer to this
2437  // concept.
2438 
2439  // I want anyone implementing a new calling convention to think long and hard
2440  // about this assert.
2441  assert((!isVarArg || CalleeCC == CallingConv::C) &&
2442  "Unexpected variadic calling convention");
2443 
2444  if (isVarArg && !Outs.empty()) {
2445  // At least two cases here: if caller is fastcc then we can't have any
2446  // memory arguments (we'd be expected to clean up the stack afterwards). If
2447  // caller is C then we could potentially use its argument area.
2448 
2449  // FIXME: for now we take the most conservative of these in both cases:
2450  // disallow all variadic memory operands.
2451  SmallVector<CCValAssign, 16> ArgLocs;
2452  CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
2453  *DAG.getContext());
2454 
2455  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
2456  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
2457  if (!ArgLocs[i].isRegLoc())
2458  return false;
2459  }
2460 
2461  // If the calling conventions do not match, then we'd better make sure the
2462  // results are returned in the same way as what the caller expects.
2463  if (!CCMatch) {
2464  SmallVector<CCValAssign, 16> RVLocs1;
2465  CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
2466  *DAG.getContext());
2467  CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForCall(CalleeCC, isVarArg));
2468 
2469  SmallVector<CCValAssign, 16> RVLocs2;
2470  CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
2471  *DAG.getContext());
2472  CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForCall(CallerCC, isVarArg));
2473 
2474  if (RVLocs1.size() != RVLocs2.size())
2475  return false;
2476  for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
2477  if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
2478  return false;
2479  if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
2480  return false;
2481  if (RVLocs1[i].isRegLoc()) {
2482  if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
2483  return false;
2484  } else {
2485  if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
2486  return false;
2487  }
2488  }
2489  }
2490 
2491  // Nothing more to check if the callee is taking no arguments
2492  if (Outs.empty())
2493  return true;
2494 
2495  SmallVector<CCValAssign, 16> ArgLocs;
2496  CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
2497  *DAG.getContext());
2498 
2499  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2500 
2501  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2502 
2503  // If the stack arguments for this call would fit into our own save area then
2504  // the call can be made tail.
2505  return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
2506 }
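// A hypothetical C example of a call the checks above accept as a sibling
// call (illustrative only, not from the source):
//
//   int callee(int x);                       // same C calling convention
//   int caller(int y) { return callee(y); }  // tail position, no byval or
//                                            // stack-carried arguments
//
// With matching conventions, no byval parameters in the caller, and any
// outgoing stack arguments fitting in the caller's own incoming argument
// area, the call can be emitted as a branch (TC_RETURN) with no stack
// adjustment.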
2507 
2508 SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
2509  SelectionDAG &DAG,
2510  MachineFrameInfo *MFI,
2511  int ClobberedFI) const {
2512  SmallVector<SDValue, 8> ArgChains;
2513  int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
2514  int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
2515 
2516  // Include the original chain at the beginning of the list. When this is
2517  // used by target LowerCall hooks, this helps legalize find the
2518  // CALLSEQ_BEGIN node.
2519  ArgChains.push_back(Chain);
2520 
2521  // Add a chain value for each stack argument corresponding
2522  for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
2523  UE = DAG.getEntryNode().getNode()->use_end();
2524  U != UE; ++U)
2525  if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
2526  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
2527  if (FI->getIndex() < 0) {
2528  int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
2529  int64_t InLastByte = InFirstByte;
2530  InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
2531 
2532  if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
2533  (FirstByte <= InFirstByte && InFirstByte <= LastByte))
2534  ArgChains.push_back(SDValue(L, 1));
2535  }
2536 
2537  // Build a tokenfactor for all the chains.
2538  return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
2539 }
2540 
2541 bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
2542  bool TailCallOpt) const {
2543  return CallCC == CallingConv::Fast && TailCallOpt;
2544 }
2545 
2546 bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
2547  return CallCC == CallingConv::Fast;
2548 }
2549 
2550 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
2551 /// and add input and output parameter nodes.
2552 SDValue
2553 AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
2554  SmallVectorImpl<SDValue> &InVals) const {
2555  SelectionDAG &DAG = CLI.DAG;
2556  SDLoc &DL = CLI.DL;
2557  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
2558  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
2559  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
2560  SDValue Chain = CLI.Chain;
2561  SDValue Callee = CLI.Callee;
2562  bool &IsTailCall = CLI.IsTailCall;
2563  CallingConv::ID CallConv = CLI.CallConv;
2564  bool IsVarArg = CLI.IsVarArg;
2565 
2566  MachineFunction &MF = DAG.getMachineFunction();
2567  bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2568  bool IsThisReturn = false;
2569 
2570  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2571  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
2572  bool IsSibCall = false;
2573 
2574  if (IsTailCall) {
2575  // Check if it's really possible to do a tail call.
2576  IsTailCall = isEligibleForTailCallOptimization(
2577  Callee, CallConv, IsVarArg, IsStructRet,
2578  MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG);
2579  if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
2580  report_fatal_error("failed to perform tail call elimination on a call "
2581  "site marked musttail");
2582 
2583  // A sibling call is one where we're under the usual C ABI and not planning
2584  // to change that but can still do a tail call:
2585  if (!TailCallOpt && IsTailCall)
2586  IsSibCall = true;
2587 
2588  if (IsTailCall)
2589  ++NumTailCalls;
2590  }
2591 
2592  // Analyze operands of the call, assigning locations to each operand.
2593  SmallVector<CCValAssign, 16> ArgLocs;
2594  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
2595  *DAG.getContext());
2596 
2597  if (IsVarArg) {
2598  // Handle fixed and variable vector arguments differently.
2599  // Variable vector arguments always go into memory.
2600  unsigned NumArgs = Outs.size();
2601 
2602  for (unsigned i = 0; i != NumArgs; ++i) {
2603  MVT ArgVT = Outs[i].VT;
2604  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2605  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
2606  /*IsVarArg=*/ !Outs[i].IsFixed);
2607  bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
2608  assert(!Res && "Call operand has unhandled type");
2609  (void)Res;
2610  }
2611  } else {
2612  // At this point, Outs[].VT may already be promoted to i32. To correctly
2613  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
2614  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
2615  // Since AnalyzeCallOperands uses Outs[].VT for both ValVT and LocVT, here
2616  // we use a special version of AnalyzeCallOperands to pass in ValVT and
2617  // LocVT.
2618  unsigned NumArgs = Outs.size();
2619  for (unsigned i = 0; i != NumArgs; ++i) {
2620  MVT ValVT = Outs[i].VT;
2621  // Get type of the original argument.
2622  EVT ActualVT = getValueType(DAG.getDataLayout(),
2623  CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
2624  /*AllowUnknown*/ true);
2625  MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
2626  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2627  // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
2628  if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
2629  ValVT = MVT::i8;
2630  else if (ActualMVT == MVT::i16)
2631  ValVT = MVT::i16;
2632 
2633  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
2634  bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
2635  assert(!Res && "Call operand has unhandled type");
2636  (void)Res;
2637  }
2638  }
2639 
2640  // Get a count of how many bytes are to be pushed on the stack.
2641  unsigned NumBytes = CCInfo.getNextStackOffset();
2642 
2643  if (IsSibCall) {
2644  // Since we're not changing the ABI to make this a tail call, the memory
2645  // operands are already available in the caller's incoming argument space.
2646  NumBytes = 0;
2647  }
2648 
2649  // FPDiff is the byte offset of the call's argument area from the callee's.
2650  // Stores to callee stack arguments will be placed in FixedStackSlots offset
2651  // by this amount for a tail call. In a sibling call it must be 0 because the
2652  // caller will deallocate the entire stack and the callee still expects its
2653  // arguments to begin at SP+0. Completely unused for non-tail calls.
2654  int FPDiff = 0;
2655 
2656  if (IsTailCall && !IsSibCall) {
2657  unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
2658 
2659  // Since callee will pop argument stack as a tail call, we must keep the
2660  // popped size 16-byte aligned.
2661  NumBytes = RoundUpToAlignment(NumBytes, 16);
2662 
2663  // FPDiff will be negative if this tail call requires more space than we
2664  // would automatically have in our incoming argument space. Positive if we
2665  // can actually shrink the stack.
2666  FPDiff = NumReusableBytes - NumBytes;
2667 
2668  // The stack pointer must be 16-byte aligned at all times it's used for a
2669  // memory operation, which in practice means at *all* times and in
2670  // particular across call boundaries. Therefore our own arguments started at
2671  // a 16-byte aligned SP and the delta applied for the tail call should
2672  // satisfy the same constraint.
2673  assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
2674  }
2675 
2676  // Adjust the stack pointer for the new arguments...
2677  // These operations are automatically eliminated by the prolog/epilog pass
2678  if (!IsSibCall)
2679  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, DL,
2680  true),
2681  DL);
2682 
2683  SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
2684  getPointerTy(DAG.getDataLayout()));
2685 
2686  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2687  SmallVector<SDValue, 8> MemOpChains;
2688  auto PtrVT = getPointerTy(DAG.getDataLayout());
2689 
2690  // Walk the register/memloc assignments, inserting copies/loads.
2691  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
2692  ++i, ++realArgIdx) {
2693  CCValAssign &VA = ArgLocs[i];
2694  SDValue Arg = OutVals[realArgIdx];
2695  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2696 
2697  // Promote the value if needed.
2698  switch (VA.getLocInfo()) {
2699  default:
2700  llvm_unreachable("Unknown loc info!");
2701  case CCValAssign::Full:
2702  break;
2703  case CCValAssign::SExt:
2704  Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
2705  break;
2706  case CCValAssign::ZExt:
2707  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
2708  break;
2709  case CCValAssign::AExt:
2710  if (Outs[realArgIdx].ArgVT == MVT::i1) {
2711  // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
2712  Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
2713  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
2714  }
2715  Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
2716  break;
2717  case CCValAssign::BCvt:
2718  Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
2719  break;
2720  case CCValAssign::FPExt:
2721  Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
2722  break;
2723  }
2724 
2725  if (VA.isRegLoc()) {
2726  if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i64) {
2727  assert(VA.getLocVT() == MVT::i64 &&
2728  "unexpected calling convention register assignment");
2729  assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
2730  "unexpected use of 'returned'");
2731  IsThisReturn = true;
2732  }
2733  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2734  } else {
2735  assert(VA.isMemLoc());
2736 
2737  SDValue DstAddr;
2738  MachinePointerInfo DstInfo;
2739 
2740  // FIXME: This works on big-endian for composite byvals, which are the
2741  // common case. It should work for fundamental types too.
2742  uint32_t BEAlign = 0;
2743  unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
2744  : VA.getValVT().getSizeInBits();
2745  OpSize = (OpSize + 7) / 8;
2746  if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
2747  !Flags.isInConsecutiveRegs()) {
2748  if (OpSize < 8)
2749  BEAlign = 8 - OpSize;
2750  }
2751  unsigned LocMemOffset = VA.getLocMemOffset();
2752  int32_t Offset = LocMemOffset + BEAlign;
2753  SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
2754  PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
2755 
2756  if (IsTailCall) {
2757  Offset = Offset + FPDiff;
2758  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
2759 
2760  DstAddr = DAG.getFrameIndex(FI, PtrVT);
2761  DstInfo = MachinePointerInfo::getFixedStack(FI);
2762 
2763  // Make sure any stack arguments overlapping with where we're storing
2764  // are loaded before this eventual operation. Otherwise they'll be
2765  // clobbered.
2766  Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
2767  } else {
2768  SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
2769 
2770  DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
2771  DstInfo = MachinePointerInfo::getStack(LocMemOffset);
2772  }
2773 
2774  if (Outs[i].Flags.isByVal()) {
2775  SDValue SizeNode =
2776  DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
2777  SDValue Cpy = DAG.getMemcpy(
2778  Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
2779  /*isVol = */ false, /*AlwaysInline = */ false,
2780  /*isTailCall = */ false,
2781  DstInfo, MachinePointerInfo());
2782 
2783  MemOpChains.push_back(Cpy);
2784  } else {
2785  // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
2786  // promoted to a legal register type i32, we should truncate Arg back to
2787  // i1/i8/i16.
2788  if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
2789  VA.getValVT() == MVT::i16)
2790  Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
2791 
2792  SDValue Store =
2793  DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, false, false, 0);
2794  MemOpChains.push_back(Store);
2795  }
2796  }
2797  }
2798 
2799  if (!MemOpChains.empty())
2800  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2801 
2802  // Build a sequence of copy-to-reg nodes chained together with token chain
2803  // and flag operands which copy the outgoing args into the appropriate regs.
2804  SDValue InFlag;
2805  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2806  Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
2807  RegsToPass[i].second, InFlag);
2808  InFlag = Chain.getValue(1);
2809  }
2810 
2811  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2812  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2813  // node so that legalize doesn't hack it.
2814  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
2815  Subtarget->isTargetMachO()) {
2816  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2817  const GlobalValue *GV = G->getGlobal();
2818  bool InternalLinkage = GV->hasInternalLinkage();
2819  if (InternalLinkage)
2820  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
2821  else {
2822  Callee =
2823  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
2824  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
2825  }
2826  } else if (ExternalSymbolSDNode *S =
2827  dyn_cast<ExternalSymbolSDNode>(Callee)) {
2828  const char *Sym = S->getSymbol();
2829  Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
2830  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
2831  }
2832  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2833  const GlobalValue *GV = G->getGlobal();
2834  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
2835  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2836  const char *Sym = S->getSymbol();
2837  Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
2838  }
2839 
2840  // We don't usually want to end the call-sequence here because we would tidy
2841  // the frame up *after* the call, however in the ABI-changing tail-call case
2842  // we've carefully laid out the parameters so that when sp is reset they'll be
2843  // in the correct location.
2844  if (IsTailCall && !IsSibCall) {
2845  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
2846  DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
2847  InFlag = Chain.getValue(1);
2848  }
2849 
2850  std::vector<SDValue> Ops;
2851  Ops.push_back(Chain);
2852  Ops.push_back(Callee);
2853 
2854  if (IsTailCall) {
2855  // Each tail call may have to adjust the stack by a different amount, so
2856  // this information must travel along with the operation for eventual
2857  // consumption by emitEpilogue.
2858  Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
2859  }
2860 
2861  // Add argument registers to the end of the list so that they are known live
2862  // into the call.
2863  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2864  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2865  RegsToPass[i].second.getValueType()));
2866 
2867  // Add a register mask operand representing the call-preserved registers.
2868  const uint32_t *Mask;
2869  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
2870  if (IsThisReturn) {
2871  // For 'this' returns, use the X0-preserving mask if applicable
2872  Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
2873  if (!Mask) {
2874  IsThisReturn = false;
2875  Mask = TRI->getCallPreservedMask(MF, CallConv);
2876  }
2877  } else
2878  Mask = TRI->getCallPreservedMask(MF, CallConv);
2879 
2880  assert(Mask && "Missing call preserved mask for calling convention");
2881  Ops.push_back(DAG.getRegisterMask(Mask));
2882 
2883  if (InFlag.getNode())
2884  Ops.push_back(InFlag);
2885 
2886  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2887 
2888  // If we're doing a tail call, use a TC_RETURN here rather than an
2889  // actual call instruction.
2890  if (IsTailCall) {
2891  MF.getFrameInfo()->setHasTailCall();
2892  return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
2893  }
2894 
2895  // Returns a chain and a flag for retval copy to use.
2896  Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
2897  InFlag = Chain.getValue(1);
2898 
2899  uint64_t CalleePopBytes = DoesCalleeRestoreStack(CallConv, TailCallOpt)
2900  ? RoundUpToAlignment(NumBytes, 16)
2901  : 0;
2902 
2903  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
2904  DAG.getIntPtrConstant(CalleePopBytes, DL, true),
2905  InFlag, DL);
2906  if (!Ins.empty())
2907  InFlag = Chain.getValue(1);
2908 
2909  // Handle result values, copying them out of physregs into vregs that we
2910  // return.
2911  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
2912  InVals, IsThisReturn,
2913  IsThisReturn ? OutVals[0] : SDValue());
2914 }
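// Rough shape of the DAG built above for an ordinary (non-tail) call,
// summarised here for orientation (a sketch, not additional lowering code):
//
//   CALLSEQ_START
//     stores / memcpys for stack and byval arguments   (MemOpChains)
//     CopyToReg for each register argument             (RegsToPass)
//     AArch64ISD::CALL callee, regmask, glued copies
//   CALLSEQ_END
//     CopyFromReg per return location                  (LowerCallResult)
//
// Tail calls instead end in AArch64ISD::TC_RETURN carrying FPDiff, and
// sibling calls skip the CALLSEQ bracketing entirely.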
2915 
2916 bool AArch64TargetLowering::CanLowerReturn(
2917  CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2918  const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2919  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
2920  ? RetCC_AArch64_WebKit_JS
2921  : RetCC_AArch64_AAPCS;
2922  SmallVector<CCValAssign, 16> RVLocs;
2923  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2924  return CCInfo.CheckReturn(Outs, RetCC);
2925 }
2926 
2927 SDValue
2928 AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2929  bool isVarArg,
2930  const SmallVectorImpl<ISD::OutputArg> &Outs,
2931  const SmallVectorImpl<SDValue> &OutVals,
2932  SDLoc DL, SelectionDAG &DAG) const {
2933  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
2934  ? RetCC_AArch64_WebKit_JS
2935  : RetCC_AArch64_AAPCS;
2936  SmallVector<CCValAssign, 16> RVLocs;
2937  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2938  *DAG.getContext());
2939  CCInfo.AnalyzeReturn(Outs, RetCC);
2940 
2941  // Copy the result values into the output registers.
2942  SDValue Flag;
2943  SmallVector<SDValue, 4> RetOps(1, Chain);
2944  for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
2945  ++i, ++realRVLocIdx) {
2946  CCValAssign &VA = RVLocs[i];
2947  assert(VA.isRegLoc() && "Can only return in registers!");
2948  SDValue Arg = OutVals[realRVLocIdx];
2949 
2950  switch (VA.getLocInfo()) {
2951  default:
2952  llvm_unreachable("Unknown loc info!");
2953  case CCValAssign::Full:
2954  if (Outs[i].ArgVT == MVT::i1) {
2955  // AAPCS requires i1 to be zero-extended to i8 by the producer of the
2956  // value. This is strictly redundant on Darwin (which uses "zeroext
2957  // i1"), but will be optimised out before ISel.
2958  Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
2959  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
2960  }
2961  break;
2962  case CCValAssign::BCvt:
2963  Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
2964  break;
2965  }
2966 
2967  Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
2968  Flag = Chain.getValue(1);
2969  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2970  }
2971 
2972  RetOps[0] = Chain; // Update chain.
2973 
2974  // Add the flag if we have it.
2975  if (Flag.getNode())
2976  RetOps.push_back(Flag);
2977 
2978  return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
2979 }
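// Example of the i1 handling above (a sketch): for IR such as "ret i1 %b"
// the value is truncated to i1 and zero-extended to the location type before
// the copy into W0, so the AAPCS guarantee that only bit 0 may be set holds;
// in practice this typically materialises as something like
//
//   and w0, w0, #0x1
//   ret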
2980 
2981 //===----------------------------------------------------------------------===//
2982 // Other Lowering Code
2983 //===----------------------------------------------------------------------===//
2984 
2985 SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
2986  SelectionDAG &DAG) const {
2987  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2988  SDLoc DL(Op);
2989  const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
2990  const GlobalValue *GV = GN->getGlobal();
2991  unsigned char OpFlags =
2992  Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
2993 
2994  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
2995  "unexpected offset in global node");
2996 
2997  // This also catches the large code model case for Darwin.
2998  if ((OpFlags & AArch64II::MO_GOT) != 0) {
2999  SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
3000  // FIXME: Once remat is capable of dealing with instructions with register
3001  // operands, expand this into two nodes instead of using a wrapper node.
3002  return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
3003  }
3004 
3005  if ((OpFlags & AArch64II::MO_CONSTPOOL) != 0) {
3006  assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
3007  "use of MO_CONSTPOOL only supported on small model");
3008  SDValue Hi = DAG.getTargetConstantPool(GV, PtrVT, 0, 0, AArch64II::MO_PAGE);
3009  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
3010  unsigned char LoFlags = AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
3011  SDValue Lo = DAG.getTargetConstantPool(GV, PtrVT, 0, 0, LoFlags);
3012  SDValue PoolAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
3013  SDValue GlobalAddr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), PoolAddr,
3014  MachinePointerInfo::getConstantPool(),
3015  /*isVolatile=*/ false,
3016  /*isNonTemporal=*/ true,
3017  /*isInvariant=*/ true, 8);
3018  if (GN->getOffset() != 0)
3019  return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr,
3020  DAG.getConstant(GN->getOffset(), DL, PtrVT));
3021  return GlobalAddr;
3022  }
3023 
3024  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
3025  const unsigned char MO_NC = AArch64II::MO_NC;
3026  return DAG.getNode(
3027  AArch64ISD::WrapperLarge, DL, PtrVT,
3028  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3),
3029  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC),
3030  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC),
3031  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC));
3032  } else {
3033  // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and
3034  // the only correct model on Darwin.
3035  SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3036  OpFlags | AArch64II::MO_PAGE);
3037  unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
3038  SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags);
3039 
3040  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
3041  return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
3042  }
3043 }
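// For the common small-code-model, non-GOT path above, the ADRP/ADDlow pair
// corresponds to the usual two-instruction materialisation (illustrative):
//
//   adrp x0, var              // address of the 4KiB page containing var
//   add  x0, x0, :lo12:var    // plus the low 12 bits of var's address
//
// whereas the GOT path loads the address instead:
//
//   adrp x0, :got:var
//   ldr  x0, [x0, :got_lo12:var]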
3044 
3045 /// \brief Convert a TLS address reference into the correct sequence of loads
3046 /// and calls to compute the variable's address (for Darwin, currently) and
3047 /// return an SDValue containing the final node.
3048 
3049 /// Darwin only has one TLS scheme which must be capable of dealing with the
3050 /// fully general situation, in the worst case. This means:
3051 /// + "extern __thread" declaration.
3052 /// + Defined in a possibly unknown dynamic library.
3053 ///
3054 /// The general system is that each __thread variable has a [3 x i64] descriptor
3055 /// which contains information used by the runtime to calculate the address. The
3056 /// only part of this the compiler needs to know about is the first xword, which
3057 /// contains a function pointer that must be called with the address of the
3058 /// entire descriptor in "x0".
3059 ///
3060 /// Since this descriptor may be in a different unit, in general even the
3061 /// descriptor must be accessed via an indirect load. The "ideal" code sequence
3062 /// is:
3063 /// adrp x0, _var@TLVPPAGE
3064 /// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
3065 /// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
3066 /// ; the function pointer
3067 /// blr x1 ; Uses descriptor address in x0
3068 /// ; Address of _var is now in x0.
3069 ///
3070 /// If the address of _var's descriptor *is* known to the linker, then it can
3071 /// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
3072 /// a slight efficiency gain.
3073 SDValue
3074 AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
3075  SelectionDAG &DAG) const {
3076  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
3077 
3078  SDLoc DL(Op);
3079  MVT PtrVT = getPointerTy(DAG.getDataLayout());
3080  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3081 
3082  SDValue TLVPAddr =
3083  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3084  SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
3085 
3086  // The first entry in the descriptor is a function pointer that we must call
3087  // to obtain the address of the variable.
3088  SDValue Chain = DAG.getEntryNode();
3089  SDValue FuncTLVGet =
3090  DAG.getLoad(MVT::i64, DL, Chain, DescAddr, MachinePointerInfo::getGOT(),
3091  false, true, true, 8);
3092  Chain = FuncTLVGet.getValue(1);
3093 
3094  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
3095  MFI->setAdjustsStack(true);
3096 
3097  // TLS calls preserve all registers except those that absolutely must be
3098  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3099  // silly).
3100  const uint32_t *Mask =
3101  Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
3102 
3103  // Finally, we can make the call. This is just a degenerate version of a
3104  // normal AArch64 call node: x0 takes the address of the descriptor, and
3105  // returns the address of the variable in this thread.
3106  Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
3107  Chain =
3108  DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3109  Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
3110  DAG.getRegisterMask(Mask), Chain.getValue(1));
3111  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
3112 }
3113 
3114 /// When accessing thread-local variables under either the general-dynamic or
3115 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
3116 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
3117 /// is a function pointer to carry out the resolution.
3118 ///
3119 /// The sequence is:
3120 /// adrp x0, :tlsdesc:var
3121 /// ldr x1, [x0, #:tlsdesc_lo12:var]
3122 /// add x0, x0, #:tlsdesc_lo12:var
3123 /// .tlsdesccall var
3124 /// blr x1
3125 /// (TPIDR_EL0 offset now in x0)
3126 ///
3127 /// The above sequence must be produced unscheduled, to enable the linker to
3128 /// optimize/relax this sequence.
3129 /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
3130 /// above sequence, and expanded really late in the compilation flow, to ensure
3131 /// the sequence is produced as per above.
3132 SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
3133  SelectionDAG &DAG) const {
3134  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3135 
3136  SDValue Chain = DAG.getEntryNode();
3137  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3138 
3139  SmallVector<SDValue, 2> Ops;
3140  Ops.push_back(Chain);
3141  Ops.push_back(SymAddr);
3142 
3143  Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops);
3144  SDValue Glue = Chain.getValue(1);
3145 
3146  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
3147 }
3148 
3149 SDValue
3150 AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
3151  SelectionDAG &DAG) const {
3152  assert(Subtarget->isTargetELF() && "This function expects an ELF target");
3153  assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
3154  "ELF TLS only supported in small memory model");
3155  // Different choices can be made for the maximum size of the TLS area for a
3156  // module. For the small address model, the default TLS size is 16MiB and the
3157  // maximum TLS size is 4GiB.
3158  // FIXME: add -mtls-size command line option and make it control the 16MiB
3159  // vs. 4GiB code sequence generation.
3160  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3161 
3162  TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
3163  if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
3164  if (Model == TLSModel::LocalDynamic)
3165  Model = TLSModel::GeneralDynamic;
3166  }
3167 
3168  SDValue TPOff;
3169  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3170  SDLoc DL(Op);
3171  const GlobalValue *GV = GA->getGlobal();
3172 
3173  SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
3174 
3175  if (Model == TLSModel::LocalExec) {
3176  SDValue HiVar = DAG.getTargetGlobalAddress(
3177  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
3178  SDValue LoVar = DAG.getTargetGlobalAddress(
3179  GV, DL, PtrVT, 0,
3180  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3181 
3182  SDValue TPWithOff_lo =
3183  SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
3184  HiVar,
3185  DAG.getTargetConstant(0, DL, MVT::i32)),
3186  0);
3187  SDValue TPWithOff =
3188  SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
3189  LoVar,
3190  DAG.getTargetConstant(0, DL, MVT::i32)),
3191  0);
3192  return TPWithOff;
3193  } else if (Model == TLSModel::InitialExec) {
3194  TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3195  TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
3196  } else if (Model == TLSModel::LocalDynamic) {
3197  // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
3198  // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
3199  // the beginning of the module's TLS region, followed by a DTPREL offset
3200  // calculation.
3201 
3202  // These accesses will need deduplicating if there's more than one.
3203  AArch64FunctionInfo *MFI =
3204  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
3205  MFI->incNumLocalDynamicTLSAccesses();
3206 
3207  // The call needs a relocation too for linker relaxation. It doesn't make
3208  // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
3209  // the address.
3210  SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
3211  AArch64II::MO_TLS);
3212 
3213  // Now we can calculate the offset from TPIDR_EL0 to this module's
3214  // thread-local area.
3215  TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
3216 
3217  // Now use :dtprel_whatever: operations to calculate this variable's offset
3218  // in its thread-storage area.
3219  SDValue HiVar = DAG.getTargetGlobalAddress(
3220  GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
3221  SDValue LoVar = DAG.getTargetGlobalAddress(
3222  GV, DL, MVT::i64, 0,
3223  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3224 
3225  TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
3226  DAG.getTargetConstant(0, DL, MVT::i32)),
3227  0);
3228  TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
3229  DAG.getTargetConstant(0, DL, MVT::i32)),
3230  0);
3231  } else if (Model == TLSModel::GeneralDynamic) {
3232  // The call needs a relocation too for linker relaxation. It doesn't make
3233  // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
3234  // the address.
3235  SDValue SymAddr =
3236  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3237 
3238  // Finally we can make a call to calculate the offset from tpidr_el0.
3239  TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
3240  } else
3241  llvm_unreachable("Unsupported ELF TLS access model");
3242 
3243  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
3244 }
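// Illustrative instruction sequences for the TLS models handled above (a
// sketch of the usual assembly, not emitted verbatim here):
//
//   local-exec:    mrs  x0, TPIDR_EL0
//                  add  x0, x0, :tprel_hi12:var
//                  add  x0, x0, :tprel_lo12_nc:var
//
//   initial-exec:  adrp x1, :gottprel:var
//                  ldr  x1, [x1, :gottprel_lo12:var]
//                  mrs  x0, TPIDR_EL0
//                  add  x0, x0, x1
//
// The general-dynamic and local-dynamic models go through the
// TLSDESC_CALLSEQ pseudo documented before LowerELFTLSDescCallSeq.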
3245 
3246 SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
3247  SelectionDAG &DAG) const {
3248  if (Subtarget->isTargetDarwin())
3249  return LowerDarwinGlobalTLSAddress(Op, DAG);
3250  else if (Subtarget->isTargetELF())
3251  return LowerELFGlobalTLSAddress(Op, DAG);
3252 
3253  llvm_unreachable("Unexpected platform trying to use TLS");
3254 }
3255 SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3256  SDValue Chain = Op.getOperand(0);
3257  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3258  SDValue LHS = Op.getOperand(2);
3259  SDValue RHS = Op.getOperand(3);
3260  SDValue Dest = Op.getOperand(4);
3261  SDLoc dl(Op);
3262 
3263  // Handle f128 first, since lowering it will result in comparing the return
3264  // value of a libcall against zero, which is just what the rest of LowerBR_CC
3265  // is expecting to deal with.
3266  if (LHS.getValueType() == MVT::f128) {
3267  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
3268 
3269  // If softenSetCCOperands returned a scalar, we need to compare the result
3270  // against zero to select between true and false values.
3271  if (!RHS.getNode()) {
3272  RHS = DAG.getConstant(0, dl, LHS.getValueType());
3273  CC = ISD::SETNE;
3274  }
3275  }
3276 
3277  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
3278  // instruction.
3279  unsigned Opc = LHS.getOpcode();
3280  if (LHS.getResNo() == 1 && isa<ConstantSDNode>(RHS) &&
3281  cast<ConstantSDNode>(RHS)->isOne() &&
3282  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3283  Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
3284  assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
3285  "Unexpected condition code.");
3286  // Only lower legal XALUO ops.
3287  if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
3288  return SDValue();
3289 
3290  // The actual operation with overflow check.
3291  AArch64CC::CondCode OFCC;
3292  SDValue Value, Overflow;
3293  std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
3294 
3295  if (CC == ISD::SETNE)
3296  OFCC = getInvertedCondCode(OFCC);
3297  SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
3298 
3299  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
3300  Overflow);
3301  }
3302 
3303  if (LHS.getValueType().isInteger()) {
3304  assert((LHS.getValueType() == RHS.getValueType()) &&
3305  (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
3306 
3307  // If the RHS of the comparison is zero, we can potentially fold this
3308  // to a specialized branch.
3309  const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
3310  if (RHSC && RHSC->getZExtValue() == 0) {
3311  if (CC == ISD::SETEQ) {
3312  // See if we can use a TBZ to fold in an AND as well.
3313  // TBZ has a smaller branch displacement than CBZ. If the offset is
3314  // out of bounds, a late MI-layer pass rewrites branches.
3315  // 403.gcc is an example that hits this case.
3316  if (LHS.getOpcode() == ISD::AND &&
3317  isa<ConstantSDNode>(LHS.getOperand(1)) &&
3318  isPowerOf2_64(LHS.getConstantOperandVal(1))) {
3319  SDValue Test = LHS.getOperand(0);
3320  uint64_t Mask = LHS.getConstantOperandVal(1);
3321  return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
3322  DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
3323  Dest);
3324  }
3325 
3326  return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
3327  } else if (CC == ISD::SETNE) {
3328  // See if we can use a TBZ to fold in an AND as well.
3329  // TBZ has a smaller branch displacement than CBZ. If the offset is
3330  // out of bounds, a late MI-layer pass rewrites branches.
3331  // 403.gcc is an example that hits this case.
3332  if (LHS.getOpcode() == ISD::AND &&
3333  isa<ConstantSDNode>(LHS.getOperand(1)) &&
3334  isPowerOf2_64(LHS.getConstantOperandVal(1))) {
3335  SDValue Test = LHS.getOperand(0);
3336  uint64_t Mask = LHS.getConstantOperandVal(1);
3337  return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
3338  DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
3339  Dest);
3340  }
3341 
3342  return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
3343  } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
3344  // Don't combine AND since emitComparison converts the AND to an ANDS
3345  // (a.k.a. TST) and the test in the test bit and branch instruction
3346  // becomes redundant. This would also increase register pressure.
3347  uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
3348  return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
3349  DAG.getConstant(Mask, dl, MVT::i64), Dest);
3350  }
3351  }
3352  if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
3353  LHS.getOpcode() != ISD::AND) {
3354  // Don't combine AND since emitComparison converts the AND to an ANDS
3355  // (a.k.a. TST) and the test in the test bit and branch instruction
3356  // becomes redundant. This would also increase register pressure.
3357  uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
3358  return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
3359  DAG.getConstant(Mask, dl, MVT::i64), Dest);
3360  }
3361 
3362  SDValue CCVal;
3363  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
3364  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
3365  Cmp);
3366  }
3367 
3368  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
3369 
3370  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
3371  // clean. Some of them require two branches to implement.
3372  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
3373  AArch64CC::CondCode CC1, CC2;
3374  changeFPCCToAArch64CC(CC, CC1, CC2);
3375  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
3376  SDValue BR1 =
3377  DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
3378  if (CC2 != AArch64CC::AL) {
3379  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
3380  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
3381  Cmp);
3382  }
3383 
3384  return BR1;
3385 }
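// Example of the TBZ/TBNZ folding above (a sketch): for source like
//
//   if ((x & 0x8) == 0) goto dest;
//
// the AND, the compare against zero and the branch collapse into a single
// test-bit-and-branch instruction, roughly
//
//   tbz x0, #3, dest
//
// and the SETLT-zero / SETGT-minus-one cases similarly test the sign bit
// (bit 31 or 63) with TBNZ / TBZ.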
3386 
3387 SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
3388  SelectionDAG &DAG) const {
3389  EVT VT = Op.getValueType();
3390  SDLoc DL(Op);
3391 
3392  SDValue In1 = Op.getOperand(0);
3393  SDValue In2 = Op.getOperand(1);
3394  EVT SrcVT = In2.getValueType();
3395  if (SrcVT != VT) {
3396  if (SrcVT == MVT::f32 && VT == MVT::f64)
3397  In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
3398  else if (SrcVT == MVT::f64 && VT == MVT::f32)
3399  In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2,
3400  DAG.getIntPtrConstant(0, DL));
3401  else
3402  // FIXME: Src type is different, bail out for now. Can VT really be a
3403  // vector type?
3404  return SDValue();
3405  }
3406 
3407  EVT VecVT;
3408  EVT EltVT;
3409  uint64_t EltMask;
3410  SDValue VecVal1, VecVal2;
3411  if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
3412  EltVT = MVT::i32;
3413  VecVT = MVT::v4i32;
3414  EltMask = 0x80000000ULL;
3415 
3416  if (!VT.isVector()) {
3417  VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
3418  DAG.getUNDEF(VecVT), In1);
3419  VecVal2 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
3420  DAG.getUNDEF(VecVT), In2);
3421  } else {
3422  VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
3423  VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
3424  }
3425  } else if (VT == MVT::f64 || VT == MVT::v2f64) {
3426  EltVT = MVT::i64;
3427  VecVT = MVT::v2i64;
3428 
3429  // We want to materialize a mask with the high bit set, but the AdvSIMD
3430  // immediate moves cannot materialize that in a single instruction for
3431  // 64-bit elements. Instead, materialize zero and then negate it.
3432  EltMask = 0;
3433 
3434  if (!VT.isVector()) {
3435  VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
3436  DAG.getUNDEF(VecVT), In1);
3437  VecVal2 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
3438  DAG.getUNDEF(VecVT), In2);
3439  } else {
3440  VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
3441  VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
3442  }
3443  } else {
3444  llvm_unreachable("Invalid type for copysign!");
3445  }
3446 
3447  SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
3448 
3449  // If we couldn't materialize the mask above, then the mask vector will be
3450  // the zero vector, and we need to negate it here.
3451  if (VT == MVT::f64 || VT == MVT::v2f64) {
3452  BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
3453  BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
3454  BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
3455  }
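  // (FNEG of +0.0 yields -0.0, whose only set bit is the sign bit, so after
  // the bitcast back each v2i64 lane holds 0x8000000000000000.)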
3456 
3457  SDValue Sel =
3458  DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
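  // BIT is a bitwise insert-if-true: lanes of VecVal1 keep their own bits
  // where the mask bit is 0 and take the bits of VecVal2 where the mask bit
  // is 1, i.e. roughly (VecVal1 & ~Mask) | (VecVal2 & Mask). With the
  // sign-bit mask this copies In2's sign onto In1's magnitude.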
3459 
3460  if (VT == MVT::f32)
3461  return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
3462  else if (VT == MVT::f64)
3463  return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
3464  else
3465  return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
3466 }
3467 
3468 SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
3469  if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
3470  Attribute::NoImplicitFloat))
3471  return SDValue();
3472 
3473  if (!Subtarget->hasNEON())
3474  return SDValue();
3475 
3476  // While there is no integer popcount instruction, it can
3477  // be more efficiently lowered to the following sequence that uses
3478  // AdvSIMD registers/instructions as long as the copies to/from
3479  // the AdvSIMD registers are cheap.
3480  // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
3481  // CNT V0.8B, V0.8B // 8xbyte pop-counts
3482  // ADDV B0, V0.8B // sum 8xbyte pop-counts
3483  // UMOV X0, V0.B[0] // copy byte result back to integer reg
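  // Worked example: ctpop(i32 0xF0F0) zero-extends to 0x000000000000F0F0,
  // the v8i8 byte lanes are {0xF0, 0xF0, 0, 0, 0, 0, 0, 0}, CNT gives
  // {4, 4, 0, 0, 0, 0, 0, 0}, and the UADDLV reduction sums them to 8.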
3484  SDValue Val = Op.getOperand(0);
3485  SDLoc DL(Op);
3486  EVT VT = Op.getValueType();
3487 
3488  if (VT == MVT::i32)
3489  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
3490  Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
3491 
3492  SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
3493  SDValue UaddLV = DAG.getNode(
3494  ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3495  DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
3496 
3497  if (VT == MVT::i64)
3498  UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
3499  return UaddLV;
3500 }
3501 
3502 SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3503 
3504  if (Op.getValueType().isVector())
3505  return LowerVSETCC(Op, DAG);
3506 
3507  SDValue LHS = Op.getOperand(0);
3508  SDValue RHS = Op.getOperand(1);
3509  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3510  SDLoc dl(Op);
3511 
3512  // We chose ZeroOrOneBooleanContents, so use zero and one.
3513  EVT VT = Op.getValueType();
3514  SDValue TVal = DAG.getConstant(1, dl, VT);
3515  SDValue FVal = DAG.getConstant(0, dl, VT);
3516 
3517  // Handle f128 first, since one possible outcome is a normal integer
3518  // comparison which gets picked up by the next if statement.
3519  if (LHS.getValueType() == MVT::f128) {
3520  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
3521 
3522  // If softenSetCCOperands returned a scalar, use it.
3523  if (!RHS.getNode()) {
3524  assert(LHS.getValueType() == Op.getValueType() &&
3525  "Unexpected setcc expansion!");
3526  return LHS;
3527  }
3528  }
3529 
3530  if (LHS.getValueType().isInteger()) {
3531  SDValue CCVal;
3532  SDValue Cmp =
3533  getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
3534 
3535  // Note that we inverted the condition above, so we reverse the order of
3536  // the true and false operands here. This will allow the setcc to be
3537  // matched to a single CSINC instruction.
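  // For example, (i32)(a == b) becomes "cmp w0, w1; cset w0, eq", where cset
  // is an alias of "csinc w0, wzr, wzr, ne" (register names illustrative).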
3538  return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
3539  }
3540 
3541  // Now we know we're dealing with FP values.
3542  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
3543 
3544  // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
3545  // and do the comparison.
3546  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
3547 
3548  AArch64CC::CondCode CC1, CC2;
3549  changeFPCCToAArch64CC(CC, CC1, CC2);
3550  if (CC2 == AArch64CC::AL) {
3551  changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
3552  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
3553 
3554  // Note that we inverted the condition above, so we reverse the order of
3555  // the true and false operands here. This will allow the setcc to be
3556  // matched to a single CSINC instruction.
3557  return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
3558  } else {
3559  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
3560  // totally clean. Some of them require two CSELs to implement. As in
3561  // this case, we emit the first CSEL and then emit a second using the output
3562  // of the first as the RHS. We're effectively OR'ing the two CC's together.
3563 
3564  // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
3565  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
3566  SDValue CS1 =
3567  DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
3568 
3569  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
3570  return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
3571  }
3572 }
3573 
3574 /// A SELECT_CC operation is really some kind of max or min if both values being
3575 /// compared are, in some sense, equal to the results in either case. However,
3576 /// it is permissible to compare f32 values and produce directly extended f64
3577 /// values.
3578 ///
3579 /// Extending the comparison operands would also be allowed, but is less likely
3580 /// to happen in practice since their use is right here. Note that truncate
3581 /// operations would *not* be semantically equivalent.
3582 static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
3583  if (Cmp == Result)
3584  return (Cmp.getValueType() == MVT::f32 ||
3585  Cmp.getValueType() == MVT::f64);
3586 
3587  ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
3588  ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
3589  if (CCmp && CResult && Cmp.getValueType() == MVT::f32 &&
3590  Result.getValueType() == MVT::f64) {
3591  bool Lossy;
3592  APFloat CmpVal = CCmp->getValueAPF();
3593  CmpVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &Lossy);
3594  return CResult->getValueAPF().bitwiseIsEqual(CmpVal);
3595  }
3596 
3597  return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
3598 }
3599 
3600 SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
3601  SDValue RHS, SDValue TVal,
3602  SDValue FVal, SDLoc dl,
3603  SelectionDAG &DAG) const {
3604  // Handle f128 first, because it will result in a comparison of some RTLIB
3605  // call result against zero.
3606  if (LHS.getValueType() == MVT::f128) {
3607  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
3608 
3609  // If softenSetCCOperands returned a scalar, we need to compare the result
3610  // against zero to select between true and false values.
3611  if (!RHS.getNode()) {
3612  RHS = DAG.getConstant(0, dl, LHS.getValueType());
3613  CC = ISD::SETNE;
3614  }
3615  }
3616 
3617  // Handle integers first.
3618  if (LHS.getValueType().isInteger()) {
3619  assert((LHS.getValueType() == RHS.getValueType()) &&
3620  (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
3621 
3622  unsigned Opcode = AArch64ISD::CSEL;
3623 
3624  // If both the TVal and the FVal are constants, see if we can swap them in
3625  // order to form a CSINV or CSINC out of them.
3626  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
3627  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
3628 
3629  if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
3630  std::swap(TVal, FVal);
3631  std::swap(CTVal, CFVal);
3632  CC = ISD::getSetCCInverse(CC, true);
3633  } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
3634  std::swap(TVal, FVal);
3635  std::swap(CTVal, CFVal);
3636  CC = ISD::getSetCCInverse(CC, true);
3637  } else if (TVal.getOpcode() == ISD::XOR) {
3638  // If TVal is a NOT we want to swap TVal and FVal so that we can match
3639  // with a CSINV rather than a CSEL.
3640  ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(1));
3641 
3642  if (CVal && CVal->isAllOnesValue()) {
3643  std::swap(TVal, FVal);
3644  std::swap(CTVal, CFVal);
3645  CC = ISD::getSetCCInverse(CC, true);
3646  }
3647  } else if (TVal.getOpcode() == ISD::SUB) {
3648  // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
3649  // that we can match with a CSNEG rather than a CSEL.
3650  ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(0));
3651 
3652  if (CVal && CVal->isNullValue()) {
3653  std::swap(TVal, FVal);
3654  std::swap(CTVal, CFVal);
3655  CC = ISD::getSetCCInverse(CC, true);
3656  }
3657  } else if (CTVal && CFVal) {
3658  const int64_t TrueVal = CTVal->getSExtValue();
3659  const int64_t FalseVal = CFVal->getSExtValue();
3660  bool Swap = false;
3661 
3662  // If both TVal and FVal are constants, see if FVal is the
3663  // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
3664  // instead of a CSEL in that case.
3665  if (TrueVal == ~FalseVal) {
3666  Opcode = AArch64ISD::CSINV;
3667  } else if (TrueVal == -FalseVal) {
3668  Opcode = AArch64ISD::CSNEG;
3669  } else if (TVal.getValueType() == MVT::i32) {
3670  // If our operands are only 32-bit wide, make sure we use 32-bit
3671  // arithmetic for the check whether we can use CSINC. This ensures that
3672  // the addition in the check will wrap around properly in case there is
3673  // an overflow (which would not be the case if we do the check with
3674  // 64-bit arithmetic).
3675  const uint32_t TrueVal32 = CTVal->getZExtValue();
3676  const uint32_t FalseVal32 = CFVal->getZExtValue();
3677 
3678  if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
3679  Opcode = AArch64ISD::CSINC;
3680 
3681  if (TrueVal32 > FalseVal32) {
3682  Swap = true;
3683  }
3684  }
3685  // 64-bit check whether we can use CSINC.
3686  } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
3687  Opcode = AArch64ISD::CSINC;
3688 
3689  if (TrueVal > FalseVal) {
3690  Swap = true;
3691  }
3692  }
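  // For example (illustrative): select(c, 7, -8) can use CSINV since
  // 7 == ~(-8); select(c, 5, -5) can use CSNEG since 5 == -(-5); and
  // select(c, 4, 3) can use CSINC since 4 == 3 + 1.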
3693 
3694  // Swap TVal and FVal if necessary.
3695  if (Swap) {
3696  std::swap(TVal, FVal);
3697  std::swap(CTVal, CFVal);
3698  CC = ISD::getSetCCInverse(CC, true);
3699  }
3700 
3701  if (Opcode != AArch64ISD::CSEL) {
3702  // Drop FVal since we can get its value by simply inverting/negating
3703  // TVal.
3704  FVal = TVal;
3705  }
3706  }
3707 
3708  SDValue CCVal;
3709  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
3710 
3711  EVT VT = TVal.getValueType();
3712  return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
3713  }
3714 
3715  // Now we know we're dealing with FP values.
3716  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
3717  assert(LHS.getValueType() == RHS.getValueType());
3718  EVT VT = TVal.getValueType();
3719  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
3720 
3721  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
3722  // clean. Some of them require two CSELs to implement.
3723  AArch64CC::CondCode CC1, CC2;
3724  changeFPCCToAArch64CC(CC, CC1, CC2);
3725  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
3726  SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
3727 
3728  // If we need a second CSEL, emit it, using the output of the first as the
3729  // RHS. We're effectively OR'ing the two CC's together.
3730  if (CC2 != AArch64CC::AL) {
3731  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
3732  return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
3733  }
3734 
3735  // Otherwise, return the output of the first CSEL.
3736  return CS1;
3737 }
3738 
3739 SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
3740  SelectionDAG &DAG) const {
3741  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3742  SDValue LHS = Op.getOperand(0);
3743  SDValue RHS = Op.getOperand(1);
3744  SDValue TVal = Op.getOperand(2);
3745  SDValue FVal = Op.getOperand(3);
3746  SDLoc DL(Op);
3747  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
3748 }
3749 
3750 SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
3751  SelectionDAG &DAG) const {
3752  SDValue CCVal = Op->getOperand(0);
3753  SDValue TVal = Op->getOperand(1);
3754  SDValue FVal = Op->getOperand(2);
3755  SDLoc DL(Op);
3756 
3757  unsigned Opc = CCVal.getOpcode();
3758  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
3759  // instruction.
3760  if (CCVal.getResNo() == 1 &&
3761  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3762  Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
3763  // Only lower legal XALUO ops.
3764  if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
3765  return SDValue();
3766 
3767  AArch64CC::CondCode OFCC;
3768  SDValue Value, Overflow;
3769  std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
3770  SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
3771 
3772  return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
3773  CCVal, Overflow);
3774  }
3775 
3776  // Lower it the same way as we would lower a SELECT_CC node.
3777  ISD::CondCode CC;
3778  SDValue LHS, RHS;
3779  if (CCVal.getOpcode() == ISD::SETCC) {
3780  LHS = CCVal.getOperand(0);
3781  RHS = CCVal.getOperand(1);
3782  CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
3783  } else {
3784  LHS = CCVal;
3785  RHS = DAG.getConstant(0, DL, CCVal.getValueType());
3786  CC = ISD::SETNE;
3787  }
3788  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
3789 }
3790 
3791 SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
3792  SelectionDAG &DAG) const {
3793  // Jump table entries as PC relative offsets. No additional tweaking
3794  // is necessary here. Just get the address of the jump table.
3795  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3796  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3797  SDLoc DL(Op);
3798 
3799  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
3800  !Subtarget->isTargetMachO()) {
3801  const unsigned char MO_NC = AArch64II::MO_NC;
3802  return DAG.getNode(
3803  AArch64ISD::WrapperLarge, DL, PtrVT,
3804  DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3),
3805  DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC),
3806  DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC),
3807  DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3808  AArch64II::MO_G0 | MO_NC));
3809  }
3810 
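  // Otherwise use the small-code-model ADRP + ADD sequence, e.g.:
  //   adrp x8, .LJTI0_0
  //   add  x8, x8, :lo12:.LJTI0_0
  // (label and register names illustrative).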
3811  SDValue Hi =
3812  DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE);
3813  SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3814  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3815  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
3816  return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
3817 }
3818 
3819 SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
3820  SelectionDAG &DAG) const {
3821  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3822  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3823  SDLoc DL(Op);
3824 
3825  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
3826  // Use the GOT for the large code model on iOS.
3827  if (Subtarget->isTargetMachO()) {
3828  SDValue GotAddr = DAG.getTargetConstantPool(
3829  CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
3830  AArch64II::MO_GOT);
3831  return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
3832  }
3833 
3834  const unsigned char MO_NC = AArch64II::MO_NC;
3835  return DAG.getNode(
3836  AArch64ISD::WrapperLarge, DL, PtrVT,
3837  DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
3838  CP->getOffset(), AArch64II::MO_G3),
3839  DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
3840  CP->getOffset(), AArch64II::MO_G2 | MO_NC),
3841  DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
3842  CP->getOffset(), AArch64II::MO_G1 | MO_NC),
3843  DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
3844  CP->getOffset(), AArch64II::MO_G0 | MO_NC));
3845  } else {
3846  // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on
3847  // ELF, the only valid one on Darwin.
3848  SDValue Hi =
3849  DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
3850  CP->getOffset(), AArch64II::MO_PAGE);
3851  SDValue Lo = DAG.getTargetConstantPool(
3852  CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
3853  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3854 
3855  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
3856  return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
3857  }
3858 }
3859 
3860 SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
3861  SelectionDAG &DAG) const {
3862  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3863  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3864  SDLoc DL(Op);
3865  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
3866  !Subtarget->isTargetMachO()) {
3867  const unsigned char MO_NC = AArch64II::MO_NC;
3868  return DAG.getNode(
3869  AArch64ISD::WrapperLarge, DL, PtrVT,
3870  DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3),
3871  DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC),
3872  DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC),
3873  DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC));
3874  } else {
3875  SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE);
3876  SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF |
3877  AArch64II::MO_NC);
3878  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
3879  return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
3880  }
3881 }
3882 
3883 SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
3884  SelectionDAG &DAG) const {
3885  AArch64FunctionInfo *FuncInfo =
3886  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
3887 
3888  SDLoc DL(Op);
3889  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
3890  getPointerTy(DAG.getDataLayout()));
3891  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3892  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3893  MachinePointerInfo(SV), false, false, 0);
3894 }
3895 
3896 SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
3897  SelectionDAG &DAG) const {
3898  // The layout of the va_list struct is specified in the AArch64 Procedure Call
3899  // Standard, section B.3.
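  // For reference, the 32-byte object this function fills in (offsets match
  // the stores below; field names follow AAPCS64 section B.3):
  //   struct va_list {
  //     void *__stack;    // offset 0
  //     void *__gr_top;   // offset 8
  //     void *__vr_top;   // offset 16
  //     int   __gr_offs;  // offset 24
  //     int   __vr_offs;  // offset 28
  //   };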
3900  MachineFunction &MF = DAG.getMachineFunction();
3901  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3902  auto PtrVT = getPointerTy(DAG.getDataLayout());
3903  SDLoc DL(Op);
3904 
3905  SDValue Chain = Op.getOperand(0);
3906  SDValue VAList = Op.getOperand(1);
3907  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3908  SmallVector<SDValue, 4> MemOps;
3909 
3910  // void *__stack at offset 0
3911  SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
3912  MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
3913  MachinePointerInfo(SV), false, false, 8));
3914 
3915  // void *__gr_top at offset 8
3916  int GPRSize = FuncInfo->getVarArgsGPRSize();
3917  if (GPRSize > 0) {
3918  SDValue GRTop, GRTopAddr;
3919 
3920  GRTopAddr =
3921  DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
3922 
3923  GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
3924  GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
3925  DAG.getConstant(GPRSize, DL, PtrVT));
3926 
3927  MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
3928  MachinePointerInfo(SV, 8), false, false, 8));
3929  }
3930 
3931  // void *__vr_top at offset 16
3932  int FPRSize = FuncInfo->getVarArgsFPRSize();
3933  if (FPRSize > 0) {
3934  SDValue VRTop, VRTopAddr;
3935  VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
3936  DAG.getConstant(16, DL, PtrVT));
3937 
3938  VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
3939  VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
3940  DAG.getConstant(FPRSize, DL, PtrVT));
3941 
3942  MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
3943  MachinePointerInfo(SV, 16), false, false, 8));
3944  }
3945 
3946  // int __gr_offs at offset 24
3947  SDValue GROffsAddr =
3948  DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
3949  MemOps.push_back(DAG.getStore(Chain, DL,
3950  DAG.getConstant(-GPRSize, DL, MVT::i32),
3951  GROffsAddr, MachinePointerInfo(SV, 24), false,
3952  false, 4));
3953 
3954  // int __vr_offs at offset 28
3955  SDValue VROffsAddr =
3956  DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
3957  MemOps.push_back(DAG.getStore(Chain, DL,
3958  DAG.getConstant(-FPRSize, DL, MVT::i32),
3959  VROffsAddr, MachinePointerInfo(SV, 28), false,
3960  false, 4));
3961 
3962  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3963 }
3964 
3965 SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
3966  SelectionDAG &DAG) const {
3967  return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG)
3968  : LowerAAPCS_VASTART(Op, DAG);
3969 }
3970 
3971 SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
3972  SelectionDAG &DAG) const {
3973  // AAPCS has three pointers and two ints (= 32 bytes), Darwin has a single
3974  // pointer.
3975  SDLoc DL(Op);
3976  unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32;
3977  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3978  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3979 
3980  return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
3981  Op.getOperand(2),
3982  DAG.getConstant(VaListSize, DL, MVT::i32),
3983  8, false, false, false, MachinePointerInfo(DestSV),
3984  MachinePointerInfo(SrcSV));
3985 }
3986 
3987 SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3988  assert(Subtarget->isTargetDarwin() &&
3989  "automatic va_arg instruction only works on Darwin");
3990 
3991  const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3992  EVT VT = Op.getValueType();
3993  SDLoc DL(Op);
3994  SDValue Chain = Op.getOperand(0);
3995  SDValue Addr = Op.getOperand(1);
3996  unsigned Align = Op.getConstantOperandVal(3);
3997  auto PtrVT = getPointerTy(DAG.getDataLayout());
3998 
3999  SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V),
4000  false, false, false, 0);
4001  Chain = VAList.getValue(1);
4002 
4003  if (Align > 8) {
4004  assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
4005  VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4006  DAG.getConstant(Align - 1, DL, PtrVT));
4007  VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
4008  DAG.getConstant(-(int64_t)Align, DL, PtrVT));
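  // E.g. with Align == 16 this rounds a VAList of 0x1008 up to
  // (0x1008 + 15) & ~15 == 0x1010.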
4009  }
4010 
4011  Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
4012  uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
4013 
4014  // Scalar integer and FP values smaller than 64 bits are implicitly extended
4015  // up to 64 bits. At the very least, we have to increase the striding of the
4016  // vaargs list to match this, and for FP values we need to introduce
4017  // FP_ROUND nodes as well.
4018  if (VT.isInteger() && !VT.isVector())
4019  ArgSize = 8;
4020  bool NeedFPTrunc = false;
4021  if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
4022  ArgSize = 8;
4023  NeedFPTrunc = true;
4024  }
4025 
4026  // Increment the pointer, VAList, to the next vaarg
4027  SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4028  DAG.getConstant(ArgSize, DL, PtrVT));
4029  // Store the incremented VAList to the legalized pointer
4030  SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V),
4031  false, false, 0);
4032 
4033  // Load the actual argument out of the pointer VAList
4034  if (NeedFPTrunc) {
4035  // Load the value as an f64.
4036  SDValue WideFP = DAG.getLoad(MVT::f64, DL, APStore, VAList,
4037  MachinePointerInfo(), false, false, false, 0);
4038  // Round the value down to an f32.
4039  SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
4040  DAG.getIntPtrConstant(1, DL));
4041  SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
4042  // Merge the rounded value with the chain output of the load.
4043  return DAG.getMergeValues(Ops, DL);
4044  }
4045 
4046  return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo(), false,
4047  false, false, 0);
4048 }
4049 
4050 SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
4051  SelectionDAG &DAG) const {
4052  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
4053  MFI->setFrameAddressIsTaken(true);
4054 
4055  EVT VT = Op.getValueType();
4056  SDLoc DL(Op);
4057  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4058  SDValue FrameAddr =
4059  DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
4060  while (Depth--)
4061  FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
4062  MachinePointerInfo(), false, false, false, 0);
4063  return FrameAddr;
4064 }
4065 
4066 // FIXME? Maybe this could be a TableGen attribute on some registers and
4067 // this table could be generated automatically from RegInfo.
4068 unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
4069  SelectionDAG &DAG) const {
4070  unsigned Reg = StringSwitch<unsigned>(RegName)
4071  .Case("sp", AArch64::SP)
4072  .Default(0);
4073  if (Reg)
4074  return Reg;
4075  report_fatal_error(Twine("Invalid register name \""
4076  + StringRef(RegName) + "\"."));
4077 }
4078 
4079 SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
4080  SelectionDAG &DAG) const {
4081  MachineFunction &MF = DAG.getMachineFunction();
4082  MachineFrameInfo *MFI = MF.getFrameInfo();
4083  MFI->setReturnAddressIsTaken(true);
4084 
4085  EVT VT = Op.getValueType();
4086  SDLoc DL(Op);
4087  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4088  if (Depth) {
4089  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4090  SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
4091  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
4092  DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
4093  MachinePointerInfo(), false, false, false, 0);
4094  }
4095 
4096  // Return LR, which contains the return address. Mark it an implicit live-in.
4097  unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
4098  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
4099 }
4100 
4101 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
4102 /// i64 values and takes a 2 x i64 value to shift plus a shift amount.
4103 SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
4104  SelectionDAG &DAG) const {
4105  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4106  EVT VT = Op.getValueType();
4107  unsigned VTBits = VT.getSizeInBits();
4108  SDLoc dl(Op);
4109  SDValue ShOpLo = Op.getOperand(0);
4110  SDValue ShOpHi = Op.getOperand(1);
4111  SDValue ShAmt = Op.getOperand(2);
4112  SDValue ARMcc;
4113  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4114 
4115  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4116 
4117  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
4118  DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
4119  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4120  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
4121  DAG.getConstant(VTBits, dl, MVT::i64));
4122  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4123 
4124  SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
4125  ISD::SETGE, dl, DAG);
4126  SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
4127 
4128  SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4129  SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4130  SDValue Lo =
4131  DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
4132 
4133  // AArch64 shifts larger than the register width are wrapped rather than
4134  // clamped, so we can't just emit "hi >> x".
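  // (Register-controlled shifts on AArch64 use the shift amount modulo the
  // register width, so e.g. a requested shift of 70 on a 64-bit value would
  // actually shift by 6.)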
4135  SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4136  SDValue TrueValHi = Opc == ISD::SRA
4137  ? DAG.getNode(Opc, dl, VT, ShOpHi,
4138  DAG.getConstant(VTBits - 1, dl,
4139  MVT::i64))
4140  : DAG.getConstant(0, dl, VT);
4141  SDValue Hi =
4142  DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
4143 
4144  SDValue Ops[2] = { Lo, Hi };
4145  return DAG.getMergeValues(Ops, dl);
4146 }
4147 
4148 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4149 /// i64 values and takes a 2 x i64 value to shift plus a shift amount.
4150 SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
4151  SelectionDAG &DAG) const {
4152  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4153  EVT VT = Op.getValueType();
4154  unsigned VTBits = VT.getSizeInBits();
4155  SDLoc dl(Op);
4156  SDValue ShOpLo = Op.getOperand(0);
4157  SDValue ShOpHi = Op.getOperand(1);
4158  SDValue ShAmt = Op.getOperand(2);
4159  SDValue ARMcc;
4160 
4161  assert(Op.getOpcode() == ISD::SHL_PARTS);
4162  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
4163  DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
4164  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4165  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
4166  DAG.getConstant(VTBits, dl, MVT::i64));
4167  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4168  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4169 
4170  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4171 
4172  SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
4173  ISD::SETGE, dl, DAG);
4174  SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
4175  SDValue Hi =
4176  DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
4177 
4178  // AArch64 shifts of larger than register sizes are wrapped rather than
4179  // clamped, so we can't just emit "lo << a" if a is too big.
4180  SDValue TrueValLo = DAG.getConstant(0, dl, VT);
4181  SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4182  SDValue Lo =
4183  DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
4184 
4185  SDValue Ops[2] = { Lo, Hi };
4186  return DAG.getMergeValues(Ops, dl);
4187 }
4188 
4189 bool AArch64TargetLowering::isOffsetFoldingLegal(
4190  const GlobalAddressSDNode *GA) const {
4191  // The AArch64 target doesn't support folding offsets into global addresses.
4192  return false;
4193 }
4194 
4195 bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4196  // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
4197  // FIXME: We should be able to handle f128 as well with a clever lowering.
4198  if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32))
4199  return true;
4200 
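  // Otherwise the immediate must fit the 8-bit FMOV encoding, i.e.
  // +/-(1 + m/16) * 2^e with m in [0,15] and e in [-3,4]; for example 1.0,
  // 0.5 and -1.25 are encodable while 0.1 is not. (Summary only; see
  // AArch64_AM::getFP32Imm/getFP64Imm below for the authoritative check.)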
4201  if (VT == MVT::f64)
4202  return AArch64_AM::getFP64Imm(Imm) != -1;
4203  else if (VT == MVT::f32)
4204  return AArch64_AM::getFP32Imm(Imm) != -1;
4205  return false;
4206 }
4207 
4208 //===----------------------------------------------------------------------===//
4209 // AArch64 Optimization Hooks
4210 //===----------------------------------------------------------------------===//
4211 
4212 //===----------------------------------------------------------------------===//
4213 // AArch64 Inline Assembly Support
4214 //===----------------------------------------------------------------------===//
4215 
4216 // Table of Constraints
4217 // TODO: This is the current set of constraints supported by ARM for the
4218 // compiler, not all of them may make sense, e.g. S may be difficult to support.
4219 //
4220 // r - A general register
4221 // w - An FP/SIMD register of some size in the range v0-v31
4222 // x - An FP/SIMD register of some size in the range v0-v15
4223 // I - Constant that can be used with an ADD instruction
4224 // J - Constant that can be used with a SUB instruction
4225 // K - Constant that can be used with a 32-bit logical instruction
4226 // L - Constant that can be used with a 64-bit logical instruction
4227 // M - Constant that can be used as a 32-bit MOV immediate
4228 // N - Constant that can be used as a 64-bit MOV immediate
4229 // Q - A memory reference with base register and no offset
4230 // S - A symbolic address
4231 // Y - Floating point constant zero
4232 // Z - Integer constant zero
4233 //
4234 // Note that general register operands will be output using their 64-bit x
4235 // register name, whatever the size of the variable, unless the asm operand
4236 // is prefixed by the %w modifier. Floating-point and SIMD register operands
4237 // will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
4238 // %q modifier.
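// Illustrative (hypothetical) uses of some of these constraints from C code:
//   asm("fadd %s0, %s1, %s2" : "=w"(res) : "w"(a), "w"(b));  // 'w' with %s
//   asm("add  %0, %1, %2"    : "=r"(out) : "r"(x), "r"(y));  // 'r', x-regs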
4239 
4240 /// getConstraintType - Given a constraint letter, return the type of
4241 /// constraint it is for this target.
4242 AArch64TargetLowering::ConstraintType
4243 AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
4244  if (Constraint.size() == 1) {
4245  switch (Constraint[0]) {
4246  default:
4247  break;
4248  case 'z':
4249  return C_Other;
4250  case 'x':
4251  case 'w':
4252  return C_RegisterClass;
4253  // An address with a single base register. Due to the way we
4254  // currently handle addresses it is the same as 'r'.
4255  case 'Q':
4256  return C_Memory;
4257  }
4258  }
4259  return TargetLowering::getConstraintType(Constraint);
4260 }
4261 
4262 /// Examine constraint type and operand type and determine a weight value.
4263 /// This object must already have been set up with the operand type
4264 /// and the current alternative constraint selected.
4265 TargetLowering::ConstraintWeight
4266 AArch64TargetLowering::getSingleConstraintMatchWeight(
4267  AsmOperandInfo &info, const char *constraint) const {
4268  ConstraintWeight weight = CW_Invalid;
4269  Value *CallOperandVal = info.CallOperandVal;
4270  // If we don't have a value, we can't do a match,
4271  // but allow it at the lowest weight.
4272  if (!CallOperandVal)
4273  return CW_Default;
4274  Type *type = CallOperandVal->getType();
4275  // Look at the constraint type.
4276  switch (*constraint) {
4277  default:
4278  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
4279  break;
4280  case 'x':
4281  case 'w':
4282  if (type->isFloatingPointTy() || type->isVectorTy())
4283  weight = CW_Register;
4284  break;
4285  case 'z':
4286  weight = CW_Constant;
4287  break;
4288  }
4289  return weight;
4290 }
4291 
4292 std::pair<unsigned, const TargetRegisterClass *>
4293 AArch64TargetLowering::getRegForInlineAsmConstraint(
4294  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
4295  if (Constraint.size() == 1) {
4296  switch (Constraint[0]) {
4297  case 'r':
4298  if (VT.getSizeInBits() == 64)
4299  return std::make_pair(0U, &AArch64::GPR64commonRegClass);
4300  return std::make_pair(0U, &AArch64::GPR32commonRegClass);
4301  case 'w':
4302  if (VT == MVT::f32)
4303  return std::make_pair(0U, &AArch64::FPR32RegClass);
4304  if (VT.getSizeInBits() == 64)
4305  return std::make_pair(0U, &AArch64::FPR64RegClass);
4306  if (VT.getSizeInBits() == 128)
4307  return std::make_pair(0U, &AArch64::FPR128RegClass);
4308  break;
4309  // The instructions that this constraint is designed for can
4310  // only take 128-bit registers so just use that regclass.
4311  case 'x':
4312  if (VT.getSizeInBits() == 128)
4313  return std::make_pair(0U, &AArch64::FPR128_loRegClass);
4314  break;
4315  }
4316  }
4317  if (StringRef("{cc}").equals_lower(Constraint))
4318  return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
4319 
4320  // Use the default implementation in TargetLowering to convert the register
4321  // constraint into a member of a register class.
4322  std::pair<unsigned, const TargetRegisterClass *> Res;
4323  Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4324 
4325  // Not found as a standard register?
4326  if (!Res.second) {
4327  unsigned Size = Constraint.size();
4328  if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
4329  tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
4330  int RegNo;
4331  bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
4332  if (!Failed && RegNo >= 0 && RegNo <= 31) {
4333  // v0 - v31 are aliases of q0 - q31.
4334  // By default we'll emit v0-v31 for this unless there's a modifier where
4335  // we'll emit the correct register as well.
4336  Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
4337  Res.second = &AArch64::FPR128RegClass;
4338  }
4339  }
4340  }
4341 
4342  return Res;
4343 }
4344 
4345 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
4346 /// vector. If it is invalid, don't add anything to Ops.
4347 void AArch64TargetLowering::LowerAsmOperandForConstraint(
4348  SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
4349  SelectionDAG &DAG) const {
4350  SDValue Result;
4351 
4352  // Currently only support length 1 constraints.
4353  if (Constraint.length() != 1)
4354  return;
4355 
4356  char ConstraintLetter = Constraint[0];
4357  switch (ConstraintLetter) {
4358  default:
4359  break;
4360 
4361  // This set of constraints deal with valid constants for various instructions.
4362  // Validate and return a target constant for them if we can.
4363  case 'z': {
4364  // 'z' maps to xzr or wzr so it needs an input of 0.
4365  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4366  if (!C || C->getZExtValue() != 0)
4367  return;
4368 
4369  if (Op.getValueType() == MVT::i64)
4370  Result = DAG.getRegister(AArch64::XZR, MVT::i64);
4371  else
4372  Result = DAG.getRegister(AArch64::WZR, MVT::i32);
4373  break;
4374  }
4375 
4376  case 'I':
4377  case 'J':
4378  case 'K':
4379  case 'L':
4380  case 'M':
4381  case 'N':
4382  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4383  if (!C)
4384  return;
4385 
4386  // Grab the value and do some validation.
4387  uint64_t CVal = C->getZExtValue();
4388  switch (ConstraintLetter) {
4389  // The I constraint applies only to simple ADD or SUB immediate operands:
4390  // i.e. 0 to 4095 with optional shift by 12
4391  // The J constraint applies only to ADD or SUB immediates that would be
4392  // valid when negated, i.e. if [an add pattern] were to be output as a SUB
4393  // instruction [or vice versa], in other words -1 to -4095 with optional
4394  // left shift by 12.
4395  case 'I':
4396  if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
4397  break;
4398  return;
4399  case 'J': {
4400  uint64_t NVal = -C->getSExtValue();
4401  if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
4402  CVal = C->getSExtValue();
4403  break;
4404  }
4405  return;
4406  }
4407  // The K and L constraints apply *only* to logical immediates, including
4408  // what used to be the MOVI alias for ORR (though the MOVI alias has now
4409  // been removed and MOV should be used). So these constraints have to
4410  // distinguish between bit patterns that are valid 32-bit or 64-bit
4411  // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
4412  // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
4413  // versa.
4414  case 'K':
4415  if (AArch64_AM::isLogicalImmediate(CVal, 32))
4416  break;
4417  return;
4418  case 'L':
4419  if (AArch64_AM::isLogicalImmediate(CVal, 64))
4420  break;
4421  return;
4422  // The M and N constraints are a superset of K and L respectively, for use
4423  // with the MOV (immediate) alias. As well as the logical immediates they
4424  // also match 32 or 64-bit immediates that can be loaded either using a
4425  // *single* MOVZ or MOVN , such as 32-bit 0x12340000, 0x00001234, 0xffffedca
4426  // (M) or 64-bit 0x1234000000000000 (N) etc.
4427  // As a note some of this code is liberally stolen from the asm parser.
4428  case 'M': {
4429  if (!isUInt<32>(CVal))
4430  return;
4431  if (AArch64_AM::isLogicalImmediate(CVal, 32))
4432  break;
4433  if ((CVal & 0xFFFF) == CVal)
4434  break;
4435  if ((CVal & 0xFFFF0000ULL) == CVal)
4436  break;
4437  uint64_t NCVal = ~(uint32_t)CVal;
4438  if ((NCVal & 0xFFFFULL) == NCVal)
4439  break;
4440  if ((NCVal & 0xFFFF0000ULL) == NCVal)
4441  break;
4442  return;
4443  }
4444  case 'N': {
4445  if (AArch64_AM::isLogicalImmediate(CVal, 64))
4446  break;
4447  if ((CVal & 0xFFFFULL) == CVal)
4448  break;
4449  if ((CVal & 0xFFFF0000ULL) == CVal)
4450  break;
4451  if ((CVal & 0xFFFF00000000ULL) == CVal)
4452  break;
4453  if ((CVal & 0xFFFF000000000000ULL) == CVal)
4454  break;
4455  uint64_t NCVal = ~CVal;
4456  if ((NCVal & 0xFFFFULL) == NCVal)
4457  break;
4458  if ((NCVal & 0xFFFF0000ULL) == NCVal)
4459  break;
4460  if ((NCVal & 0xFFFF00000000ULL) == NCVal)
4461  break;
4462  if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
4463  break;
4464  return;
4465  }
4466  default:
4467  return;
4468  }
4469 
4470  // All assembler immediates are 64-bit integers.
4471  Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
4472  break;
4473  }
4474 
4475  if (Result.getNode()) {
4476  Ops.push_back(Result);
4477  return;
4478  }
4479 
4480  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
4481 }
4482 
4483 //===----------------------------------------------------------------------===//
4484 // AArch64 Advanced SIMD Support
4485 //===----------------------------------------------------------------------===//
4486 
4487 /// WidenVector - Given a value in the V64 register class, produce the
4488 /// equivalent value in the V128 register class.
4489 static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
4490  EVT VT = V64Reg.getValueType();
4491  unsigned NarrowSize = VT.getVectorNumElements();
4492  MVT EltTy = VT.getVectorElementType().getSimpleVT();
4493  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
4494  SDLoc DL(V64Reg);
4495 
4496  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
4497  V64Reg, DAG.getConstant(0, DL, MVT::i32));
4498 }
4499 
4500 /// getExtFactor - Determine the adjustment factor for the position when
4501 /// generating an "extract from vector registers" instruction.
4502 static unsigned getExtFactor(SDValue &V) {
4503  EVT EltType = V.getValueType().getVectorElementType();
4504  return EltType.getSizeInBits() / 8;
4505 }
4506 
4507 /// NarrowVector - Given a value in the V128 register class, produce the
4508 /// equivalent value in the V64 register class.
4509 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
4510  EVT VT = V128Reg.getValueType();
4511  unsigned WideSize = VT.getVectorNumElements();
4512  MVT EltTy = VT.getVectorElementType().getSimpleVT();
4513  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
4514  SDLoc DL(V128Reg);
4515 
4516  return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
4517 }
4518 
4519 // Gather data to see if the operation can be modelled as a
4520 // shuffle in combination with VEXTs.
4521 SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
4522  SelectionDAG &DAG) const {
4523  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
4524  SDLoc dl(Op);
4525  EVT VT = Op.getValueType();
4526  unsigned NumElts = VT.getVectorNumElements();
4527 
4528  struct ShuffleSourceInfo {
4529  SDValue Vec;
4530  unsigned MinElt;
4531  unsigned MaxElt;
4532 
4533  // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
4534  // be compatible with the shuffle we intend to construct. As a result
4535  // ShuffleVec will be some sliding window into the original Vec.
4536  SDValue ShuffleVec;
4537 
4538  // Code should guarantee that element i in Vec starts at element "WindowBase
4539  // + i * WindowScale in ShuffleVec".
4540  int WindowBase;
4541  int WindowScale;
4542 
4543  bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
4544  ShuffleSourceInfo(SDValue Vec)
4545  : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
4546  WindowScale(1) {}
4547  };
4548 
4549  // First gather all vectors used as an immediate source for this BUILD_VECTOR
4550  // node.
4551  SmallVector<ShuffleSourceInfo, 2> Sources;
4552  for (unsigned i = 0; i < NumElts; ++i) {
4553  SDValue V = Op.getOperand(i);
4554  if (V.getOpcode() == ISD::UNDEF)
4555  continue;
4556  else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
4557  // A shuffle can only come from building a vector from various
4558  // elements of other vectors.
4559  return SDValue();
4560  }
4561 
4562  // Add this element source to the list if it's not already there.
4563  SDValue SourceVec = V.getOperand(0);
4564  auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
4565  if (Source == Sources.end())
4566  Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
4567 
4568  // Update the minimum and maximum lane number seen.
4569  unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
4570  Source->MinElt = std::min(Source->MinElt, EltNo);
4571  Source->MaxElt = std::max(Source->MaxElt, EltNo);
4572  }
4573 
4574  // Currently only do something sane when at most two source vectors
4575  // are involved.
4576  if (Sources.size() > 2)
4577  return SDValue();
4578 
4579  // Find out the smallest element size among result and two sources, and use
4580  // it as element size to build the shuffle_vector.
4581  EVT SmallestEltTy = VT.getVectorElementType();
4582  for (auto &Source : Sources) {
4583  EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
4584  if (SrcEltTy.bitsLT(SmallestEltTy)) {
4585  SmallestEltTy = SrcEltTy;
4586  }
4587  }
4588  unsigned ResMultiplier =
4589  VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
4590  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
4591  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
4592 
4593  // If the source vector is too wide or too narrow, we may nevertheless be able
4594  // to construct a compatible shuffle either by concatenating it with UNDEF or
4595  // extracting a suitable range of elements.
4596  for (auto &Src : Sources) {
4597  EVT SrcVT = Src.ShuffleVec.getValueType();
4598 
4599  if (SrcVT.getSizeInBits() == VT.getSizeInBits())
4600  continue;
4601 
4602  // This stage of the search produces a source with the same element type as
4603  // the original, but with a total width matching the BUILD_VECTOR output.
4604  EVT EltVT = SrcVT.getVectorElementType();
4605  unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
4606  EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
4607 
4608  if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
4609  assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
4610  // We can pad out the smaller vector for free, so if it's part of a
4611  // shuffle...
4612  Src.ShuffleVec =
4613  DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
4614  DAG.getUNDEF(Src.ShuffleVec.getValueType()));
4615  continue;
4616  }
4617 
4618  assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
4619 
4620  if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
4621  // Span too large for a VEXT to cope
4622  return SDValue();
4623  }
4624 
4625  if (Src.MinElt >= NumSrcElts) {
4626  // The extraction can just take the second half
4627  Src.ShuffleVec =
4628  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
4629  DAG.getConstant(NumSrcElts, dl, MVT::i64));
4630  Src.WindowBase = -NumSrcElts;
4631  } else if (Src.MaxElt < NumSrcElts) {
4632  // The extraction can just take the first half
4633  Src.ShuffleVec =
4634  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
4635  DAG.getConstant(0, dl, MVT::i64));
4636  } else {
4637  // An actual VEXT is needed
4638  SDValue VEXTSrc1 =
4639  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
4640  DAG.getConstant(0, dl, MVT::i64));
4641  SDValue VEXTSrc2 =
4642  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
4643  DAG.getConstant(NumSrcElts, dl, MVT::i64));
4644  unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
4645 
4646  Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
4647  VEXTSrc2,
4648  DAG.getConstant(Imm, dl, MVT::i32));
4649  Src.WindowBase = -Src.MinElt;
4650  }
4651  }
4652 
4653  // Another possible incompatibility occurs from the vector element types. We
4654  // can fix this by bitcasting the source vectors to the same type we intend
4655  // for the shuffle.
4656  for (auto &Src : Sources) {
4657  EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
4658  if (SrcEltTy == SmallestEltTy)
4659  continue;
4660  assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
4661  Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
4662  Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
4663  Src.WindowBase *= Src.WindowScale;
4664  }
4665 
4666  // Final sanity check before we try to actually produce a shuffle.
4667  DEBUG(
4668  for (auto Src : Sources)
4669  assert(Src.ShuffleVec.getValueType() == ShuffleVT);
4670  );
4671 
4672  // The stars all align, our next step is to produce the mask for the shuffle.
4673  SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
4674  int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
4675  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
4676  SDValue Entry = Op.getOperand(i);
4677  if (Entry.getOpcode() == ISD::UNDEF)
4678  continue;
4679 
4680  auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
4681  int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
4682 
4683  // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
4684  // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
4685  // segment.
4686  EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
4687  int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
4688  VT.getVectorElementType().getSizeInBits());
4689  int LanesDefined = BitsDefined / BitsPerShuffleLane;
4690 
4691  // This source is expected to fill ResMultiplier lanes of the final shuffle,
4692  // starting at the appropriate offset.
4693  int *LaneMask = &Mask[i * ResMultiplier];
4694 
4695  int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
4696  ExtractBase += NumElts * (Src - Sources.begin());
4697  for (int j = 0; j < LanesDefined; ++j)
4698  LaneMask[j] = ExtractBase + j;
4699  }
4700 
4701  // Final check before we try to produce nonsense...
4702  if (!isShuffleMaskLegal(Mask, ShuffleVT))
4703  return SDValue();
4704 
4705  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
4706  for (unsigned i = 0; i < Sources.size(); ++i)
4707  ShuffleOps[i] = Sources[i].ShuffleVec;
4708 
4709  SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
4710  ShuffleOps[1], &Mask[0]);
4711  return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
4712 }
4713 
4714 // check if an EXT instruction can handle the shuffle mask when the
4715 // vector sources of the shuffle are the same.
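// For example, for a v8i8 single-source shuffle the mask
// <3, 4, 5, 6, 7, 0, 1, 2> is an EXT with Imm == 3.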
4716 static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
4717  unsigned NumElts = VT.getVectorNumElements();
4718 
4719  // Assume that the first shuffle index is not UNDEF. Fail if it is.
4720  if (M[0] < 0)
4721  return false;
4722 
4723  Imm = M[0];
4724 
4725  // If this is a VEXT shuffle, the immediate value is the index of the first
4726  // element. The other shuffle indices must be the successive elements after
4727  // the first one.
4728  unsigned ExpectedElt = Imm;
4729  for (unsigned i = 1; i < NumElts; ++i) {
4730  // Increment the expected index. If it wraps around, just follow it
4731  // back to index zero and keep going.
4732  ++ExpectedElt;
4733  if (ExpectedElt == NumElts)
4734  ExpectedElt = 0;
4735 
4736  if (M[i] < 0)
4737  continue; // ignore UNDEF indices
4738  if (ExpectedElt != static_cast<unsigned>(M[i]))
4739  return false;
4740  }
4741 
4742  return true;
4743 }
4744 
4745 // check if an EXT instruction can handle the shuffle mask when the
4746 // vector sources of the shuffle are different.
4747 static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
4748  unsigned &Imm) {
4749  // Look for the first non-undef element.
4750  const int *FirstRealElt = std::find_if(M.begin(), M.end(),
4751  [](int Elt) {return Elt >= 0;});
4752 
4753  // Benefit from APInt to handle overflow when calculating the expected element.
4754  unsigned NumElts = VT.getVectorNumElements();
4755  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
4756  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
4757  // The following shuffle indices must be the successive elements after the
4758  // first real element.
4759  const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
4760  [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
4761  if (FirstWrongElt != M.end())
4762  return false;
4763 
4764  // The index of an EXT is the first element if it is not UNDEF.
4765  // Watch out for the beginning UNDEFs. The EXT index should be the expected
4766  // value of the first element. E.g.
4767  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
4768  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
4769  // ExpectedElt is the last mask index plus 1.
4770  Imm = ExpectedElt.getZExtValue();
4771 
4772  // There are two different cases that require reversing the input vectors.
4773  // For example, for vector <4 x i32> we have the following cases,
4774  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
4775  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
4776  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
4777  // to reverse two input vectors.
4778  if (Imm < NumElts)
4779  ReverseEXT = true;
4780  else
4781  Imm -= NumElts;
4782 
4783  return true;
4784 }
4785 
4786 /// isREVMask - Check if a vector shuffle corresponds to a REV
4787 /// instruction with the specified blocksize. (The order of the elements
4788 /// within each block of the vector is reversed.)
4789 static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
4790  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
4791  "Only possible block sizes for REV are: 16, 32, 64");
4792 
4793  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4794  if (EltSz == 64)
4795  return false;
4796 
4797  unsigned NumElts = VT.getVectorNumElements();
4798  unsigned BlockElts = M[0] + 1;
4799  // If the first shuffle index is UNDEF, be optimistic.
4800  if (M[0] < 0)
4801  BlockElts = BlockSize / EltSz;
4802 
4803  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
4804  return false;
4805 
4806  for (unsigned i = 0; i < NumElts; ++i) {
4807  if (M[i] < 0)
4808  continue; // ignore UNDEF indices
4809  if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
4810  return false;
4811  }
4812 
4813  return true;
4814 }
4815 
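/// isZIPMask - Check if a vector shuffle corresponds to a ZIP1/ZIP2
/// interleave. For example, for v4i32 ZIP1 uses the mask <0, 4, 1, 5> and
/// ZIP2 uses <2, 6, 3, 7>.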
4816 static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4817  unsigned NumElts = VT.getVectorNumElements();
4818  WhichResult = (M[0] == 0 ? 0 : 1);
4819  unsigned Idx = WhichResult * NumElts / 2;
4820  for (unsigned i = 0; i != NumElts; i += 2) {
4821  if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
4822  (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
4823  return false;
4824  Idx += 1;
4825  }
4826 
4827  return true;
4828 }
4829 
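/// isUZPMask - Check if a vector shuffle corresponds to a UZP1/UZP2
/// de-interleave. For example, for v4i32 UZP1 uses the mask <0, 2, 4, 6> and
/// UZP2 uses <1, 3, 5, 7>.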
4830 static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4831  unsigned NumElts = VT.getVectorNumElements();
4832  WhichResult = (M[0] == 0 ? 0 : 1);
4833  for (unsigned i = 0; i != NumElts; ++i) {
4834  if (M[i] < 0)
4835  continue; // ignore UNDEF indices
4836  if ((unsigned)M[i] != 2 * i + WhichResult)
4837  return false;
4838  }
4839 
4840  return true;
4841 }
4842 
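/// isTRNMask - Check if a vector shuffle corresponds to a TRN1/TRN2
/// transpose. For example, for v4i32 TRN1 uses the mask <0, 4, 2, 6> and
/// TRN2 uses <1, 5, 3, 7>.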
4843 static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4844  unsigned NumElts = VT.getVectorNumElements();
4845  WhichResult = (M[0] == 0 ? 0 : 1);
4846  for (unsigned i = 0; i < NumElts; i += 2) {
4847  if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
4848  (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
4849  return false;
4850  }
4851  return true;
4852 }
4853 
4854 /// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
4855 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
4856 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
4857 static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4858  unsigned NumElts = VT.getVectorNumElements();
4859  WhichResult = (M[0] == 0 ? 0 : 1);
4860  unsigned Idx = WhichResult * NumElts / 2;
4861  for (unsigned i = 0; i != NumElts; i += 2) {
4862  if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
4863  (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
4864  return false;
4865  Idx += 1;
4866  }
4867 
4868  return true;
4869 }
4870 
4871 /// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
4872 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
4873 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
4874 static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4875  unsigned Half = VT.getVectorNumElements() / 2;
4876  WhichResult = (M[0] == 0 ? 0 : 1);
4877  for (unsigned j = 0; j != 2; ++j) {
4878  unsigned Idx = WhichResult;
4879  for (unsigned i = 0; i != Half; ++i) {
4880  int MIdx = M[i + j * Half];
4881  if (MIdx >= 0 && (unsigned)MIdx != Idx)
4882  return false;
4883  Idx += 2;
4884  }
4885  }
4886 
4887  return true;
4888 }
4889 
4890 /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
4891 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
4892 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
4893 static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
4894  unsigned NumElts = VT.getVectorNumElements();
4895  WhichResult = (M[0] == 0 ? 0 : 1);
4896  for (unsigned i = 0; i < NumElts; i += 2) {
4897  if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
4898  (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
4899  return false;
4900  }
4901  return true;
4902 }
4903 
4904 static bool isINSMask(ArrayRef<int> M, int NumInputElements,
4905  bool &DstIsLeft, int &Anomaly) {
4906  if (M.size() != static_cast<size_t>(NumInputElements))
4907  return false;
4908 
4909  int NumLHSMatch = 0, NumRHSMatch = 0;
4910  int LastLHSMismatch = -1, LastRHSMismatch = -1;
4911 
4912  for (int i = 0; i < NumInputElements; ++i) {
4913  if (M[i] == -1) {
4914  ++NumLHSMatch;
4915  ++NumRHSMatch;
4916  continue;
4917  }
4918 
4919  if (M[i] == i)
4920  ++NumLHSMatch;
4921  else
4922  LastLHSMismatch = i;
4923 
4924  if (M[i] == i + NumInputElements)
4925  ++NumRHSMatch;
4926  else
4927  LastRHSMismatch = i;
4928  }
4929 
4930  if (NumLHSMatch == NumInputElements - 1) {
4931  DstIsLeft = true;
4932  Anomaly = LastLHSMismatch;
4933  return true;
4934  } else if (NumRHSMatch == NumInputElements - 1) {
4935  DstIsLeft = false;
4936  Anomaly = LastRHSMismatch;
4937  return true;
4938  }
4939 
4940  return false;
4941 }
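// For example, with NumInputElements == 4 the mask <0, 1, 6, 3> gives
// NumLHSMatch == 3, so DstIsLeft is true and Anomaly is 2: every lane is
// taken from the LHS except lane 2, which is filled by an INS from the RHS.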
4942 
4943 static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
4944  if (VT.getSizeInBits() != 128)
4945  return false;
4946 
4947  unsigned NumElts = VT.getVectorNumElements();
4948 
4949  for (int I = 0, E = NumElts / 2; I != E; I++) {
4950  if (Mask[I] != I)
4951  return false;
4952  }
4953 
4954  int Offset = NumElts / 2;
4955  for (int I = NumElts / 2, E = NumElts; I != E; I++) {
4956  if (Mask[I] != I + SplitLHS * Offset)
4957  return false;
4958  }
4959 
4960  return true;
4961 }
4962 
4963 static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
4964  SDLoc DL(Op);
4965  EVT VT = Op.getValueType();
4966  SDValue V0 = Op.getOperand(0);
4967  SDValue V1 = Op.getOperand(1);
4968  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
4969 
4970  if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
4971  VT.getVectorElementType() != V1.getValueType().getVectorElementType())
4972  return SDValue();
4973 
4974  bool SplitV0 = V0.getValueType().getSizeInBits() == 128;
4975 
4976  if (!isConcatMask(Mask, VT, SplitV0))
4977  return SDValue();
4978 
4979  EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
4980  VT.getVectorNumElements() / 2);
4981  if (SplitV0) {
4982  V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
4983  DAG.getConstant(0, DL, MVT::i64));
4984  }
4985  if (V1.getValueType().getSizeInBits() == 128) {
4986  V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
4987  DAG.getConstant(0, DL, MVT::i64));
4988  }
4989  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
4990 }
4991 
4992 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
4993 /// the specified operations to build the shuffle.
4994 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
4995  SDValue RHS, SelectionDAG &DAG,
4996  SDLoc dl) {
4997  unsigned OpNum = (PFEntry >> 26) & 0x0F;
4998  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
4999  unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
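 // A PFEntry packs a whole recipe into 32 bits: bits [31:30] hold the cost,
 // [29:26] the opcode handled below, [25:13] LHSID and [12:0] RHSID. Each ID
 // is a 4-lane mask encoded in base 9 with digit 8 meaning undef, which is
 // why the OP_COPY checks below compare against (1*9+2)*9+3 and
 // ((4*9+5)*9+6)*9+7, i.e. the identity masks <0,1,2,3> and <4,5,6,7>.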
5000 
5001  enum {
5002  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
5003  OP_VREV,
5004  OP_VDUP0,
5005  OP_VDUP1,
5006  OP_VDUP2,
5007  OP_VDUP3,
5008  OP_VEXT1,
5009  OP_VEXT2,
5010  OP_VEXT3,
5011  OP_VUZPL, // VUZP, left result
5012  OP_VUZPR, // VUZP, right result
5013  OP_VZIPL, // VZIP, left result
5014  OP_VZIPR, // VZIP, right result
5015  OP_VTRNL, // VTRN, left result
5016  OP_VTRNR // VTRN, right result
5017  };
5018 
5019  if (OpNum == OP_COPY) {
5020  if (LHSID == (1 * 9 + 2) * 9 + 3)
5021  return LHS;
5022  assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
5023  return RHS;
5024  }
5025 
5026  SDValue OpLHS, OpRHS;
5027  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
5028  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
5029  EVT VT = OpLHS.getValueType();
5030 
5031  switch (OpNum) {
5032  default:
5033  llvm_unreachable("Unknown shuffle opcode!");
5034  case OP_VREV:
5035  // VREV divides the vector in half and swaps within the half.
5036  if (VT.getVectorElementType() == MVT::i32 ||
5037  VT.getVectorElementType() == MVT::f32)
5038  return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
5039  // vrev <4 x i16> -> REV32
5040  if (VT.getVectorElementType() == MVT::i16 ||
5041  VT.getVectorElementType() == MVT::f16)
5042  return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
5043  // vrev <4 x i8> -> REV16
5044  assert(VT.getVectorElementType() == MVT::i8);
5045  return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
5046  case OP_VDUP0:
5047  case OP_VDUP1:
5048  case OP_VDUP2:
5049  case OP_VDUP3: {
5050  EVT EltTy = VT.getVectorElementType();
5051  unsigned Opcode;
5052  if (EltTy == MVT::i8)
5053  Opcode = AArch64ISD::DUPLANE8;
5054  else if (EltTy == MVT::i16 || EltTy == MVT::f16)
5055  Opcode = AArch64ISD::DUPLANE16;
5056  else if (EltTy == MVT::i32 || EltTy == MVT::f32)
5057  Opcode = AArch64ISD::DUPLANE32;
5058  else if (EltTy == MVT::i64 || EltTy == MVT::f64)
5059  Opcode = AArch64ISD::DUPLANE64;
5060  else
5061  llvm_unreachable("Invalid vector element type?");
5062 
5063  if (VT.getSizeInBits() == 64)
5064  OpLHS = WidenVector(OpLHS, DAG);
5065  SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
5066  return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
5067  }
5068  case OP_VEXT1:
5069  case OP_VEXT2:
5070  case OP_VEXT3: {
5071  unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
5072  return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
5073  DAG.getConstant(Imm, dl, MVT::i32));
5074  }
5075  case OP_VUZPL:
5076  return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
5077  OpRHS);
5078  case OP_VUZPR:
5079  return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
5080  OpRHS);
5081  case OP_VZIPL:
5082  return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
5083  OpRHS);
5084  case OP_VZIPR:
5085  return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
5086  OpRHS);
5087  case OP_VTRNL:
5088  return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
5089  OpRHS);
5090  case OP_VTRNR:
5091  return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
5092  OpRHS);
5093  }
5094 }
5095 
5096 static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
5097  SelectionDAG &DAG) {
5098  // Check to see if we can use the TBL instruction.
5099  SDValue V1 = Op.getOperand(0);
5100  SDValue V2 = Op.getOperand(1);
5101  SDLoc DL(Op);
5102 
5103  EVT EltVT = Op.getValueType().getVectorElementType();
5104  unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
5105 
5106  SmallVector<SDValue, 8> TBLMask;
5107  for (int Val : ShuffleMask) {
5108  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5109  unsigned Offset = Byte + Val * BytesPerElt;
5110  TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
5111  }
5112  }
5113 
5114  MVT IndexVT = MVT::v8i8;
5115  unsigned IndexLen = 8;
5116  if (Op.getValueType().getSizeInBits() == 128) {
5117  IndexVT = MVT::v16i8;
5118  IndexLen = 16;
5119  }
5120 
5121  SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
5122  SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
5123 
5124  SDValue Shuffle;
5125  if (V2.getNode()->getOpcode() == ISD::UNDEF) {
5126  if (IndexLen == 8)
5127  V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
5128  Shuffle = DAG.getNode(
5129  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
5130  DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
5131  DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
5132  makeArrayRef(TBLMask.data(), IndexLen)));
5133  } else {
5134  if (IndexLen == 8) {
5135  V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
5136  Shuffle = DAG.getNode(
5137  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
5138  DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
5139  DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
5140  makeArrayRef(TBLMask.data(), IndexLen)));
5141  } else {
5142  // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
5143  // cannot currently represent the register constraints on the input
5144  // table registers.
5145  // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
5146  // DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
5147  // &TBLMask[0], IndexLen));
5148  Shuffle = DAG.getNode(
5149  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
5150  DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32),
5151  V1Cst, V2Cst,
5152  DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
5153  makeArrayRef(TBLMask.data(), IndexLen)));
5154  }
5155  }
5156  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
5157 }
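// For example, lowering a <4 x i16> shuffle with mask <0, 2, 4, 6> expands to
// the byte-level TBL mask {0,1, 4,5, 8,9, 12,13}; since the operands are
// 64-bit and the second operand is used, both are bitcast to v8i8 and
// concatenated into one v16i8 table register, and a single-register tbl1 is
// emitted.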
5158 
5159 static unsigned getDUPLANEOp(EVT EltType) {
5160  if (EltType == MVT::i8)
5161  return AArch64ISD::DUPLANE8;
5162  if (EltType == MVT::i16 || EltType == MVT::f16)
5163  return AArch64ISD::DUPLANE16;
5164  if (EltType == MVT::i32 || EltType == MVT::f32)
5165  return AArch64ISD::DUPLANE32;
5166  if (EltType == MVT::i64 || EltType == MVT::f64)
5167  return AArch64ISD::DUPLANE64;
5168 
5169  llvm_unreachable("Invalid vector element type?");
5170 }
5171 
5172 SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
5173  SelectionDAG &DAG) const {
5174  SDLoc dl(Op);
5175  EVT VT = Op.getValueType();
5176 
5177  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5178 
5179  // Convert shuffles that are directly supported on NEON to target-specific
5180  // DAG nodes, instead of keeping them as shuffles and matching them again
5181  // during code selection. This is more efficient and avoids the possibility
5182  // of inconsistencies between legalization and selection.
5183  ArrayRef<int> ShuffleMask = SVN->getMask();
5184 
5185  SDValue V1 = Op.getOperand(0);
5186  SDValue V2 = Op.getOperand(1);
5187 
5188  if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0],
5189  V1.getValueType().getSimpleVT())) {
5190  int Lane = SVN->getSplatIndex();
5191  // If this is undef splat, generate it via "just" vdup, if possible.
5192  if (Lane == -1)
5193  Lane = 0;
5194 
5195  if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
5196  return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
5197  V1.getOperand(0));
5198  // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
5199  // constant. If so, we can just reference the lane's definition directly.
5200  if (V1.getOpcode() == ISD::BUILD_VECTOR &&
5201  !isa<ConstantSDNode>(V1.getOperand(Lane)))
5202  return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
5203 
5204  // Otherwise, duplicate from the lane of the input vector.
5205  unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
5206 
5207  // SelectionDAGBuilder may have "helpfully" already extracted or concatenated
5208  // to make a vector of the same size as this SHUFFLE. We can ignore the
5209  // extract entirely, and canonicalise the concat using WidenVector.
5210  if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
5211  Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
5212  V1 = V1.getOperand(0);
5213  } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {
5214  unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
5215  Lane -= Idx * VT.getVectorNumElements() / 2;
5216  V1 = WidenVector(V1.getOperand(Idx), DAG);
5217  } else if (VT.getSizeInBits() == 64)
5218  V1 = WidenVector(V1, DAG);
5219 
5220  return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64));
5221  }
5222 
5223  if (isREVMask(ShuffleMask, VT, 64))
5224  return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
5225  if (isREVMask(ShuffleMask, VT, 32))
5226  return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
5227  if (isREVMask(ShuffleMask, VT, 16))
5228  return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
5229 
5230  bool ReverseEXT = false;
5231  unsigned Imm;
5232  if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
5233  if (ReverseEXT)
5234  std::swap(V1, V2);
5235  Imm *= getExtFactor(V1);
5236  return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
5237  DAG.getConstant(Imm, dl, MVT::i32));
5238  } else if (V2->getOpcode() == ISD::UNDEF &&
5239  isSingletonEXTMask(ShuffleMask, VT, Imm)) {
5240  Imm *= getExtFactor(V1);
5241  return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
5242  DAG.getConstant(Imm, dl, MVT::i32));
5243  }
5244 
5245  unsigned WhichResult;
5246  if (isZIPMask(ShuffleMask, VT, WhichResult)) {
5247  unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
5248  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
5249  }
5250  if (isUZPMask(ShuffleMask, VT, WhichResult)) {
5251  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
5252  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
5253  }
5254  if (isTRNMask(ShuffleMask, VT, WhichResult)) {
5255  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
5256  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
5257  }
5258 
5259  if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
5260  unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
5261  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
5262  }
5263  if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
5264  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
5265  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
5266  }
5267  if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
5268  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
5269  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
5270  }
5271 
5272  SDValue Concat = tryFormConcatFromShuffle(Op, DAG);
5273  if (Concat.getNode())
5274  return Concat;
5275 
5276  bool DstIsLeft;
5277  int Anomaly;
5278  int NumInputElements = V1.getValueType().getVectorNumElements();
5279  if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
5280  SDValue DstVec = DstIsLeft ? V1 : V2;
5281  SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
5282 
5283  SDValue SrcVec = V1;
5284  int SrcLane = ShuffleMask[Anomaly];
5285  if (SrcLane >= NumInputElements) {
5286  SrcVec = V2;
5287  SrcLane -= VT.getVectorNumElements();
5288  }
5289  SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
5290 
5291  EVT ScalarVT = VT.getVectorElementType();
5292 
5293  if (ScalarVT.getSizeInBits() < 32 && ScalarVT.isInteger())
5294  ScalarVT = MVT::i32;
5295 
5296  return DAG.getNode(
5297  ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
5298  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
5299  DstLaneV);
5300  }
5301 
5302  // If the shuffle is not directly supported and it has 4 elements, use
5303  // the PerfectShuffle-generated table to synthesize it from other shuffles.
5304  unsigned NumElts = VT.getVectorNumElements();
5305  if (NumElts == 4) {
5306  unsigned PFIndexes[4];
5307  for (unsigned i = 0; i != 4; ++i) {
5308  if (ShuffleMask[i] < 0)
5309  PFIndexes[i] = 8;
5310  else
5311  PFIndexes[i] = ShuffleMask[i];
5312  }
5313 
5314  // Compute the index in the perfect shuffle table.
5315  unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
5316  PFIndexes[2] * 9 + PFIndexes[3];
5317  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
5318  unsigned Cost = (PFEntry >> 30);
5319 
5320  if (Cost <= 4)
5321  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
5322  }
5323 
5324  return GenerateTBL(Op, ShuffleMask, DAG);
5325 }
5326 
5327 static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
5328  APInt &UndefBits) {
5329  EVT VT = BVN->getValueType(0);
5330  APInt SplatBits, SplatUndef;
5331  unsigned SplatBitSize;
5332  bool HasAnyUndefs;
5333  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
5334  unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
5335 
5336  for (unsigned i = 0; i < NumSplats; ++i) {
5337  CnstBits <<= SplatBitSize;
5338  UndefBits <<= SplatBitSize;
5339  CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
5340  UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
5341  }
5342 
5343  return true;
5344  }
5345 
5346  return false;
5347 }
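// For example, a <4 x i32> build_vector splatting 0x000000FF yields
// SplatBits == 0xFF with SplatBitSize == 32; the loop above replicates that
// value into each 32-bit lane of the 128-bit CnstBits. UndefBits accumulates
// the same splat with its undef bits toggled (SplatBits ^ SplatUndef), which
// the callers retry with when the first immediate-matching attempt fails.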
5348 
5349 SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
5350  SelectionDAG &DAG) const {
5351  BuildVectorSDNode *BVN =
5352  dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
5353  SDValue LHS = Op.getOperand(0);
5354  SDLoc dl(Op);
5355  EVT VT = Op.getValueType();
5356 
5357  if (!BVN)
5358  return Op;
5359 
5360  APInt CnstBits(VT.getSizeInBits(), 0);
5361  APInt UndefBits(VT.getSizeInBits(), 0);
5362  if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
5363  // We only have BIC vector immediate instruction, which is and-not.
5364  CnstBits = ~CnstBits;
5365 
5366  // We make use of a little bit of goto ickiness in order to avoid having to
5367  // duplicate the immediate matching logic for the undef toggled case.
5368  bool SecondTry = false;
5369  AttemptModImm:
5370 
5371  if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
5372  CnstBits = CnstBits.zextOrTrunc(64);
5373  uint64_t CnstVal = CnstBits.getZExtValue();
5374 
5375  if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
5376  CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
5377  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5378  SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
5379  DAG.getConstant(CnstVal, dl, MVT::i32),
5380  DAG.getConstant(0, dl, MVT::i32));
5381  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5382  }
5383 
5384  if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
5385  CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
5386  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5387  SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
5388  DAG.getConstant(CnstVal, dl, MVT::i32),
5389  DAG.getConstant(8, dl, MVT::i32));
5390  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5391  }
5392 
5393  if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
5394  CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
5395  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5396  SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
5397  DAG.getConstant(CnstVal, dl, MVT::i32),
5398  DAG.getConstant(16, dl, MVT::i32));
5399  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5400  }
5401 
5402  if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
5403  CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
5404  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5405  SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
5406  DAG.getConstant(CnstVal, dl, MVT::i32),
5407  DAG.getConstant(24, dl, MVT::i32));
5408  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5409  }
5410 
5411  if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
5412  CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
5413  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
5414  SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
5415  DAG.getConstant(CnstVal, dl, MVT::i32),
5416  DAG.getConstant(0, dl, MVT::i32));
5417  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5418  }
5419 
5420  if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
5421  CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
5422  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
5423  SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
5424  DAG.getConstant(CnstVal, dl, MVT::i32),
5425  DAG.getConstant(8, dl, MVT::i32));
5426  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5427  }
5428  }
5429 
5430  if (SecondTry)
5431  goto FailedModImm;
5432  SecondTry = true;
5433  CnstBits = ~UndefBits;
5434  goto AttemptModImm;
5435  }
5436 
5437 // We can always fall back to a non-immediate AND.
5438 FailedModImm:
5439  return Op;
5440 }
5441 
5442 // Specialized code to quickly find if PotentialBVec is a BuildVector that
5443 // consists of only the same constant int value, returned in reference arg
5444 // ConstVal
5445 static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
5446  uint64_t &ConstVal) {
5447  BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
5448  if (!Bvec)
5449  return false;
5450  ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
5451  if (!FirstElt)
5452  return false;
5453  EVT VT = Bvec->getValueType(0);
5454  unsigned NumElts = VT.getVectorNumElements();
5455  for (unsigned i = 1; i < NumElts; ++i)
5456  if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
5457  return false;
5458  ConstVal = FirstElt->getZExtValue();
5459  return true;
5460 }
5461 
5462 static unsigned getIntrinsicID(const SDNode *N) {
5463  unsigned Opcode = N->getOpcode();
5464  switch (Opcode) {
5465  default:
5466  return Intrinsic::not_intrinsic;
5467  case ISD::INTRINSIC_WO_CHAIN: {
5468  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
5469  if (IID < Intrinsic::num_intrinsics)
5470  return IID;
5471  return Intrinsic::not_intrinsic;
5472  }
5473  }
5474 }
5475 
5476 // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
5477 // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
5478 // BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2.
5479 // Also, logical shift right -> sri, with the same structure.
5480 static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
5481  EVT VT = N->getValueType(0);
5482 
5483  if (!VT.isVector())
5484  return SDValue();
5485 
5486  SDLoc DL(N);
5487 
5488  // Is the first op an AND?
5489  const SDValue And = N->getOperand(0);
5490  if (And.getOpcode() != ISD::AND)
5491  return SDValue();
5492 
5493  // Is the second op an shl or lshr?
5494  SDValue Shift = N->getOperand(1);
5495  // This will have been turned into: AArch64ISD::VSHL vector, #shift
5496  // or AArch64ISD::VLSHR vector, #shift
5497  unsigned ShiftOpc = Shift.getOpcode();
5498  if ((ShiftOpc != AArch64ISD::VSHL && ShiftOpc != AArch64ISD::VLSHR))
5499  return SDValue();
5500  bool IsShiftRight = ShiftOpc == AArch64ISD::VLSHR;
5501 
5502  // Is the shift amount constant?
5503  ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
5504  if (!C2node)
5505  return SDValue();
5506 
5507  // Is the and mask vector all constant?
5508  uint64_t C1;
5509  if (!isAllConstantBuildVector(And.getOperand(1), C1))
5510  return SDValue();
5511 
5512  // Is C1 == ~C2, taking into account how much one can shift elements of a
5513  // particular size?
5514  uint64_t C2 = C2node->getZExtValue();
5515  unsigned ElemSizeInBits = VT.getVectorElementType().getSizeInBits();
5516  if (C2 > ElemSizeInBits)
5517  return SDValue();
5518  unsigned ElemMask = (1 << ElemSizeInBits) - 1;
5519  if ((C1 & ElemMask) != (~C2 & ElemMask))
5520  return SDValue();
5521 
5522  SDValue X = And.getOperand(0);
5523  SDValue Y = Shift.getOperand(0);
5524 
5525  unsigned Intrin =
5526  IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
5527  SDValue ResultSLI =
5528  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
5529  DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
5530  Shift.getOperand(1));
5531 
5532  DEBUG(dbgs() << "aarch64-lower: transformed: \n");
5533  DEBUG(N->dump(&DAG));
5534  DEBUG(dbgs() << "into: \n");
5535  DEBUG(ResultSLI->dump(&DAG));
5536 
5537  ++NumShiftInserts;
5538  return ResultSLI;
5539 }
5540 
5541 SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
5542  SelectionDAG &DAG) const {
5543  // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
5544  if (EnableAArch64SlrGeneration) {
5545  SDValue Res = tryLowerToSLI(Op.getNode(), DAG);
5546  if (Res.getNode())
5547  return Res;
5548  }
5549 
5550  BuildVectorSDNode *BVN =
5551  dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
5552  SDValue LHS = Op.getOperand(1);
5553  SDLoc dl(Op);
5554  EVT VT = Op.getValueType();
5555 
5556  // OR commutes, so try swapping the operands.
5557  if (!BVN) {
5558  LHS = Op.getOperand(0);
5559  BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
5560  }
5561  if (!BVN)
5562  return Op;
5563 
5564  APInt CnstBits(VT.getSizeInBits(), 0);
5565  APInt UndefBits(VT.getSizeInBits(), 0);
5566  if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
5567  // We make use of a little bit of goto ickiness in order to avoid having to
5568  // duplicate the immediate matching logic for the undef toggled case.
5569  bool SecondTry = false;
5570  AttemptModImm:
5571 
5572  if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
5573  CnstBits = CnstBits.zextOrTrunc(64);
5574  uint64_t CnstVal = CnstBits.getZExtValue();
5575 
5576  if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
5577  CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
5578  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5579  SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
5580  DAG.getConstant(CnstVal, dl, MVT::i32),
5581  DAG.getConstant(0, dl, MVT::i32));
5582  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5583  }
5584 
5585  if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
5586  CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
5587  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5588  SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
5589  DAG.getConstant(CnstVal, dl, MVT::i32),
5590  DAG.getConstant(8, dl, MVT::i32));
5591  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5592  }
5593 
5594  if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
5595  CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
5596  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5597  SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
5598  DAG.getConstant(CnstVal, dl, MVT::i32),
5599  DAG.getConstant(16, dl, MVT::i32));
5600  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5601  }
5602 
5603  if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
5604  CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
5605  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5606  SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
5607  DAG.getConstant(CnstVal, dl, MVT::i32),
5608  DAG.getConstant(24, dl, MVT::i32));
5609  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5610  }
5611 
5612  if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
5613  CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
5614  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
5615  SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
5616  DAG.getConstant(CnstVal, dl, MVT::i32),
5617  DAG.getConstant(0, dl, MVT::i32));
5618  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5619  }
5620 
5621  if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
5622  CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
5623  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
5624  SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
5625  DAG.getConstant(CnstVal, dl, MVT::i32),
5626  DAG.getConstant(8, dl, MVT::i32));
5627  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5628  }
5629  }
5630 
5631  if (SecondTry)
5632  goto FailedModImm;
5633  SecondTry = true;
5634  CnstBits = UndefBits;
5635  goto AttemptModImm;
5636  }
5637 
5638 // We can always fall back to a non-immediate OR.
5639 FailedModImm:
5640  return Op;
5641 }
5642 
5643 // Normalize the operands of BUILD_VECTOR. The value of constant operands will
5644 // be truncated to fit element width.
5645 static SDValue NormalizeBuildVector(SDValue Op,
5646  SelectionDAG &DAG) {
5647  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
5648  SDLoc dl(Op);
5649  EVT VT = Op.getValueType();
5650  EVT EltTy = VT.getVectorElementType();
5651 
5652  if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
5653  return Op;
5654 
5655  SmallVector<SDValue, 16> Ops;
5656  for (unsigned I = 0, E = VT.getVectorNumElements(); I != E; ++I) {
5657  SDValue Lane = Op.getOperand(I);
5658  if (Lane.getOpcode() == ISD::Constant) {
5659  APInt LowBits(EltTy.getSizeInBits(),
5660  cast<ConstantSDNode>(Lane)->getZExtValue());
5661  Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
5662  }
5663  Ops.push_back(Lane);
5664  }
5665  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
5666 }
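// For example, a <8 x i8> BUILD_VECTOR whose operands are the i32 constant
// 0x1FF is rebuilt here with each lane truncated to the 8-bit element width
// (0xFF), so the splat analysis in LowerBUILD_VECTOR sees canonical values.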
5667 
5668 SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
5669  SelectionDAG &DAG) const {
5670  SDLoc dl(Op);
5671  EVT VT = Op.getValueType();
5672  Op = NormalizeBuildVector(Op, DAG);
5673  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
5674 
5675  APInt CnstBits(VT.getSizeInBits(), 0);
5676  APInt UndefBits(VT.getSizeInBits(), 0);
5677  if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
5678  // We make use of a little bit of goto ickiness in order to avoid having to
5679  // duplicate the immediate matching logic for the undef toggled case.
5680  bool SecondTry = false;
5681  AttemptModImm:
5682 
5683  if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
5684  CnstBits = CnstBits.zextOrTrunc(64);
5685  uint64_t CnstVal = CnstBits.getZExtValue();
5686 
5687  // Certain magic vector constants (used to express things like NOT
5688  // and NEG) are passed through unmodified. This allows codegen patterns
5689  // for these operations to match. Special-purpose patterns will lower
5690  // these immediates to MOVIs if it proves necessary.
5691  if (VT.isInteger() && (CnstVal == 0 || CnstVal == ~0ULL))
5692  return Op;
5693 
5694  // The many faces of MOVI...
5695  if (AArch64_AM::isAdvSIMDModImmType10(CnstVal)) {
5696  CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal);
5697  if (VT.getSizeInBits() == 128) {
5698  SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
5699  DAG.getConstant(CnstVal, dl, MVT::i32));
5700  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5701  }
5702 
5703  // Support the V64 version via subregister insertion.
5704  SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
5705  DAG.getConstant(CnstVal, dl, MVT::i32));
5706  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5707  }
5708 
5709  if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
5710  CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
5711  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5712  SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
5713  DAG.getConstant(CnstVal, dl, MVT::i32),
5714  DAG.getConstant(0, dl, MVT::i32));
5715  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5716  }
5717 
5718  if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
5719  CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
5720  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5721  SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
5722  DAG.getConstant(CnstVal, dl, MVT::i32),
5723  DAG.getConstant(8, dl, MVT::i32));
5724  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5725  }
5726 
5727  if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
5728  CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
5729  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5730  SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
5731  DAG.getConstant(CnstVal, dl, MVT::i32),
5732  DAG.getConstant(16, dl, MVT::i32));
5733  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5734  }
5735 
5736  if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
5737  CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
5738  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5739  SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
5740  DAG.getConstant(CnstVal, dl, MVT::i32),
5741  DAG.getConstant(24, dl, MVT::i32));
5742  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5743  }
5744 
5745  if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
5746  CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
5747  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
5748  SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
5749  DAG.getConstant(CnstVal, dl, MVT::i32),
5750  DAG.getConstant(0, dl, MVT::i32));
5751  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5752  }
5753 
5754  if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
5755  CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
5756  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
5757  SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
5758  DAG.getConstant(CnstVal, dl, MVT::i32),
5759  DAG.getConstant(8, dl, MVT::i32));
5760  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5761  }
5762 
5763  if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
5764  CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
5765  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5766  SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
5767  DAG.getConstant(CnstVal, dl, MVT::i32),
5768  DAG.getConstant(264, dl, MVT::i32));
5769  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5770  }
5771 
5772  if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
5773  CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
5774  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5775  SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
5776  DAG.getConstant(CnstVal, dl, MVT::i32),
5777  DAG.getConstant(272, dl, MVT::i32));
5778  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5779  }
5780 
5781  if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) {
5782  CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal);
5783  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
5784  SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
5785  DAG.getConstant(CnstVal, dl, MVT::i32));
5786  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5787  }
5788 
5789  // The few faces of FMOV...
5790  if (AArch64_AM::isAdvSIMDModImmType11(CnstVal)) {
5791  CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal);
5792  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
5793  SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
5794  DAG.getConstant(CnstVal, dl, MVT::i32));
5795  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5796  }
5797 
5798  if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) &&
5799  VT.getSizeInBits() == 128) {
5800  CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
5801  SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
5802  DAG.getConstant(CnstVal, dl, MVT::i32));
5803  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5804  }
5805 
5806  // The many faces of MVNI...
5807  CnstVal = ~CnstVal;
5808  if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
5809  CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
5810  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5811  SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
5812  DAG.getConstant(CnstVal, dl, MVT::i32),
5813  DAG.getConstant(0, dl, MVT::i32));
5814  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5815  }
5816 
5817  if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
5818  CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
5819  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5820  SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
5821  DAG.getConstant(CnstVal, dl, MVT::i32),
5822  DAG.getConstant(8, dl, MVT::i32));
5823  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5824  }
5825 
5826  if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
5827  CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
5828  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5829  SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
5830  DAG.getConstant(CnstVal, dl, MVT::i32),
5831  DAG.getConstant(16, dl, MVT::i32));
5832  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5833  }
5834 
5835  if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
5836  CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
5837  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5838  SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
5839  DAG.getConstant(CnstVal, dl, MVT::i32),
5840  DAG.getConstant(24, dl, MVT::i32));
5841  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5842  }
5843 
5844  if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
5845  CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
5846  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
5847  SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
5848  DAG.getConstant(CnstVal, dl, MVT::i32),
5849  DAG.getConstant(0, dl, MVT::i32));
5850  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5851  }
5852 
5853  if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
5854  CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
5855  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
5856  SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
5857  DAG.getConstant(CnstVal, dl, MVT::i32),
5858  DAG.getConstant(8, dl, MVT::i32));
5859  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5860  }
5861 
5862  if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
5863  CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
5864  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5865  SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
5866  DAG.getConstant(CnstVal, dl, MVT::i32),
5867  DAG.getConstant(264, dl, MVT::i32));
5868  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5869  }
5870 
5871  if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
5872  CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
5873  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5874  SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
5875  DAG.getConstant(CnstVal, dl, MVT::i32),
5876  DAG.getConstant(272, dl, MVT::i32));
5877  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5878  }
5879  }
5880 
5881  if (SecondTry)
5882  goto FailedModImm;
5883  SecondTry = true;
5884  CnstBits = UndefBits;
5885  goto AttemptModImm;
5886  }
5887 FailedModImm:
5888 
5889  // Scan through the operands to find some interesting properties we can
5890  // exploit:
5891  // 1) If only one value is used, we can use a DUP, or
5892  // 2) if only the low element is not undef, we can just insert that, or
5893  // 3) if only one constant value is used (w/ some non-constant lanes),
5894  // we can splat the constant value into the whole vector then fill
5895  // in the non-constant lanes.
5896  // 4) FIXME: If different constant values are used, but we can intelligently
5897  // select the values we'll be overwriting for the non-constant
5898  // lanes such that we can directly materialize the vector
5899  // some other way (MOVI, e.g.), we can be sneaky.
5900  unsigned NumElts = VT.getVectorNumElements();
5901  bool isOnlyLowElement = true;
5902  bool usesOnlyOneValue = true;
5903  bool usesOnlyOneConstantValue = true;
5904  bool isConstant = true;
5905  unsigned NumConstantLanes = 0;
5906  SDValue Value;
5907  SDValue ConstantValue;
5908  for (unsigned i = 0; i < NumElts; ++i) {
5909  SDValue V = Op.getOperand(i);
5910  if (V.getOpcode() == ISD::UNDEF)
5911  continue;
5912  if (i > 0)
5913  isOnlyLowElement = false;
5914  if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
5915  isConstant = false;
5916 
5917  if (isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V)) {
5918  ++NumConstantLanes;
5919  if (!ConstantValue.getNode())
5920  ConstantValue = V;
5921  else if (ConstantValue != V)
5922  usesOnlyOneConstantValue = false;
5923  }
5924 
5925  if (!Value.getNode())
5926  Value = V;
5927  else if (V != Value)
5928  usesOnlyOneValue = false;
5929  }
5930 
5931  if (!Value.getNode())
5932  return DAG.getUNDEF(VT);
5933 
5934  if (isOnlyLowElement)
5935  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
5936 
5937  // Use DUP for non-constant splats. For f32 constant splats, reduce to
5938  // i32 and try again.
5939  if (usesOnlyOneValue) {
5940  if (!isConstant) {
5941  if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5942  Value.getValueType() != VT)
5943  return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
5944 
5945  // This is actually a DUPLANExx operation, which keeps everything vectory.
5946 
5947  // DUPLANE works on 128-bit vectors, widen it if necessary.
5948  SDValue Lane = Value.getOperand(1);
5949  Value = Value.getOperand(0);
5950  if (Value.getValueType().getSizeInBits() == 64)
5951  Value = WidenVector(Value, DAG);
5952 
5953  unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
5954  return DAG.getNode(Opcode, dl, VT, Value, Lane);
5955  }
5956 
5957  if (VT.getVectorElementType().isFloatingPoint()) {
5958  SmallVector<SDValue, 8> Ops;
5959  EVT EltTy = VT.getVectorElementType();
5960  assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
5961  "Unsupported floating-point vector type");
5962  MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
5963  for (unsigned i = 0; i < NumElts; ++i)
5964  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
5965  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
5966  SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
5967  Val = LowerBUILD_VECTOR(Val, DAG);
5968  if (Val.getNode())
5969  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
5970  }
5971  }
5972 
5973  // If only one constant value was used, and it was used for more than one
5974  // lane, start by splatting that value, then replace the non-constant lanes.
5975  // This is better than the default, which will perform a separate
5976  // initialization for each lane.
5977  if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
5978  SDValue Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
5979  // Now insert the non-constant lanes.
5980  for (unsigned i = 0; i < NumElts; ++i) {
5981  SDValue V = Op.getOperand(i);
5982  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
5983  if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
5984  // Note that type legalization likely mucked about with the VT of the
5985  // source operand, so we may have to convert it here before inserting.
5986  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
5987  }
5988  }
5989  return Val;
5990  }
5991 
5992  // If all elements are constants and the case above didn't get hit, fall back
5993  // to the default expansion, which will generate a load from the constant
5994  // pool.
5995  if (isConstant)
5996  return SDValue();
5997 
5998  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
5999  if (NumElts >= 4) {
6000  SDValue shuffle = ReconstructShuffle(Op, DAG);
6001  if (shuffle != SDValue())
6002  return shuffle;
6003  }
6004 
6005  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6006  // know the default expansion would otherwise fall back on something even
6007  // worse. For a vector with one or two non-undef values, that's
6008  // scalar_to_vector for the elements followed by a shuffle (provided the
6009  // shuffle is valid for the target) and materialization element by element
6010  // on the stack followed by a load for everything else.
6011  if (!isConstant && !usesOnlyOneValue) {
6012  SDValue Vec = DAG.getUNDEF(VT);
6013  SDValue Op0 = Op.getOperand(0);
6014  unsigned ElemSize = VT.getVectorElementType().getSizeInBits();
6015  unsigned i = 0;
6016  // For 32 and 64 bit types, use INSERT_SUBREG for lane zero to
6017  // a) Avoid a RMW dependency on the full vector register, and
6018  // b) Allow the register coalescer to fold away the copy if the
6019  // value is already in an S or D register.
6020  if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) {
6021  unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
6022  MachineSDNode *N =
6023  DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
6024  DAG.getTargetConstant(SubIdx, dl, MVT::i32));
6025  Vec = SDValue(N, 0);
6026  ++i;
6027  }
6028  for (; i < NumElts; ++i) {
6029  SDValue V = Op.getOperand(i);
6030  if (V.getOpcode() == ISD::UNDEF)
6031  continue;
6032  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
6033  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6034  }
6035  return Vec;
6036  }
6037 
6038  // Just use the default expansion. We failed to find a better alternative.
6039  return SDValue();
6040 }
6041 
6042 SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
6043  SelectionDAG &DAG) const {
6044  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
6045 
6046  // Check for non-constant or out of range lane.
6047  EVT VT = Op.getOperand(0).getValueType();
6048  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
6049  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
6050  return SDValue();
6051 
6052 
6053  // Insertion/extraction are legal for V128 types.
6054  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
6055  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
6056  VT == MVT::v8f16)
6057  return Op;
6058 
6059  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
6060  VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
6061  return SDValue();
6062 
6063  // For V64 types, we perform insertion by expanding the value
6064  // to a V128 type and perform the insertion on that.
6065  SDLoc DL(Op);
6066  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
6067  EVT WideTy = WideVec.getValueType();
6068 
6069  SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
6070  Op.getOperand(1), Op.getOperand(2));
6071  // Re-narrow the resultant vector.
6072  return NarrowVector(Node, DAG);
6073 }
6074 
6075 SDValue
6076 AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
6077  SelectionDAG &DAG) const {
6078  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
6079 
6080  // Check for non-constant or out of range lane.
6081  EVT VT = Op.getOperand(0).getValueType();
6082  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6083  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
6084  return SDValue();
6085 
6086 
6087  // Insertion/extraction are legal for V128 types.
6088  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
6089  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
6090  VT == MVT::v8f16)
6091  return Op;
6092 
6093  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
6094  VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
6095  return SDValue();
6096 
6097  // For V64 types, we perform extraction by expanding the value
6098  // to a V128 type and perform the extraction on that.
6099  SDLoc DL(Op);
6100  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
6101  EVT WideTy = WideVec.getValueType();
6102 
6103  EVT ExtrTy = WideTy.getVectorElementType();
6104  if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
6105  ExtrTy = MVT::i32;
6106 
6107  // For extractions, we just return the result directly.
6108  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
6109  Op.getOperand(1));
6110 }
6111 
6112 SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
6113  SelectionDAG &DAG) const {
6114  EVT VT = Op.getOperand(0).getValueType();
6115  SDLoc dl(Op);
6116  // Just in case...
6117  if (!VT.isVector())
6118  return SDValue();
6119 
6119 
6120  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6121  if (!Cst)
6122  return SDValue();
6123  unsigned Val = Cst->getZExtValue();
6124 
6125  unsigned Size = Op.getValueType().getSizeInBits();
6126  if (Val == 0) {
6127  switch (Size) {
6128  case 8:
6129  return DAG.getTargetExtractSubreg(AArch64::bsub, dl, Op.getValueType(),
6130  Op.getOperand(0));
6131  case 16:
6132  return DAG.getTargetExtractSubreg(AArch64::hsub, dl, Op.getValueType(),
6133  Op.getOperand(0));
6134  case 32:
6135  return DAG.getTargetExtractSubreg(AArch64::ssub, dl, Op.getValueType(),
6136  Op.getOperand(0));
6137  case 64:
6138  return DAG.getTargetExtractSubreg(AArch64::dsub, dl, Op.getValueType(),
6139  Op.getOperand(0));
6140  default:
6141  llvm_unreachable("Unexpected vector type in extract_subvector!");
6142  }
6143  }
6144  // If this is extracting the upper 64-bits of a 128-bit vector, we match
6145  // that directly.
6146  if (Size == 64 && Val * VT.getVectorElementType().getSizeInBits() == 64)
6147  return Op;
6148 
6149  return SDValue();
6150 }
6151 
6152 bool AArch64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
6153  EVT VT) const {
6154  if (VT.getVectorNumElements() == 4 &&
6155  (VT.is128BitVector() || VT.is64BitVector())) {
6156  unsigned PFIndexes[4];
6157  for (unsigned i = 0; i != 4; ++i) {
6158  if (M[i] < 0)
6159  PFIndexes[i] = 8;
6160  else
6161  PFIndexes[i] = M[i];
6162  }
6163 
6164  // Compute the index in the perfect shuffle table.
6165  unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
6166  PFIndexes[2] * 9 + PFIndexes[3];
6167  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6168  unsigned Cost = (PFEntry >> 30);
6169 
6170  if (Cost <= 4)
6171  return true;
6172  }
6173 
6174  bool DummyBool;
6175  int DummyInt;
6176  unsigned DummyUnsigned;
6177 
6178  return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
6179  isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
6180  isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
6181  // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
6182  isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
6183  isZIPMask(M, VT, DummyUnsigned) ||
6184  isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
6185  isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
6186  isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
6187  isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
6188  isConcatMask(M, VT, VT.getSizeInBits() == 128));
6189 }
6190 
6191 /// getVShiftImm - Check if this is a valid build_vector for the immediate
6192 /// operand of a vector shift operation, where all the elements of the
6193 /// build_vector must have the same constant integer value.
6194 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6195  // Ignore bit_converts.
6196  while (Op.getOpcode() == ISD::BITCAST)
6197  Op = Op.getOperand(0);
6198  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6199  APInt SplatBits, SplatUndef;
6200  unsigned SplatBitSize;
6201  bool HasAnyUndefs;
6202  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
6203  HasAnyUndefs, ElementBits) ||
6204  SplatBitSize > ElementBits)
6205  return false;
6206  Cnt = SplatBits.getSExtValue();
6207  return true;
6208 }
6209 
6210 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
6211 /// operand of a vector shift left operation. That value must be in the range:
6212 /// 0 <= Value < ElementBits for a left shift; or
6213 /// 0 <= Value <= ElementBits for a long left shift.
6214 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6215  assert(VT.isVector() && "vector shift count is not a vector type");
6216  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
6217  if (!getVShiftImm(Op, ElementBits, Cnt))
6218  return false;
6219  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6220 }
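// For example, for a <8 x i16> shift ElementBits is 16, so a splatted shift
// amount passes this check when it is in [0, 15], or [0, 16] for the long
// (widening) shift variants.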
6221 
6222 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
6223 /// operand of a vector shift right operation. For a shift opcode, the value
6224 /// is positive, but for an intrinsic the value count must be negative. The
6225 /// absolute value must be in the range:
6226 /// 1 <= |Value| <= ElementBits for a right shift; or
6227 /// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6228 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6229  int64_t &Cnt) {
6230  assert(VT.isVector() && "vector shift count is not a vector type");
6231  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
6232  if (!getVShiftImm(Op, ElementBits, Cnt))
6233  return false;
6234  if (isIntrinsic)
6235  Cnt = -Cnt;
6236  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6237 }
6238 
6239 SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
6240  SelectionDAG &DAG) const {
6241  EVT VT = Op.getValueType();
6242  SDLoc DL(Op);
6243  int64_t Cnt;
6244 
6245  if (!Op.getOperand(1).getValueType().isVector())
6246  return Op;
6247  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
6248 
6249  switch (Op.getOpcode()) {
6250  default:
6251  llvm_unreachable("unexpected shift opcode");
6252 
6253  case ISD::SHL:
6254  if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
6255  return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
6256  DAG.getConstant(Cnt, DL, MVT::i32));
6257  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
6258  DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
6259  MVT::i32),
6260  Op.getOperand(0), Op.getOperand(1));
6261  case ISD::SRA:
6262  case ISD::SRL:
6263  // Right shift immediate
6264  if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) &&
6265  Cnt < EltSize) {
6266  unsigned Opc =
6267  (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
6268  return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
6269  DAG.getConstant(Cnt, DL, MVT::i32));
6270  }
6271 
6272  // Right shift register. Note, there is not a shift right register
6273  // instruction, but the shift left register instruction takes a signed
6274  // value, where negative numbers specify a right shift.
6275  unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
6276  : Intrinsic::aarch64_neon_ushl;
6277  // negate the shift amount
6278  SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
6279  SDValue NegShiftLeft =
6280  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
6281  DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
6282  NegShift);
6283  return NegShiftLeft;
6284  }
6285 
6286  return SDValue();
6287 }
6288 
6289 static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
6290  AArch64CC::CondCode CC, bool NoNans, EVT VT,
6291  SDLoc dl, SelectionDAG &DAG) {
6292  EVT SrcVT = LHS.getValueType();
6293  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
6294  "function only supposed to emit natural comparisons");
6295 
6296  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
6297  APInt CnstBits(VT.getSizeInBits(), 0);
6298  APInt UndefBits(VT.getSizeInBits(), 0);
6299  bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
6300  bool IsZero = IsCnst && (CnstBits == 0);
6301 
6302  if (SrcVT.getVectorElementType().isFloatingPoint()) {
6303  switch (CC) {
6304  default:
6305  return SDValue();
6306  case AArch64CC::NE: {
6307  SDValue Fcmeq;
6308  if (IsZero)
6309  Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
6310  else
6311  Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
6312  return DAG.getNode(AArch64ISD::NOT, dl, VT, Fcmeq);
6313  }
6314  case AArch64CC::EQ:
6315  if (IsZero)
6316  return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
6317  return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
6318  case AArch64CC::GE:
6319  if (IsZero)
6320  return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
6321  return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
6322  case AArch64CC::GT:
6323  if (IsZero)
6324  return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
6325  return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
6326  case AArch64CC::LS:
6327  if (IsZero)
6328  return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
6329  return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
6330  case AArch64CC::LT:
6331  if (!NoNans)
6332  return SDValue();
6333  // If we ignore NaNs then we can use the MI implementation.
6334  // Fallthrough.
6335  case AArch64CC::MI:
6336  if (IsZero)
6337  return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
6338  return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
6339  }
6340  }
6341 
6342  switch (CC) {
6343  default:
6344  return SDValue();
6345  case AArch64CC::NE: {
6346  SDValue Cmeq;
6347  if (IsZero)
6348  Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
6349  else
6350  Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
6351  return DAG.getNode(AArch64ISD::NOT, dl, VT, Cmeq);
6352  }
6353  case AArch64CC::EQ:
6354  if (IsZero)
6355  return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
6356  return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
6357  case AArch64CC::GE:
6358  if (IsZero)
6359  return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
6360  return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
6361  case AArch64CC::GT:
6362  if (IsZero)
6363  return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
6364  return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
6365  case AArch64CC::LE:
6366  if (IsZero)
6367  return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
6368  return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
6369  case AArch64CC::LS:
6370  return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
6371  case AArch64CC::LO:
6372  return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
6373  case AArch64CC::LT:
6374  if (IsZero)
6375  return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
6376  return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
6377  case AArch64CC::HI:
6378  return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
6379  case AArch64CC::HS:
6380  return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
6381  }
6382 }
6383 
6384 SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
6385  SelectionDAG &DAG) const {
6386  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6387  SDValue LHS = Op.getOperand(0);
6388  SDValue RHS = Op.getOperand(1);
6389  EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
6390  SDLoc dl(Op);
6391 
6392  if (LHS.getValueType().getVectorElementType().isInteger()) {
6393  assert(LHS.getValueType() == RHS.getValueType());
6394  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
6395  SDValue Cmp =
6396  EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
6397  return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
6398  }
6399 
6400  assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
6401  LHS.getValueType().getVectorElementType() == MVT::f64);
6402 
6403  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
6404  // clean. Some of them require two branches to implement.
6405  AArch64CC::CondCode CC1, CC2;
6406  bool ShouldInvert;
6407  changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
6408 
6409  bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
6410  SDValue Cmp =
6411  EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
6412  if (!Cmp.getNode())
6413  return SDValue();
6414 
6415  if (CC2 != AArch64CC::AL) {
6416  SDValue Cmp2 =
6417  EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
6418  if (!Cmp2.getNode())
6419  return SDValue();
6420 
6421  Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
6422  }
6423 
6424  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
6425 
6426  if (ShouldInvert)
6427  return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
6428 
6429  return Cmp;
6430 }
6431 
6432 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
6433 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
6434 /// specified in the intrinsic calls.
6435 bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
6436  const CallInst &I,
6437  unsigned Intrinsic) const {
6438  auto &DL = I.getModule()->getDataLayout();
6439  switch (Intrinsic) {
6440  case Intrinsic::aarch64_neon_ld2:
6441  case Intrinsic::aarch64_neon_ld3:
6442  case Intrinsic::aarch64_neon_ld4:
6443  case Intrinsic::aarch64_neon_ld1x2:
6444  case Intrinsic::aarch64_neon_ld1x3:
6445  case Intrinsic::aarch64_neon_ld1x4:
6446  case Intrinsic::aarch64_neon_ld2lane:
6447  case Intrinsic::aarch64_neon_ld3lane:
6448  case Intrinsic::aarch64_neon_ld4lane:
6449  case Intrinsic::aarch64_neon_ld2r:
6450  case Intrinsic::aarch64_neon_ld3r:
6451  case Intrinsic::aarch64_neon_ld4r: {
6452  Info.opc = ISD::INTRINSIC_W_CHAIN;
6453  // Conservatively set memVT to the entire set of vectors loaded.
6454  uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
6455  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
6456  Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
6457  Info.offset = 0;
6458  Info.align = 0;
6459  Info.vol = false; // volatile loads with NEON intrinsics not supported
6460  Info.readMem = true;
6461  Info.writeMem = false;
6462  return true;
6463  }
6464  case Intrinsic::aarch64_neon_st2:
6465  case Intrinsic::aarch64_neon_st3:
6466  case Intrinsic::aarch64_neon_st4:
6467  case Intrinsic::aarch64_neon_st1x2:
6468  case Intrinsic::aarch64_neon_st1x3:
6469  case Intrinsic::aarch64_neon_st1x4:
6470  case Intrinsic::aarch64_neon_st2lane:
6471  case Intrinsic::aarch64_neon_st3lane:
6472  case Intrinsic::aarch64_neon_st4lane: {
6473  Info.opc = ISD::INTRINSIC_VOID;
6474  // Conservatively set memVT to the entire set of vectors stored.
6475  unsigned NumElts = 0;
6476  for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
6477  Type *ArgTy = I.getArgOperand(ArgI)->getType();
6478  if (!ArgTy->isVectorTy())
6479  break;
6480  NumElts += DL.getTypeAllocSize(ArgTy) / 8;
6481  }
6482  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
6483  Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
6484  Info.offset = 0;
6485  Info.align = 0;
6486  Info.vol = false; // volatile stores with NEON intrinsics not supported
6487  Info.readMem = false;
6488  Info.writeMem = true;
6489  return true;
6490  }
6491  case Intrinsic::aarch64_ldaxr:
6492  case Intrinsic::aarch64_ldxr: {
6493  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
6494  Info.opc = ISD::INTRINSIC_W_CHAIN;
6495  Info.memVT = MVT::getVT(PtrTy->getElementType());
6496  Info.ptrVal = I.getArgOperand(0);
6497  Info.offset = 0;
6498  Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
6499  Info.vol = true;
6500  Info.readMem = true;
6501  Info.writeMem = false;
6502  return true;
6503  }
6504  case Intrinsic::aarch64_stlxr:
6505  case Intrinsic::aarch64_stxr: {
6506  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
6507  Info.opc = ISD::INTRINSIC_W_CHAIN;
6508  Info.memVT = MVT::getVT(PtrTy->getElementType());
6509  Info.ptrVal = I.getArgOperand(1);
6510  Info.offset = 0;
6511  Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
6512  Info.vol = true;
6513  Info.readMem = false;
6514  Info.writeMem = true;
6515  return true;
6516  }
6517  case Intrinsic::aarch64_ldaxp:
6518  case Intrinsic::aarch64_ldxp: {
6519  Info.opc = ISD::INTRINSIC_W_CHAIN;
6520  Info.memVT = MVT::i128;
6521  Info.ptrVal = I.getArgOperand(0);
6522  Info.offset = 0;
6523  Info.align = 16;
6524  Info.vol = true;
6525  Info.readMem = true;
6526  Info.writeMem = false;
6527  return true;
6528  }
6529  case Intrinsic::aarch64_stlxp:
6530  case Intrinsic::aarch64_stxp: {
6531  Info.opc = ISD::INTRINSIC_W_CHAIN;
6532  Info.memVT = MVT::i128;
6533  Info.ptrVal = I.getArgOperand(2);
6534  Info.offset = 0;
6535  Info.align = 16;
6536  Info.vol = true;
6537  Info.readMem = false;
6538  Info.writeMem = true;
6539  return true;
6540  }
6541  default:
6542  break;
6543  }
6544 
6545  return false;
6546 }
6547 
6548 // Truncations from 64-bit GPR to 32-bit GPR are free.
6549 bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
6550  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
6551  return false;
6552  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
6553  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
6554  return NumBits1 > NumBits2;
6555 }
6556 bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
6557  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
6558  return false;
6559  unsigned NumBits1 = VT1.getSizeInBits();
6560  unsigned NumBits2 = VT2.getSizeInBits();
6561  return NumBits1 > NumBits2;
6562 }
6563 
6564 /// Check if it is profitable to hoist instruction in then/else to if.
6565 /// Not profitable if I and its user can form an FMA instruction
6566 /// because we prefer FMSUB/FMADD.
6567 bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
6568  if (I->getOpcode() != Instruction::FMul)
6569  return true;
6570 
6571  if (I->getNumUses() != 1)
6572  return true;
6573 
6574  Instruction *User = I->user_back();
6575 
6576  if (User &&
6577  !(User->getOpcode() == Instruction::FSub ||
6578  User->getOpcode() == Instruction::FAdd))
6579  return true;
6580 
6581  const TargetOptions &Options = getTargetMachine().Options;
6582  const DataLayout &DL = I->getModule()->getDataLayout();
6583  EVT VT = getValueType(DL, User->getOperand(0)->getType());
6584 
6585  if (isFMAFasterThanFMulAndFAdd(VT) &&
6586  isOperationLegalOrCustom(ISD::FMA, VT) &&
6587  (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath))
6588  return false;
6589 
6590  return true;
6591 }
6592 
6593 // All 32-bit GPR operations implicitly zero the high-half of the corresponding
6594 // 64-bit GPR.
6595 bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
6596  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
6597  return false;
6598  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
6599  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
6600  return NumBits1 == 32 && NumBits2 == 64;
6601 }
6602 bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
6603  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
6604  return false;
6605  unsigned NumBits1 = VT1.getSizeInBits();
6606  unsigned NumBits2 = VT2.getSizeInBits();
6607  return NumBits1 == 32 && NumBits2 == 64;
6608 }
6609 
6610 bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
6611  EVT VT1 = Val.getValueType();
6612  if (isZExtFree(VT1, VT2)) {
6613  return true;
6614  }
6615 
6616  if (Val.getOpcode() != ISD::LOAD)
6617  return false;
6618 
6619  // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
6620  return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
6621  VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
6622  VT1.getSizeInBits() <= 32);
6623 }
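// Illustrative note (added, not part of the original source): on AArch64 any
// 32-bit GPR write, e.g. "ldr w0, [x1]" or "add w0, w1, w2", already clears
// bits [63:32] of the full X register, so a later (zext i32 ... to i64) costs
// nothing. That hardware guarantee is what the three isZExtFree overloads
// above report back to the combiner.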
6624 
6625 bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
6626  if (isa<FPExtInst>(Ext))
6627  return false;
6628 
6629  // Vector types are not free.
6630  if (Ext->getType()->isVectorTy())
6631  return false;
6632 
6633  for (const Use &U : Ext->uses()) {
6634  // The extension is free if we can fold it with a left shift in an
6635  // addressing mode or an arithmetic operation: add, sub, and cmp.
6636 
6637  // Is there a shift?
6638  const Instruction *Instr = cast<Instruction>(U.getUser());
6639 
6640  // Is this a constant shift?
6641  switch (Instr->getOpcode()) {
6642  case Instruction::Shl:
6643  if (!isa<ConstantInt>(Instr->getOperand(1)))
6644  return false;
6645  break;
6646  case Instruction::GetElementPtr: {
6647  gep_type_iterator GTI = gep_type_begin(Instr);
6648  auto &DL = Ext->getModule()->getDataLayout();
6649  std::advance(GTI, U.getOperandNo());
6650  Type *IdxTy = *GTI;
6651  // This extension will end up with a shift because of the scaling factor.
6652  // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
6653  // Get the shift amount based on the scaling factor:
6654  // log2(sizeof(IdxTy)) - log2(8).
6655  uint64_t ShiftAmt =
6656  countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3;
6657  // Is the constant foldable in the shift of the addressing mode?
6658  // I.e., shift amount is between 1 and 4 inclusive.
6659  if (ShiftAmt == 0 || ShiftAmt > 4)
6660  return false;
6661  break;
6662  }
6663  case Instruction::Trunc:
6664  // Check if this is a noop.
6665  // trunc(sext ty1 to ty2) to ty1.
6666  if (Instr->getType() == Ext->getOperand(0)->getType())
6667  continue;
6668  // FALL THROUGH.
6669  default:
6670  return false;
6671  }
6672 
6673  // At this point we can use the bfm family, so this extension is free
6674  // for that use.
6675  }
6676  return true;
6677 }
6678 
6679 bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
6680  unsigned &RequiredAligment) const {
6681  if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
6682  return false;
6683  // Cyclone supports unaligned accesses.
6684  RequiredAligment = 0;
6685  unsigned NumBits = LoadedType->getPrimitiveSizeInBits();
6686  return NumBits == 32 || NumBits == 64;
6687 }
6688 
6689 bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
6690  unsigned &RequiredAligment) const {
6691  if (!LoadedType.isSimple() ||
6692  (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
6693  return false;
6694  // Cyclone supports unaligned accesses.
6695  RequiredAligment = 0;
6696  unsigned NumBits = LoadedType.getSizeInBits();
6697  return NumBits == 32 || NumBits == 64;
6698 }
6699 
6700 /// \brief Lower an interleaved load into a ldN intrinsic.
6701 ///
6702 /// E.g. Lower an interleaved load (Factor = 2):
6703 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
6704 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
6705 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
6706 ///
6707 /// Into:
6708 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
6709 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
6710 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
6711 bool AArch64TargetLowering::lowerInterleavedLoad(
6712  LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
6713  ArrayRef<unsigned> Indices, unsigned Factor) const {
6714  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
6715  "Invalid interleave factor");
6716  assert(!Shuffles.empty() && "Empty shufflevector input");
6717  assert(Shuffles.size() == Indices.size() &&
6718  "Unmatched number of shufflevectors and indices");
6719 
6720  const DataLayout &DL = LI->getModule()->getDataLayout();
6721 
6722  VectorType *VecTy = Shuffles[0]->getType();
6723  unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
6724 
6725  // Skip illegal vector types.
6726  if (VecSize != 64 && VecSize != 128)
6727  return false;
6728 
6729  // A pointer vector cannot be the return type of the ldN intrinsics. Need to
6730  // load integer vectors first and then convert to pointer vectors.
6731  Type *EltTy = VecTy->getVectorElementType();
6732  if (EltTy->isPointerTy())
6733  VecTy =
6734  VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
6735 
6736  Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
6737  Type *Tys[2] = {VecTy, PtrTy};
6738  static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
6739  Intrinsic::aarch64_neon_ld3,
6740  Intrinsic::aarch64_neon_ld4};
6741  Function *LdNFunc =
6742  Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
6743 
6744  IRBuilder<> Builder(LI);
6745  Value *Ptr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy);
6746 
6747  CallInst *LdN = Builder.CreateCall(LdNFunc, Ptr, "ldN");
6748 
6749  // Replace uses of each shufflevector with the corresponding vector loaded
6750  // by ldN.
6751  for (unsigned i = 0; i < Shuffles.size(); i++) {
6752  ShuffleVectorInst *SVI = Shuffles[i];
6753  unsigned Index = Indices[i];
6754 
6755  Value *SubVec = Builder.CreateExtractValue(LdN, Index);
6756 
6757  // Convert the integer vector to pointer vector if the element is pointer.
6758  if (EltTy->isPointerTy())
6759  SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
6760 
6761  SVI->replaceAllUsesWith(SubVec);
6762  }
6763 
6764  return true;
6765 }
6766 
6767 /// \brief Get a mask consisting of sequential integers starting from \p Start.
6768 ///
6769 /// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
6770 static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
6771  unsigned NumElts) {
6772  SmallVector<Constant *, 16> Mask;
6773  for (unsigned i = 0; i < NumElts; i++)
6774  Mask.push_back(Builder.getInt32(Start + i));
6775 
6776  return ConstantVector::get(Mask);
6777 }
6778 
6779 /// \brief Lower an interleaved store into a stN intrinsic.
6780 ///
6781 /// E.g. Lower an interleaved store (Factor = 3):
6782 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
6783 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
6784 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
6785 ///
6786 /// Into:
6787 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
6788 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
6789 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
6790 /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
6791 ///
6792 /// Note that the new shufflevectors will be removed and we'll only generate one
6793 /// st3 instruction in CodeGen.
6794 bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
6795  ShuffleVectorInst *SVI,
6796  unsigned Factor) const {
6797  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
6798  "Invalid interleave factor");
6799 
6800  VectorType *VecTy = SVI->getType();
6801  assert(VecTy->getVectorNumElements() % Factor == 0 &&
6802  "Invalid interleaved store");
6803 
6804  unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
6805  Type *EltTy = VecTy->getVectorElementType();
6806  VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
6807 
6808  const DataLayout &DL = SI->getModule()->getDataLayout();
6809  unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
6810 
6811  // Skip illegal vector types.
6812  if (SubVecSize != 64 && SubVecSize != 128)
6813  return false;
6814 
6815  Value *Op0 = SVI->getOperand(0);
6816  Value *Op1 = SVI->getOperand(1);
6817  IRBuilder<> Builder(SI);
6818 
6819  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
6820  // vectors to integer vectors.
6821  if (EltTy->isPointerTy()) {
6822  Type *IntTy = DL.getIntPtrType(EltTy);
6823  unsigned NumOpElts =
6824  dyn_cast<VectorType>(Op0->getType())->getVectorNumElements();
6825 
6826  // Convert to the corresponding integer vector.
6827  Type *IntVecTy = VectorType::get(IntTy, NumOpElts);
6828  Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
6829  Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
6830 
6831  SubVecTy = VectorType::get(IntTy, NumSubElts);
6832  }
6833 
6834  Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
6835  Type *Tys[2] = {SubVecTy, PtrTy};
6836  static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
6837  Intrinsic::aarch64_neon_st3,
6838  Intrinsic::aarch64_neon_st4};
6839  Function *StNFunc =
6840  Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
6841 
6842  SmallVector<Value *, 5> Ops;
6843 
6844  // Split the shufflevector operands into sub vectors for the new stN call.
6845  for (unsigned i = 0; i < Factor; i++)
6846  Ops.push_back(Builder.CreateShuffleVector(
6847  Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
6848 
6849  Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy));
6850  Builder.CreateCall(StNFunc, Ops);
6851  return true;
6852 }
6853 
6854 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
6855  unsigned AlignCheck) {
6856  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
6857  (DstAlign == 0 || DstAlign % AlignCheck == 0));
6858 }
6859 
6860 EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
6861  unsigned SrcAlign, bool IsMemset,
6862  bool ZeroMemset,
6863  bool MemcpyStrSrc,
6864  MachineFunction &MF) const {
6865  // Don't use AdvSIMD to implement 16-byte memset. It would have taken one
6866  // instruction to materialize the v2i64 zero and one store (with restrictive
6867  // addressing mode). Just do two i64 stores of zero-registers.
6868  bool Fast;
6869  const Function *F = MF.getFunction();
6870  if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
6871  !F->hasFnAttribute(Attribute::NoImplicitFloat) &&
6872  (memOpAlign(SrcAlign, DstAlign, 16) ||
6873  (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
6874  return MVT::f128;
6875 
6876  if (Size >= 8 &&
6877  (memOpAlign(SrcAlign, DstAlign, 8) ||
6878  (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
6879  return MVT::i64;
6880 
6881  if (Size >= 4 &&
6882  (memOpAlign(SrcAlign, DstAlign, 4) ||
6883  (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
6884  return MVT::i32;
6885 
6886  return MVT::Other;
6887 }
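// Illustrative example (added, not part of the original source): for a
// 32-byte memcpy with 16-byte aligned source and destination the hook above
// returns MVT::f128, so the copy expands into q-register load/store pairs;
// with only 4-byte alignment and strict-alignment mode in effect it falls
// back to MVT::i32 sized chunks instead.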
6888 
6889 // 12-bit optionally shifted immediates are legal for adds.
6890 bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
6891  if ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0))
6892  return true;
6893  return false;
6894 }
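// Illustrative examples (added, not part of the original source):
//   isLegalAddImmediate(0xabc)    -> true   (add x0, x1, #0xabc)
//   isLegalAddImmediate(0xabc000) -> true   (add x0, x1, #0xabc, lsl #12)
//   isLegalAddImmediate(0x1abc)   -> false  (needs a separate mov/movk first)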
6895 
6896 // Integer comparisons are implemented with ADDS/SUBS, so the range of valid
6897 // immediates is the same as for an add or a sub.
6898 bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
6899  if (Immed < 0)
6900  Immed *= -1;
6901  return isLegalAddImmediate(Immed);
6902 }
6903 
6904 /// isLegalAddressingMode - Return true if the addressing mode represented
6905 /// by AM is legal for this target, for a load/store of the specified type.
6906 bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
6907  const AddrMode &AM, Type *Ty,
6908  unsigned AS) const {
6909  // AArch64 has five basic addressing modes:
6910  // reg
6911  // reg + 9-bit signed offset
6912  // reg + SIZE_IN_BYTES * 12-bit unsigned offset
6913  // reg1 + reg2
6914  // reg + SIZE_IN_BYTES * reg
6915 
6916  // No global is ever allowed as a base.
6917  if (AM.BaseGV)
6918  return false;
6919 
6920  // No reg+reg+imm addressing.
6921  if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
6922  return false;
6923 
6924  // check reg + imm case:
6925  // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
6926  uint64_t NumBytes = 0;
6927  if (Ty->isSized()) {
6928  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
6929  NumBytes = NumBits / 8;
6930  if (!isPowerOf2_64(NumBits))
6931  NumBytes = 0;
6932  }
6933 
6934  if (!AM.Scale) {
6935  int64_t Offset = AM.BaseOffs;
6936 
6937  // 9-bit signed offset
6938  if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1)
6939  return true;
6940 
6941  // 12-bit unsigned offset
6942  unsigned shift = Log2_64(NumBytes);
6943  if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
6944  // Must be a multiple of NumBytes (NumBytes is a power of 2)
6945  (Offset >> shift) << shift == Offset)
6946  return true;
6947  return false;
6948  }
6949 
6950  // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
6951 
6952  if (!AM.Scale || AM.Scale == 1 ||
6953  (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes))
6954  return true;
6955  return false;
6956 }
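// Illustrative examples (added, not part of the original source), assuming an
// i64 access (NumBytes == 8):
//   [x0]              AM = {BaseReg}                   accepted (reg)
//   [x0, #-256]       AM = {BaseReg, BaseOffs = -256}  accepted (9-bit signed)
//   [x0, #4088]       AM = {BaseReg, BaseOffs = 4088}  accepted (8 * uimm12)
//   [x0, x1, lsl #3]  AM = {BaseReg, Scale = 8}        accepted (reg + 8 * reg)
// whereas BaseOffs = 4089 (not a multiple of 8) or Scale = 4 are rejected.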
6957 
6958 int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL,
6959  const AddrMode &AM, Type *Ty,
6960  unsigned AS) const {
6961  // Scaling factors are not free at all.
6962  // Operands | Rt Latency
6963  // -------------------------------------------
6964  // Rt, [Xn, Xm] | 4
6965  // -------------------------------------------
6966  // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
6967  // Rt, [Xn, Wm, <extend> #imm] |
6968  if (isLegalAddressingMode(DL, AM, Ty, AS))
6969  // Scale represents reg2 * scale, thus account for 1 if
6970  // it is not equal to 0 or 1.
6971  return AM.Scale != 0 && AM.Scale != 1;
6972  return -1;
6973 }
6974 
6975 bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
6976  VT = VT.getScalarType();
6977 
6978  if (!VT.isSimple())
6979  return false;
6980 
6981  switch (VT.getSimpleVT().SimpleTy) {
6982  case MVT::f32:
6983  case MVT::f64:
6984  return true;
6985  default:
6986  break;
6987  }
6988 
6989  return false;
6990 }
6991 
6992 const MCPhysReg *
6993 AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
6994  // LR is a callee-save register, but we must treat it as clobbered by any call
6995  // site. Hence we include LR in the scratch registers, which are in turn added
6996  // as implicit-defs for stackmaps and patchpoints.
6997  static const MCPhysReg ScratchRegs[] = {
6998  AArch64::X16, AArch64::X17, AArch64::LR, 0
6999  };
7000  return ScratchRegs;
7001 }
7002 
7003 bool
7004 AArch64TargetLowering::isDesirableToCommuteWithShift(SDNode *N) const {
7005  EVT VT = N->getValueType(0);
7006  // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
7007  // it with shift to let it be lowered to UBFX.
7008  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
7009  isa<ConstantSDNode>(N->getOperand(1))) {
7010  uint64_t TruncMask = N->getConstantOperandVal(1);
7011  if (isMask_64(TruncMask) &&
7012  N->getOperand(0).getOpcode() == ISD::SRL &&
7013  isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
7014  return false;
7015  }
7016  return true;
7017 }
7018 
7019 bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
7020  Type *Ty) const {
7021  assert(Ty->isIntegerTy());
7022 
7023  unsigned BitSize = Ty->getPrimitiveSizeInBits();
7024  if (BitSize == 0)
7025  return false;
7026 
7027  int64_t Val = Imm.getSExtValue();
7028  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
7029  return true;
7030 
7031  if ((int64_t)Val < 0)
7032  Val = ~Val;
7033  if (BitSize == 32)
7034  Val &= (1LL << 32) - 1;
7035 
7036  unsigned LZ = countLeadingZeros((uint64_t)Val);
7037  unsigned Shift = (63 - LZ) / 16;
7038  // MOVZ is free so return true for one or fewer MOVK.
7039  return Shift < 3;
7040 }
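// Illustrative examples (added, not part of the original source): Shift above
// counts the 16-bit MOVK chunks that would follow the initial MOVZ:
//   0x0000000000001234 -> Shift = 0, MOVZ only        -> true
//   0x0000123400005678 -> Shift = 2, MOVZ + 2 x MOVK  -> true
//   0x1234567812345678 -> Shift = 3, MOVZ + 3 x MOVK  -> false (keep the load)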
7041 
7042 // Generate SUBS and CSEL for integer abs.
7043 static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
7044  EVT VT = N->getValueType(0);
7045 
7046  SDValue N0 = N->getOperand(0);
7047  SDValue N1 = N->getOperand(1);
7048  SDLoc DL(N);
7049 
7050  // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
7051  // and change it to SUB and CSEL.
7052  if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
7053  N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
7054  N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
7055  if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
7056  if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
7057  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7058  N0.getOperand(0));
7059  // Generate SUBS & CSEL.
7060  SDValue Cmp =
7061  DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
7062  N0.getOperand(0), DAG.getConstant(0, DL, VT));
7063  return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
7064  DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
7065  SDValue(Cmp.getNode(), 1));
7066  }
7067  return SDValue();
7068 }
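// Illustrative example (added, not part of the original source): the pattern
// above is the classic branchless abs,
//   %s = ashr i32 %x, 31
//   %t = add  i32 %x, %s
//   %r = xor  i32 %t, %s
// which this combine turns into a compare against zero plus a conditional
// negate, roughly "subs wzr, w0, #0; cneg w0, w0, mi".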
7069 
7070 // performXorCombine - Attempts to handle integer ABS.
7071 static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
7072  TargetLowering::DAGCombinerInfo &DCI,
7073  const AArch64Subtarget *Subtarget) {
7074  if (DCI.isBeforeLegalizeOps())
7075  return SDValue();
7076 
7077  return performIntegerAbsCombine(N, DAG);
7078 }
7079 
7080 SDValue
7081 AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
7082  SelectionDAG &DAG,
7083  std::vector<SDNode *> *Created) const {
7084  // fold (sdiv X, pow2)
7085  EVT VT = N->getValueType(0);
7086  if ((VT != MVT::i32 && VT != MVT::i64) ||
7087  !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
7088  return SDValue();
7089 
7090  SDLoc DL(N);
7091  SDValue N0 = N->getOperand(0);
7092  unsigned Lg2 = Divisor.countTrailingZeros();
7093  SDValue Zero = DAG.getConstant(0, DL, VT);
7094  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
7095 
7096  // Add (N0 < 0) ? Pow2 - 1 : 0;
7097  SDValue CCVal;
7098  SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
7099  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
7100  SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
7101 
7102  if (Created) {
7103  Created->push_back(Cmp.getNode());
7104  Created->push_back(Add.getNode());
7105  Created->push_back(CSel.getNode());
7106  }
7107 
7108  // Divide by pow2.
7109  SDValue SRA =
7110  DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
7111 
7112  // If we're dividing by a positive value, we're done. Otherwise, we must
7113  // negate the result.
7114  if (Divisor.isNonNegative())
7115  return SRA;
7116 
7117  if (Created)
7118  Created->push_back(SRA.getNode());
7119  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
7120 }
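// Illustrative example (added, not part of the original source): for
// (sdiv i32 %x, 4) the nodes built above correspond roughly to
//   cmp  w0, #0          ; Cmp
//   add  w8, w0, #3      ; Add  = x + (4 - 1)
//   csel w8, w8, w0, lt  ; CSel = biased value only when x < 0
//   asr  w0, w8, #2      ; SRA by log2(4)
// with an extra negate of the result when the divisor is negative.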
7121 
7122 static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
7123  TargetLowering::DAGCombinerInfo &DCI,
7124  const AArch64Subtarget *Subtarget) {
7125  if (DCI.isBeforeLegalizeOps())
7126  return SDValue();
7127 
7128  // Multiplication of a power of two plus/minus one can be done more
7129  // cheaply as a shift+add/sub. For now, this is true unilaterally. If
7130  // future CPUs have a cheaper MADD instruction, this may need to be
7131  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
7132  // 64-bit is 5 cycles, so this is always a win.
7133  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7134  APInt Value = C->getAPIntValue();
7135  EVT VT = N->getValueType(0);
7136  SDLoc DL(N);
7137  if (Value.isNonNegative()) {
7138  // (mul x, 2^N + 1) => (add (shl x, N), x)
7139  APInt VM1 = Value - 1;
7140  if (VM1.isPowerOf2()) {
7141  SDValue ShiftedVal =
7142  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
7143  DAG.getConstant(VM1.logBase2(), DL, MVT::i64));
7144  return DAG.getNode(ISD::ADD, DL, VT, ShiftedVal,
7145  N->getOperand(0));
7146  }
7147  // (mul x, 2^N - 1) => (sub (shl x, N), x)
7148  APInt VP1 = Value + 1;
7149  if (VP1.isPowerOf2()) {
7150  SDValue ShiftedVal =
7151  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
7152  DAG.getConstant(VP1.logBase2(), DL, MVT::i64));
7153  return DAG.getNode(ISD::SUB, DL, VT, ShiftedVal,
7154  N->getOperand(0));
7155  }
7156  } else {
7157  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
7158  APInt VNP1 = -Value + 1;
7159  if (VNP1.isPowerOf2()) {
7160  SDValue ShiftedVal =
7161  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
7162  DAG.getConstant(VNP1.logBase2(), DL, MVT::i64));
7163  return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0),
7164  ShiftedVal);
7165  }
7166  // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
7167  APInt VNM1 = -Value - 1;
7168  if (VNM1.isPowerOf2()) {
7169  SDValue ShiftedVal =
7170  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
7171  DAG.getConstant(VNM1.logBase2(), DL, MVT::i64));
7172  SDValue Add =
7173  DAG.getNode(ISD::ADD, DL, VT, ShiftedVal, N->getOperand(0));
7174  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Add);
7175  }
7176  }
7177  }
7178  return SDValue();
7179 }
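// Illustrative examples (added, not part of the original source) of the
// rewrites above for (mul x, C):
//   C = 3   -> (add (shl x, 1), x)
//   C = 15  -> (sub (shl x, 4), x)
//   C = -15 -> (sub x, (shl x, 4))
//   C = -17 -> (sub 0, (add (shl x, 4), x))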
7180 
7181 static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
7182  SelectionDAG &DAG) {
7183  // Take advantage of vector comparisons producing 0 or -1 in each lane to
7184  // optimize away operation when it's from a constant.
7185  //
7186  // The general transformation is:
7187  // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
7188  // AND(VECTOR_CMP(x,y), constant2)
7189  // constant2 = UNARYOP(constant)
7190 
7191  // Early exit if this isn't a vector operation, the operand of the
7192  // unary operation isn't a bitwise AND, or if the sizes of the operations
7193  // aren't the same.
7194  EVT VT = N->getValueType(0);
7195  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
7196  N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
7197  VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
7198  return SDValue();
7199 
7200  // Now check that the other operand of the AND is a constant. We could
7201  // make the transformation for non-constant splats as well, but it's unclear
7202  // that would be a benefit as it would not eliminate any operations, just
7203  // perform one more step in scalar code before moving to the vector unit.
7204  if (BuildVectorSDNode *BV =
7205  dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
7206  // Bail out if the vector isn't a constant.
7207  if (!BV->isConstant())
7208  return SDValue();
7209 
7210  // Everything checks out. Build up the new and improved node.
7211  SDLoc DL(N);
7212  EVT IntVT = BV->getValueType(0);
7213  // Create a new constant of the appropriate type for the transformed
7214  // DAG.
7215  SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
7216  // The AND node needs bitcasts to/from an integer vector type around it.
7217  SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
7218  SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
7219  N->getOperand(0)->getOperand(0), MaskConst);
7220  SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
7221  return Res;
7222  }
7223 
7224  return SDValue();
7225 }
7226 
7227 static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
7228  const AArch64Subtarget *Subtarget) {
7229  // First try to optimize away the conversion when it's conditionally from
7230  // a constant. Vectors only.
7231  SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
7232  if (Res != SDValue())
7233  return Res;
7234 
7235  EVT VT = N->getValueType(0);
7236  if (VT != MVT::f32 && VT != MVT::f64)
7237  return SDValue();
7238 
7239  // Only optimize when the source and destination types have the same width.
7240  if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits())
7241  return SDValue();
7242 
7243  // If the result of an integer load is only used by an integer-to-float
7244  // conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
7245  // This eliminates an "integer-to-vector-move" UOP and improves throughput.
7246  SDValue N0 = N->getOperand(0);
7247  if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
7248  // Do not change the width of a volatile load.
7249  !cast<LoadSDNode>(N0)->isVolatile()) {
7250  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7251  SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
7252  LN0->getPointerInfo(), LN0->isVolatile(),
7253  LN0->isNonTemporal(), LN0->isInvariant(),
7254  LN0->getAlignment());
7255 
7256  // Make sure successors of the original load stay after it by updating them
7257  // to use the new Chain.
7258  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
7259 
7260  unsigned Opcode =
7261  (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
7262  return DAG.getNode(Opcode, SDLoc(N), VT, Load);
7263  }
7264 
7265  return SDValue();
7266 }
7267 
7268 /// An EXTR instruction is made up of two shifts, ORed together. This helper
7269 /// searches for and classifies those shifts.
7270 static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
7271  bool &FromHi) {
7272  if (N.getOpcode() == ISD::SHL)
7273  FromHi = false;
7274  else if (N.getOpcode() == ISD::SRL)
7275  FromHi = true;
7276  else
7277  return false;
7278 
7279  if (!isa<ConstantSDNode>(N.getOperand(1)))
7280  return false;
7281 
7282  ShiftAmount = N->getConstantOperandVal(1);
7283  Src = N->getOperand(0);
7284  return true;
7285 }
7286 
7287 /// EXTR instruction extracts a contiguous chunk of bits from two existing
7288 /// registers viewed as a high/low pair. This function looks for the pattern:
7289 /// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
7290 /// EXTR. Can't quite be done in TableGen because the two immediates aren't
7291 /// independent.
7294  SelectionDAG &DAG = DCI.DAG;
7295  SDLoc DL(N);
7296  EVT VT = N->getValueType(0);
7297 
7298  assert(N->getOpcode() == ISD::OR && "Unexpected root");
7299 
7300  if (VT != MVT::i32 && VT != MVT::i64)
7301  return SDValue();
7302 
7303  SDValue LHS;
7304  uint32_t ShiftLHS = 0;
7305  bool LHSFromHi = 0;
7306  if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
7307  return SDValue();
7308 
7309  SDValue RHS;
7310  uint32_t ShiftRHS = 0;
7311  bool RHSFromHi = 0;
7312  if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
7313  return SDValue();
7314 
7315  // If they're both trying to come from the high part of the register, they're
7316  // not really an EXTR.
7317  if (LHSFromHi == RHSFromHi)
7318  return SDValue();
7319 
7320  if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
7321  return SDValue();
7322 
7323  if (LHSFromHi) {
7324  std::swap(LHS, RHS);
7325  std::swap(ShiftLHS, ShiftRHS);
7326  }
7327 
7328  return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
7329  DAG.getConstant(ShiftRHS, DL, MVT::i64));
7330 }
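// Illustrative example (added, not part of the original source): on i32,
//   (or (shl x, #24), (srl y, #8))
// is rewritten above to (EXTR x, y, #8), i.e. "extr w0, wX, wY, #8", which
// reads bits [39:8] of the 64-bit concatenation x:y in a single instruction.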
7331 
7332 static SDValue tryCombineToBSL(SDNode *N,
7333  TargetLowering::DAGCombinerInfo &DCI) {
7334  EVT VT = N->getValueType(0);
7335  SelectionDAG &DAG = DCI.DAG;
7336  SDLoc DL(N);
7337 
7338  if (!VT.isVector())
7339  return SDValue();
7340 
7341  SDValue N0 = N->getOperand(0);
7342  if (N0.getOpcode() != ISD::AND)
7343  return SDValue();
7344 
7345  SDValue N1 = N->getOperand(1);
7346  if (N1.getOpcode() != ISD::AND)
7347  return SDValue();
7348 
7349  // We only have to look for constant vectors here since the general, variable
7350  // case can be handled in TableGen.
7351  unsigned Bits = VT.getVectorElementType().getSizeInBits();
7352  uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
7353  for (int i = 1; i >= 0; --i)
7354  for (int j = 1; j >= 0; --j) {
7355  BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
7356  BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
7357  if (!BVN0 || !BVN1)
7358  continue;
7359 
7360  bool FoundMatch = true;
7361  for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
7362  ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
7363  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
7364  if (!CN0 || !CN1 ||
7365  CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
7366  FoundMatch = false;
7367  break;
7368  }
7369  }
7370 
7371  if (FoundMatch)
7372  return DAG.getNode(AArch64ISD::BSL, DL, VT, SDValue(BVN0, 0),
7373  N0->getOperand(1 - i), N1->getOperand(1 - j));
7374  }
7375 
7376  return SDValue();
7377 }
7378 
7379 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
7380  const AArch64Subtarget *Subtarget) {
7381  // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
7382  if (!EnableAArch64ExtrGeneration)
7383  return SDValue();
7384  SelectionDAG &DAG = DCI.DAG;
7385  EVT VT = N->getValueType(0);
7386 
7387  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
7388  return SDValue();
7389 
7390  SDValue Res = tryCombineToEXTR(N, DCI);
7391  if (Res.getNode())
7392  return Res;
7393 
7394  Res = tryCombineToBSL(N, DCI);
7395  if (Res.getNode())
7396  return Res;
7397 
7398  return SDValue();
7399 }
7400 
7401 static SDValue performBitcastCombine(SDNode *N,
7402  TargetLowering::DAGCombinerInfo &DCI,
7403  SelectionDAG &DAG) {
7404  // Wait 'til after everything is legalized to try this. That way we have
7405  // legal vector types and such.
7406  if (DCI.isBeforeLegalizeOps())
7407  return SDValue();
7408 
7409  // Remove extraneous bitcasts around an extract_subvector.
7410  // For example,
7411  // (v4i16 (bitconvert
7412  // (extract_subvector (v2i64 (bitconvert (v8i16 ...)), (i64 1)))))
7413  // becomes
7414  // (extract_subvector ((v8i16 ...), (i64 4)))
7415 
7416  // Only interested in 64-bit vectors as the ultimate result.
7417  EVT VT = N->getValueType(0);
7418  if (!VT.isVector())
7419  return SDValue();
7420  if (VT.getSimpleVT().getSizeInBits() != 64)
7421  return SDValue();
7422  // Is the operand an extract_subvector starting at the beginning or halfway
7423  // point of the vector? A low half may also come through as an
7424  // EXTRACT_SUBREG, so look for that, too.
7425  SDValue Op0 = N->getOperand(0);
7426  if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR &&
7427  !(Op0->isMachineOpcode() &&
7428  Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG))
7429  return SDValue();
7430  uint64_t idx = cast<ConstantSDNode>(Op0->getOperand(1))->getZExtValue();
7431  if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
7432  if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0)
7433  return SDValue();
7434  } else if (Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG) {
7435  if (idx != AArch64::dsub)
7436  return SDValue();
7437  // The dsub reference is equivalent to a lane zero subvector reference.
7438  idx = 0;
7439  }
7440  // Look through the bitcast of the input to the extract.
7441  if (Op0->getOperand(0)->getOpcode() != ISD::BITCAST)
7442  return SDValue();
7443  SDValue Source = Op0->getOperand(0)->getOperand(0);
7444  // If the source type has twice the number of elements as our destination
7445  // type, we know this is an extract of the high or low half of the vector.
7446  EVT SVT = Source->getValueType(0);
7447  if (SVT.getVectorNumElements() != VT.getVectorNumElements() * 2)
7448  return SDValue();
7449 
7450  DEBUG(dbgs() << "aarch64-lower: bitcast extract_subvector simplification\n");
7451 
7452  // Create the simplified form to just extract the low or high half of the
7453  // vector directly rather than bothering with the bitcasts.
7454  SDLoc dl(N);
7455  unsigned NumElements = VT.getVectorNumElements();
7456  if (idx) {
7457  SDValue HalfIdx = DAG.getConstant(NumElements, dl, MVT::i64);
7458  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx);
7459  } else {
7460  SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, dl, MVT::i32);
7461  return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
7462  Source, SubReg),
7463  0);
7464  }
7465 }
7466 
7467 static SDValue performConcatVectorsCombine(SDNode *N,
7468  TargetLowering::DAGCombinerInfo &DCI,
7469  SelectionDAG &DAG) {
7470  SDLoc dl(N);
7471  EVT VT = N->getValueType(0);
7472  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
7473 
7474  // Optimize concat_vectors of truncated vectors, where the intermediate
7475  // type is illegal, to avoid said illegality, e.g.,
7476  // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
7477  // (v2i16 (truncate (v2i64)))))
7478  // ->
7479  // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
7480  // (v4i32 (bitcast (v2i64))),
7481  // <0, 2, 4, 6>)))
7482  // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
7483  // on both input and result type, so we might generate worse code.
7484  // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
7485  if (N->getNumOperands() == 2 &&
7486  N0->getOpcode() == ISD::TRUNCATE &&
7487  N1->getOpcode() == ISD::TRUNCATE) {
7488  SDValue N00 = N0->getOperand(0);
7489  SDValue N10 = N1->getOperand(0);
7490  EVT N00VT = N00.getValueType();
7491 
7492  if (N00VT == N10.getValueType() &&
7493  (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
7494  N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
7495  MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
7496  SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
7497  for (size_t i = 0; i < Mask.size(); ++i)
7498  Mask[i] = i * 2;
7499  return DAG.getNode(ISD::TRUNCATE, dl, VT,
7500  DAG.getVectorShuffle(
7501  MidVT, dl,
7502  DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
7503  DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
7504  }
7505  }
7506 
7507  // Wait 'til after everything is legalized to try this. That way we have
7508  // legal vector types and such.
7509  if (DCI.isBeforeLegalizeOps())
7510  return SDValue();
7511 
7512  // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
7513  // splat. The indexed instructions are going to be expecting a DUPLANE64, so
7514  // canonicalise to that.
7515  if (N0 == N1 && VT.getVectorNumElements() == 2) {
7516  assert(VT.getVectorElementType().getSizeInBits() == 64);
7517  return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
7518  DAG.getConstant(0, dl, MVT::i64));
7519  }
7520 
7521  // Canonicalise concat_vectors so that the right-hand vector has as few
7522  // bit-casts as possible before its real operation. The primary matching
7523  // destination for these operations will be the narrowing "2" instructions,
7524  // which depend on the operation being performed on this right-hand vector.
7525  // For example,
7526  // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
7527  // becomes
7528  // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
7529 
7530  if (N1->getOpcode() != ISD::BITCAST)
7531  return SDValue();
7532  SDValue RHS = N1->getOperand(0);
7533  MVT RHSTy = RHS.getValueType().getSimpleVT();
7534  // If the RHS is not a vector, this is not the pattern we're looking for.
7535  if (!RHSTy.isVector())
7536  return SDValue();
7537 
7538  DEBUG(dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
7539 
7540  MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
7541  RHSTy.getVectorNumElements() * 2);
7542  return DAG.getNode(ISD::BITCAST, dl, VT,
7543  DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
7544  DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
7545  RHS));
7546 }
7547 
7548 static SDValue tryCombineFixedPointConvert(SDNode *N,
7549  TargetLowering::DAGCombinerInfo &DCI,
7550  SelectionDAG &DAG) {
7551  // Wait 'til after everything is legalized to try this. That way we have
7552  // legal vector types and such.
7553  if (DCI.isBeforeLegalizeOps())
7554  return SDValue();
7555  // Transform a scalar conversion of a value from a lane extract into a
7556  // lane extract of a vector conversion. E.g., from foo1 to foo2:
7557  // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
7558  // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
7559  //
7560  // The second form interacts better with instruction selection and the
7561  // register allocator to avoid cross-class register copies that aren't
7562  // coalescable due to a lane reference.
7563 
7564  // Check the operand and see if it originates from a lane extract.
7565  SDValue Op1 = N->getOperand(1);
7566  if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
7567  // Yep, no additional predication needed. Perform the transform.
7568  SDValue IID = N->getOperand(0);
7569  SDValue Shift = N->getOperand(2);
7570  SDValue Vec = Op1.getOperand(0);
7571  SDValue Lane = Op1.getOperand(1);
7572  EVT ResTy = N->getValueType(0);
7573  EVT VecResTy;
7574  SDLoc DL(N);
7575 
7576  // The vector width should be 128 bits by the time we get here, even
7577  // if it started as 64 bits (the extract_vector handling will have
7578  // done so).
7579  assert(Vec.getValueType().getSizeInBits() == 128 &&
7580  "unexpected vector size on extract_vector_elt!");
7581  if (Vec.getValueType() == MVT::v4i32)
7582  VecResTy = MVT::v4f32;
7583  else if (Vec.getValueType() == MVT::v2i64)
7584  VecResTy = MVT::v2f64;
7585  else
7586  llvm_unreachable("unexpected vector type!");
7587 
7588  SDValue Convert =
7589  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
7590  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
7591  }
7592  return SDValue();
7593 }
7594 
7595 // AArch64 high-vector "long" operations are formed by performing the non-high
7596 // version on an extract_subvector of each operand which gets the high half:
7597 //
7598 // (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
7599 //
7600 // However, there are cases which don't have an extract_high explicitly, but
7601 // have another operation that can be made compatible with one for free. For
7602 // example:
7603 //
7604 // (dupv64 scalar) --> (extract_high (dup128 scalar))
7605 //
7606 // This routine does the actual conversion of such DUPs, once outer routines
7607 // have determined that everything else is in order.
7608 // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
7609 // similarly here.
7610 static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
7611  switch (N.getOpcode()) {
7612  case AArch64ISD::DUP:
7613  case AArch64ISD::DUPLANE8:
7614  case AArch64ISD::DUPLANE16:
7615  case AArch64ISD::DUPLANE32:
7616  case AArch64ISD::DUPLANE64:
7617  case AArch64ISD::MOVI:
7618  case AArch64ISD::MOVIshift:
7619  case AArch64ISD::MOVIedit:
7620  case AArch64ISD::MOVImsl:
7621  case AArch64ISD::MVNIshift:
7622  case AArch64ISD::MVNImsl:
7623  break;
7624  default:
7625  // FMOV could be supported, but isn't very useful, as it would only occur
7626  // if you passed a bitcast' floating point immediate to an eligible long
7627  // integer op (addl, smull, ...).
7628  return SDValue();
7629  }
7630 
7631  MVT NarrowTy = N.getSimpleValueType();
7632  if (!NarrowTy.is64BitVector())
7633  return SDValue();
7634 
7635  MVT ElementTy = NarrowTy.getVectorElementType();
7636  unsigned NumElems = NarrowTy.getVectorNumElements();
7637  MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
7638 
7639  SDLoc dl(N);
7640  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
7641  DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
7642  DAG.getConstant(NumElems, dl, MVT::i64));
7643 }
7644 
7645 static bool isEssentiallyExtractSubvector(SDValue N) {
7646  if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
7647  return true;
7648 
7649  return N.getOpcode() == ISD::BITCAST &&
7650  N.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR;
7651 }
7652 
7653 /// \brief Helper structure to keep track of ISD::SET_CC operands.
7654 struct GenericSetCCInfo {
7655  const SDValue *Opnd0;
7656  const SDValue *Opnd1;
7657  ISD::CondCode CC;
7658 };
7659 
7660 /// \brief Helper structure to keep track of a SET_CC lowered into AArch64 code.
7661 struct AArch64SetCCInfo {
7662  const SDValue *Cmp;
7663  AArch64CC::CondCode CC;
7664 };
7665 
7666 /// \brief Helper structure to keep track of SetCC information.
7667 union SetCCInfo {
7668  GenericSetCCInfo Generic;
7669  AArch64SetCCInfo AArch64;
7670 };
7671 
7672 /// \brief Helper structure to be able to read SetCC information. If set to
7673 /// true, IsAArch64 field, Info is a AArch64SetCCInfo, otherwise Info is a
7674 /// GenericSetCCInfo.
7675 struct SetCCInfoAndKind {
7676  SetCCInfo Info;
7677  bool IsAArch64;
7678 };
7679 
7680 /// \brief Check whether or not \p Op is a SET_CC operation, either a generic or
7681 /// an
7682 /// AArch64 lowered one.
7683 /// \p SetCCInfo is filled accordingly.
7684 /// \post SetCCInfo is meaningful only when this function returns true.
7685 /// \return True when Op is a kind of SET_CC operation.
7686 static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
7687  // If this is a setcc, this is straightforward.
7688  if (Op.getOpcode() == ISD::SETCC) {
7689  SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
7690  SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
7691  SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7692  SetCCInfo.IsAArch64 = false;
7693  return true;
7694  }
7695  // Otherwise, check if this is a matching csel instruction.
7696  // In other words:
7697  // - csel 1, 0, cc
7698  // - csel 0, 1, !cc
7699  if (Op.getOpcode() != AArch64ISD::CSEL)
7700  return false;
7701  // Set the information about the operands.
7702  // TODO: we want the operands of the Cmp not the csel
7703  SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
7704  SetCCInfo.IsAArch64 = true;
7705  SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
7706  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
7707 
7708  // Check that the operands match the constraints:
7709  // (1) Both operands must be constants.
7710  // (2) One must be 1 and the other must be 0.
7711  ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
7712  ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
7713 
7714  // Check (1).
7715  if (!TValue || !FValue)
7716  return false;
7717 
7718  // Check (2).
7719  if (!TValue->isOne()) {
7720  // Update the comparison when we are interested in !cc.
7721  std::swap(TValue, FValue);
7722  SetCCInfo.Info.AArch64.CC =
7723  AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
7724  }
7725  return TValue->isOne() && FValue->isNullValue();
7726 }
7727 
7728 // Returns true if Op is setcc or zext of setcc.
7729 static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
7730  if (isSetCC(Op, Info))
7731  return true;
7732  return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
7733  isSetCC(Op->getOperand(0), Info));
7734 }
7735 
7736 // The folding we want to perform is:
7737 // (add x, [zext] (setcc cc ...) )
7738 // -->
7739 // (csel x, (add x, 1), !cc ...)
7740 //
7741 // The latter will get matched to a CSINC instruction.
7742 static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
7743  assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
7744  SDValue LHS = Op->getOperand(0);
7745  SDValue RHS = Op->getOperand(1);
7746  SetCCInfoAndKind InfoAndKind;
7747 
7748  // If neither operand is a SET_CC, give up.
7749  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
7750  std::swap(LHS, RHS);
7751  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
7752  return SDValue();
7753  }
7754 
7755  // FIXME: This could be generalized to work for FP comparisons.
7756  EVT CmpVT = InfoAndKind.IsAArch64
7757  ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
7758  : InfoAndKind.Info.Generic.Opnd0->getValueType();
7759  if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
7760  return SDValue();
7761 
7762  SDValue CCVal;
7763  SDValue Cmp;
7764  SDLoc dl(Op);
7765  if (InfoAndKind.IsAArch64) {
7766  CCVal = DAG.getConstant(
7767  AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
7768  MVT::i32);
7769  Cmp = *InfoAndKind.Info.AArch64.Cmp;
7770  } else
7771  Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0,
7772  *InfoAndKind.Info.Generic.Opnd1,
7773  ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true),
7774  CCVal, DAG, dl);
7775 
7776  EVT VT = Op->getValueType(0);
7777  LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
7778  return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
7779 }
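// Illustrative example (added, not part of the original source): the fold
// above turns
//   %c = setcc eq i32 %a, %b
//   %r = add i32 %x, (zext %c)
// into a compare followed by "csinc w0, wX, wX, ne", i.e. x + 1 when a == b
// and x otherwise, so the 0/1 value is never materialized.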
7780 
7781 // The basic add/sub long vector instructions have variants with "2" on the end
7782 // which act on the high-half of their inputs. They are normally matched by
7783 // patterns like:
7784 //
7785 // (add (zeroext (extract_high LHS)),
7786 // (zeroext (extract_high RHS)))
7787 // -> uaddl2 vD, vN, vM
7788 //
7789 // However, if one of the extracts is something like a duplicate, this
7790 // instruction can still be used profitably. This function puts the DAG into a
7791 // more appropriate form for those patterns to trigger.
7792 static SDValue performAddSubLongCombine(SDNode *N,
7793  TargetLowering::DAGCombinerInfo &DCI,
7794  SelectionDAG &DAG) {
7795  if (DCI.isBeforeLegalizeOps())
7796  return SDValue();
7797 
7798  MVT VT = N->getSimpleValueType(0);
7799  if (!VT.is128BitVector()) {
7800  if (N->getOpcode() == ISD::ADD)
7801  return performSetccAddFolding(N, DAG);
7802  return SDValue();
7803  }
7804 
7805  // Make sure both branches are extended in the same way.
7806  SDValue LHS = N->getOperand(0);
7807  SDValue RHS = N->getOperand(1);
7808  if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
7809  LHS.getOpcode() != ISD::SIGN_EXTEND) ||
7810  LHS.getOpcode() != RHS.getOpcode())
7811  return SDValue();
7812 
7813  unsigned ExtType = LHS.getOpcode();
7814 
7815  // It's not worth doing if at least one of the inputs isn't already an
7816  // extract, but we don't know which it'll be so we have to try both.
7817  if (isEssentiallyExtractSubvector(LHS.getOperand(0))) {
7818  RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
7819  if (!RHS.getNode())
7820  return SDValue();
7821 
7822  RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
7823  } else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) {
7824  LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
7825  if (!LHS.getNode())
7826  return SDValue();
7827 
7828  LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
7829  }
7830 
7831  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
7832 }
7833 
7834 // Massage DAGs which we can use the high-half "long" operations on into
7835 // something isel will recognize better. E.g.
7836 //
7837 // (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
7838 // (aarch64_neon_umull (extract_high (v2i64 vec)))
7839 // (extract_high (v2i64 (dup128 scalar)))))
7840 //
7841 static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
7842  TargetLowering::DAGCombinerInfo &DCI,
7843  SelectionDAG &DAG) {
7844  if (DCI.isBeforeLegalizeOps())
7845  return SDValue();
7846 
7847  SDValue LHS = N->getOperand(1);
7848  SDValue RHS = N->getOperand(2);
7849  assert(LHS.getValueType().is64BitVector() &&
7850  RHS.getValueType().is64BitVector() &&
7851  "unexpected shape for long operation");
7852 
7853  // Either node could be a DUP, but it's not worth doing both of them (you'd
7854  // just as well use the non-high version) so look for a corresponding extract
7855  // operation on the other "wing".
7856  if (isEssentiallyExtractSubvector(LHS)) {
7857  RHS = tryExtendDUPToExtractHigh(RHS, DAG);
7858  if (!RHS.getNode())
7859  return SDValue();
7860  } else if (isEssentiallyExtractSubvector(RHS)) {
7861  LHS = tryExtendDUPToExtractHigh(LHS, DAG);
7862  if (!LHS.getNode())
7863  return SDValue();
7864  }
7865 
7866  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
7867  N->getOperand(0), LHS, RHS);
7868 }
7869 
7870 static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
7871  MVT ElemTy = N->getSimpleValueType(0).getScalarType();
7872  unsigned ElemBits = ElemTy.getSizeInBits();
7873 
7874  int64_t ShiftAmount;
7875  if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
7876  APInt SplatValue, SplatUndef;
7877  unsigned SplatBitSize;
7878  bool HasAnyUndefs;
7879  if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7880  HasAnyUndefs, ElemBits) ||
7881  SplatBitSize != ElemBits)
7882  return SDValue();
7883 
7884  ShiftAmount = SplatValue.getSExtValue();
7885  } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
7886  ShiftAmount = CVN->getSExtValue();
7887  } else
7888  return SDValue();
7889 
7890  unsigned Opcode;
7891  bool IsRightShift;
7892  switch (IID) {
7893  default:
7894  llvm_unreachable("Unknown shift intrinsic");
7895  case Intrinsic::aarch64_neon_sqshl:
7896  Opcode = AArch64ISD::SQSHL_I;
7897  IsRightShift = false;
7898  break;
7899  case Intrinsic::aarch64_neon_uqshl:
7900  Opcode = AArch64ISD::UQSHL_I;
7901  IsRightShift = false;
7902  break;
7903  case Intrinsic::aarch64_neon_srshl:
7904  Opcode = AArch64ISD::SRSHR_I;
7905  IsRightShift = true;
7906  break;
7907  case Intrinsic::aarch64_neon_urshl:
7908  Opcode = AArch64ISD::URSHR_I;
7909  IsRightShift = true;
7910  break;
7911  case Intrinsic::aarch64_neon_sqshlu:
7912  Opcode = AArch64ISD::SQSHLU_I;
7913  IsRightShift = false;
7914  break;
7915  }
7916 
7917  if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
7918  SDLoc dl(N);
7919  return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
7920  DAG.getConstant(-ShiftAmount, dl, MVT::i32));
7921  } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
7922  SDLoc dl(N);
7923  return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
7924  DAG.getConstant(ShiftAmount, dl, MVT::i32));
7925  }
7926 
7927  return SDValue();
7928 }
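// Illustrative example (added, not part of the original source): for
// @llvm.aarch64.neon.srshl(<4 x i32> %v, <4 x i32> <splat -3>) the splatted
// constant -3 denotes a rounding shift right by 3, so the code above emits
// (SRSHR_I %v, 3); a non-constant shift amount is left to the generic SRSHL
// instruction instead.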
7929 
7930 // The CRC32[BH] instructions ignore the high bits of their data operand. Since
7931 // the intrinsics must be legal and take an i32, this means there's almost
7932 // certainly going to be a zext in the DAG which we can eliminate.
7933 static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
7934  SDValue AndN = N->getOperand(2);
7935  if (AndN.getOpcode() != ISD::AND)
7936  return SDValue();
7937 
7938  ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
7939  if (!CMask || CMask->getZExtValue() != Mask)
7940  return SDValue();
7941 
7942  return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
7943  N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
7944 }
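// Illustrative example (added, not part of the original source): given
//   %b = and i32 %data, 255
//   %r = call i32 @llvm.aarch64.crc32b(i32 %acc, i32 %b)
// the mask matches 0xff, so the combine above passes %data straight to the
// intrinsic and the redundant AND disappears (crc32b only reads the low byte).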
7945 
7946 static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
7947  SelectionDAG &DAG) {
7948  SDLoc dl(N);
7949  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
7950  DAG.getNode(Opc, dl,
7951  N->getOperand(1).getSimpleValueType(),
7952  N->getOperand(1)),
7953  DAG.getConstant(0, dl, MVT::i64));
7954 }
7955 
7956 static SDValue performIntrinsicCombine(SDNode *N,
7957  TargetLowering::DAGCombinerInfo &DCI,
7958  const AArch64Subtarget *Subtarget) {
7959  SelectionDAG &DAG = DCI.DAG;
7960  unsigned IID = getIntrinsicID(N);
7961  switch (IID) {
7962  default:
7963  break;
7964  case Intrinsic::aarch64_neon_vcvtfxs2fp:
7965  case Intrinsic::aarch64_neon_vcvtfxu2fp:
7966  return tryCombineFixedPointConvert(N, DCI, DAG);
7967  break;
7968  case Intrinsic::aarch64_neon_saddv:
7969  return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
7970  case Intrinsic::aarch64_neon_uaddv:
7971  return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
7972  case Intrinsic::aarch64_neon_sminv:
7973  return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
7974  case Intrinsic::aarch64_neon_uminv:
7975  return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
7976  case Intrinsic::aarch64_neon_smaxv:
7977  return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
7978  case Intrinsic::aarch64_neon_umaxv:
7979  return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
7980  case Intrinsic::aarch64_neon_fmax:
7981  return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
7982  N->getOperand(1), N->getOperand(2));
7983  case Intrinsic::aarch64_neon_fmin:
7984  return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0),
7985  N->getOperand(1), N->getOperand(2));
7986  case Intrinsic::aarch64_neon_smull:
7987  case Intrinsic::aarch64_neon_umull:
7988  case Intrinsic::aarch64_neon_pmull:
7989  case Intrinsic::aarch64_neon_sqdmull:
7990  return tryCombineLongOpWithDup(IID, N, DCI, DAG);
7991  case Intrinsic::aarch64_neon_sqshl:
7992  case Intrinsic::aarch64_neon_uqshl:
7993  case Intrinsic::aarch64_neon_sqshlu:
7994  case Intrinsic::aarch64_neon_srshl:
7995  case Intrinsic::aarch64_neon_urshl:
7996  return tryCombineShiftImm(IID, N, DAG);
7997  case Intrinsic::aarch64_crc32b:
7998  case Intrinsic::aarch64_crc32cb:
7999  return tryCombineCRC32(0xff, N, DAG);
8000  case Intrinsic::aarch64_crc32h:
8001  case Intrinsic::aarch64_crc32ch:
8002  return tryCombineCRC32(0xffff, N, DAG);
8003  }
8004  return SDValue();
8005 }
8006 
8007 static SDValue performExtendCombine(SDNode *N,
8008  TargetLowering::DAGCombinerInfo &DCI,
8009  SelectionDAG &DAG) {
8010  // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
8011  // we can convert that DUP into another extract_high (of a bigger DUP), which
8012  // helps the backend to decide that an sabdl2 would be useful, saving a real
8013  // extract_high operation.
8014  if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
8015  N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
8016  SDNode *ABDNode = N->getOperand(0).getNode();
8017  unsigned IID = getIntrinsicID(ABDNode);
8018  if (IID == Intrinsic::aarch64_neon_sabd ||
8019  IID == Intrinsic::aarch64_neon_uabd) {
8020  SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
8021  if (!NewABD.getNode())
8022  return SDValue();
8023 
8024  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
8025  NewABD);
8026  }
8027  }
8028 
8029  // This is effectively a custom type legalization for AArch64.
8030  //
8031  // Type legalization will split an extend of a small, legal, type to a larger
8032  // illegal type by first splitting the destination type, often creating
8033  // illegal source types, which then get legalized in isel-confusing ways,
8034  // leading to really terrible codegen. E.g.,
8035  // %result = v8i32 sext v8i8 %value
8036  // becomes
8037  // %losrc = extract_subreg %value, ...
8038  // %hisrc = extract_subreg %value, ...
8039  // %lo = v4i32 sext v4i8 %losrc
8040  // %hi = v4i32 sext v4i8 %hisrc
8041  // Things go rapidly downhill from there.
8042  //
8043  // For AArch64, the [sz]ext vector instructions can only go up one element
8044 // size, so we can, e.g., extend from i8 to i16, but going from i8 to i32
8045 // takes two instructions.
8046  //
8047  // This implies that the most efficient way to do the extend from v8i8
8048 // to two v4i32 values is to first extend the v8i8 to v8i16, then let
8049 // the normal splitting happen for the v8i16->v8i32.
8050 
8051  // This is pre-legalization to catch some cases where the default
8052  // type legalization will create ill-tempered code.
8053  if (!DCI.isBeforeLegalizeOps())
8054  return SDValue();
8055 
8056  // We're only interested in cleaning things up for non-legal vector types
8057  // here. If both the source and destination are legal, things will just
8058  // work naturally without any fiddling.
8059  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8060  EVT ResVT = N->getValueType(0);
8061  if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
8062  return SDValue();
8063  // If the vector type isn't a simple VT, it's beyond the scope of what
8064  // we're worried about here. Let legalization do its thing and hope for
8065  // the best.
8066  SDValue Src = N->getOperand(0);
8067  EVT SrcVT = Src->getValueType(0);
8068  if (!ResVT.isSimple() || !SrcVT.isSimple())
8069  return SDValue();
8070 
8071  // If the source VT is a 64-bit vector, we can play games and get the
8072  // better results we want.
8073  if (SrcVT.getSizeInBits() != 64)
8074  return SDValue();
8075 
8076  unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
8077  unsigned ElementCount = SrcVT.getVectorNumElements();
8078  SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), ElementCount);
8079  SDLoc DL(N);
8080  Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
8081 
8082  // Now split the rest of the operation into two halves, each with a 64
8083  // bit source.
8084  EVT LoVT, HiVT;
8085  SDValue Lo, Hi;
8086  unsigned NumElements = ResVT.getVectorNumElements();
8087  assert(!(NumElements & 1) && "Splitting vector, but not in half!");
8088  LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
8089  ResVT.getVectorElementType(), NumElements / 2);
8090 
8091  EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
8092  LoVT.getVectorNumElements());
8093  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
8094  DAG.getConstant(0, DL, MVT::i64));
8095  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
8096  DAG.getConstant(InNVT.getVectorNumElements(), DL, MVT::i64));
8097  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
8098  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
8099 
8100  // Now combine the parts back together so we still have a single result
8101  // like the combiner expects.
8102  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
8103 }
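
A scalar analogue of the two-stage extend this combine sets up (illustrative, plain C++): extending i8 to i16 and then i16 to i32 produces exactly the same values as a single i8 to i32 extend, so doing the first step before type legalization splits the vector loses nothing.

#include <cassert>
#include <cstdint>

int main() {
  for (int v = -128; v <= 127; ++v) {
    int8_t  x   = (int8_t)v;
    int16_t mid = (int16_t)x;   // first extend one element size up (v8i8 -> v8i16)
    int32_t two = (int32_t)mid; // then the normal split/extend (v8i16 -> 2 x v4i32)
    int32_t one = (int32_t)x;   // single-step extend
    assert(one == two);
  }
  return 0;
}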
8104 
8105 /// Replace a splat of a scalar to a vector store by scalar stores of the scalar
8106 /// value. The load store optimizer pass will merge them to store pair stores.
8107 /// This has better performance than a splat of the scalar followed by a split
8108 /// vector store. Even if the stores are not merged it is four stores vs a dup,
8109 /// followed by an ext.b and two stores.
8110 static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
8111  SDValue StVal = St->getValue();
8112  EVT VT = StVal.getValueType();
8113 
8114  // Don't replace floating point stores, they possibly won't be transformed to
8115  // stp because of the store pair suppress pass.
8116  if (VT.isFloatingPoint())
8117  return SDValue();
8118 
8119  // Check for insert vector elements.
8120  if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
8121  return SDValue();
8122 
8123  // We can express a splat as store pair(s) for 2 or 4 elements.
8124  unsigned NumVecElts = VT.getVectorNumElements();
8125  if (NumVecElts != 4 && NumVecElts != 2)
8126  return SDValue();
8127  SDValue SplatVal = StVal.getOperand(1);
8128  unsigned RemainInsertElts = NumVecElts - 1;
8129 
8130  // Check that this is a splat.
8131  while (--RemainInsertElts) {
8132  SDValue NextInsertElt = StVal.getOperand(0);
8133  if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
8134  return SDValue();
8135  if (NextInsertElt.getOperand(1) != SplatVal)
8136  return SDValue();
8137  StVal = NextInsertElt;
8138  }
8139  unsigned OrigAlignment = St->getAlignment();
8140  unsigned EltOffset = NumVecElts == 4 ? 4 : 8;
8141  unsigned Alignment = std::min(OrigAlignment, EltOffset);
8142 
8143  // Create scalar stores. This is at least as good as the code sequence for a
8144 // split unaligned store, which is a dup.s, ext.b, and two stores.
8145  // Most of the time the three stores should be replaced by store pair
8146  // instructions (stp).
8147  SDLoc DL(St);
8148  SDValue BasePtr = St->getBasePtr();
8149  SDValue NewST1 =
8150  DAG.getStore(St->getChain(), DL, SplatVal, BasePtr, St->getPointerInfo(),
8151  St->isVolatile(), St->isNonTemporal(), St->getAlignment());
8152 
8153  unsigned Offset = EltOffset;
8154  while (--NumVecElts) {
8155  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
8156  DAG.getConstant(Offset, DL, MVT::i64));
8157  NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
8158  St->getPointerInfo(), St->isVolatile(),
8159  St->isNonTemporal(), Alignment);
8160  Offset += EltOffset;
8161  }
8162  return NewST1;
8163 }
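
A plain C++ illustration (not SelectionDAG code) of why the rewrite is safe: the four scalar stores at offsets 0, 4, 8 and 12 produce the same memory image as the original 16-byte splat vector store; the load/store optimizer can then merge them into stp pairs.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t splat = 0xdeadbeefu;
  uint32_t vec[4] = {splat, splat, splat, splat};

  uint8_t a[16], b[16];
  std::memcpy(a, vec, 16);              // the original 16B vector store
  for (unsigned i = 0; i < 4; ++i)      // four 4B scalar stores, offsets 0,4,8,12
    std::memcpy(b + i * 4, &splat, 4);

  assert(std::memcmp(a, b, 16) == 0);   // identical memory image
  return 0;
}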
8164 
8165 static SDValue performSTORECombine(SDNode *N,
8166  TargetLowering::DAGCombinerInfo &DCI,
8167  SelectionDAG &DAG,
8168  const AArch64Subtarget *Subtarget) {
8169  if (!DCI.isBeforeLegalize())
8170  return SDValue();
8171 
8172  StoreSDNode *S = cast<StoreSDNode>(N);
8173  if (S->isVolatile())
8174  return SDValue();
8175 
8176  // Cyclone has bad performance on unaligned 16B stores when crossing line and
8177  // page boundaries. We want to split such stores.
8178  if (!Subtarget->isCyclone())
8179  return SDValue();
8180 
8181  // Don't split at Oz.
8182  MachineFunction &MF = DAG.getMachineFunction();
8183  bool IsMinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
8184  if (IsMinSize)
8185  return SDValue();
8186 
8187  SDValue StVal = S->getValue();
8188  EVT VT = StVal.getValueType();
8189 
8190  // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
8191  // those up regresses performance on micro-benchmarks and olden/bh.
8192  if (!VT.isVector() || VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
8193  return SDValue();
8194 
8195  // Split unaligned 16B stores. They are terrible for performance.
8196  // Don't split stores with alignment of 1 or 2. Code that uses clang vector
8197  // extensions can use this to mark that it does not want splitting to happen
8198  // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
8199  // eliminating alignment hazards is only 1 in 8 for alignment of 2.
8200  if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
8201  S->getAlignment() <= 2)
8202  return SDValue();
8203 
8204  // If we get a splat of a scalar convert this vector store to a store of
8205  // scalars. They will be merged into store pairs thereby removing two
8206  // instructions.
8207  SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S);
8208  if (ReplacedSplat != SDValue())
8209  return ReplacedSplat;
8210 
8211  SDLoc DL(S);
8212  unsigned NumElts = VT.getVectorNumElements() / 2;
8213  // Split VT into two.
8214  EVT HalfVT =
8215  EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
8216  SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
8217  DAG.getConstant(0, DL, MVT::i64));
8218  SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
8219  DAG.getConstant(NumElts, DL, MVT::i64));
8220  SDValue BasePtr = S->getBasePtr();
8221  SDValue NewST1 =
8222  DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
8223  S->isVolatile(), S->isNonTemporal(), S->getAlignment());
8224  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
8225  DAG.getConstant(8, DL, MVT::i64));
8226  return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
8227  S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(),
8228  S->getAlignment());
8229 }
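
The split decision above, restated as a standalone predicate (a sketch that merely mirrors the conditions in performSTORECombine, not LLVM API): only 128-bit stores that are neither 16-byte aligned nor deliberately under-aligned to 1 or 2 are split.

#include <cassert>

static bool shouldSplitUnaligned16BStore(unsigned sizeInBits, unsigned align) {
  if (sizeInBits != 128) return false; // only 16B vector stores are considered
  if (align >= 16)       return false; // already 16B aligned, no hazard to fix
  if (align <= 2)        return false; // alignment 1 or 2 is the opt-out convention
  return true;
}

int main() {
  assert(shouldSplitUnaligned16BStore(128, 8));
  assert(!shouldSplitUnaligned16BStore(128, 16));
  assert(!shouldSplitUnaligned16BStore(128, 2));
  assert(!shouldSplitUnaligned16BStore(64, 8));
  return 0;
}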
8230 
8231 /// Target-specific DAG combine function for post-increment LD1 (lane) and
8232 /// post-increment LD1R.
8233 static SDValue performPostLD1Combine(SDNode *N,
8234  TargetLowering::DAGCombinerInfo &DCI,
8235  bool IsLaneOp) {
8236  if (DCI.isBeforeLegalizeOps())
8237  return SDValue();
8238 
8239  SelectionDAG &DAG = DCI.DAG;
8240  EVT VT = N->getValueType(0);
8241 
8242  unsigned LoadIdx = IsLaneOp ? 1 : 0;
8243  SDNode *LD = N->getOperand(LoadIdx).getNode();
8244  // If it is not a LOAD, we cannot do this combine.
8245  if (LD->getOpcode() != ISD::LOAD)
8246  return SDValue();
8247 
8248  LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
8249  EVT MemVT = LoadSDN->getMemoryVT();
8250  // Check if memory operand is the same type as the vector element.
8251  if (MemVT != VT.getVectorElementType())
8252  return SDValue();
8253 
8254  // Check if there are other uses. If so, do not combine as it will introduce
8255  // an extra load.
8256  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
8257  ++UI) {
8258  if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
8259  continue;
8260  if (*UI != N)
8261  return SDValue();
8262  }
8263 
8264  SDValue Addr = LD->getOperand(1);
8265  SDValue Vector = N->getOperand(0);
8266  // Search for a use of the address operand that is an increment.
8267  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
8268  Addr.getNode()->use_end(); UI != UE; ++UI) {
8269  SDNode *User = *UI;
8270  if (User->getOpcode() != ISD::ADD
8271  || UI.getUse().getResNo() != Addr.getResNo())
8272  continue;
8273 
8274  // Check that the add is independent of the load. Otherwise, folding it
8275  // would create a cycle.
8276  if (User->isPredecessorOf(LD) || LD->isPredecessorOf(User))
8277  continue;
8278  // Also check that add is not used in the vector operand. This would also
8279  // create a cycle.
8280  if (User->isPredecessorOf(Vector.getNode()))
8281  continue;
8282 
8283  // If the increment is a constant, it must match the memory ref size.
8284  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
8285  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
8286  uint32_t IncVal = CInc->getZExtValue();
8287  unsigned NumBytes = VT.getScalarSizeInBits() / 8;
8288  if (IncVal != NumBytes)
8289  continue;
8290  Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
8291  }
8292 
8293  // Finally, check that the vector doesn't depend on the load.
8294  // Again, this would create a cycle.
8295  // The load depending on the vector is fine, as that's the case for the
8296  // LD1*post we'll eventually generate anyway.
8297  if (LoadSDN->isPredecessorOf(Vector.getNode()))
8298  continue;
8299 
8300  SmallVector<SDValue, 8> Ops;
8301  Ops.push_back(LD->getOperand(0)); // Chain
8302  if (IsLaneOp) {
8303  Ops.push_back(Vector); // The vector to be inserted
8304  Ops.push_back(N->getOperand(2)); // The lane to be inserted in the vector
8305  }
8306  Ops.push_back(Addr);
8307  Ops.push_back(Inc);
8308 
8309  EVT Tys[3] = { VT, MVT::i64, MVT::Other };
8310  SDVTList SDTys = DAG.getVTList(Tys);
8311  unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
8312  SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
8313  MemVT,
8314  LoadSDN->getMemOperand());
8315 
8316  // Update the uses.
8317  SmallVector<SDValue, 2> NewResults;
8318  NewResults.push_back(SDValue(LD, 0)); // The result of load
8319  NewResults.push_back(SDValue(UpdN.getNode(), 2)); // Chain
8320  DCI.CombineTo(LD, NewResults);
8321  DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
8322  DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
8323 
8324  break;
8325  }
8326  return SDValue();
8327 }
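
What the post-increment fold buys, in a scalar C++ model (illustrative only): a load followed by an address increment equal to the memory reference size collapses into a single post-indexed access, which is exactly the LD1*post form generated above.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t data[2] = {11, 22};

  // Separate load + add (the pattern matched above).
  const uint32_t *q = data;
  uint32_t a = *q;
  q = q + 1;            // constant increment == memory ref size

  // Post-indexed form: consume the value and advance the pointer in one step.
  const uint32_t *p = data;
  uint32_t b = *p++;

  assert(a == b && p == q);
  return 0;
}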
8328 
8329 /// Target-specific DAG combine function for NEON load/store intrinsics
8330 /// to merge base address updates.
8331 static SDValue performNEONPostLDSTCombine(SDNode *N,
8332  TargetLowering::DAGCombinerInfo &DCI,
8333  SelectionDAG &DAG) {
8334  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
8335  return SDValue();
8336 
8337  unsigned AddrOpIdx = N->getNumOperands() - 1;
8338  SDValue Addr = N->getOperand(AddrOpIdx);
8339 
8340  // Search for a use of the address operand that is an increment.
8341  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
8342  UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
8343  SDNode *User = *UI;
8344  if (User->getOpcode() != ISD::ADD ||
8345  UI.getUse().getResNo() != Addr.getResNo())
8346  continue;
8347 
8348  // Check that the add is independent of the load/store. Otherwise, folding
8349  // it would create a cycle.
8350  if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
8351  continue;
8352 
8353  // Find the new opcode for the updating load/store.
8354  bool IsStore = false;
8355  bool IsLaneOp = false;
8356  bool IsDupOp = false;
8357  unsigned NewOpc = 0;
8358  unsigned NumVecs = 0;
8359  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
8360  switch (IntNo) {
8361  default: llvm_unreachable("unexpected intrinsic for Neon base update");
8362  case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
8363  NumVecs = 2; break;
8364  case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
8365  NumVecs = 3; break;
8366  case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
8367  NumVecs = 4; break;
8368  case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
8369  NumVecs = 2; IsStore = true; break;
8370  case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
8371  NumVecs = 3; IsStore = true; break;
8372  case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
8373  NumVecs = 4; IsStore = true; break;
8374  case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
8375  NumVecs = 2; break;
8376  case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
8377  NumVecs = 3; break;
8378  case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
8379  NumVecs = 4; break;
8380  case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
8381  NumVecs = 2; IsStore = true; break;
8382  case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
8383  NumVecs = 3; IsStore = true; break;
8384  case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
8385  NumVecs = 4; IsStore = true; break;
8386  case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
8387  NumVecs = 2; IsDupOp = true; break;
8388  case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
8389  NumVecs = 3; IsDupOp = true; break;
8390  case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
8391  NumVecs = 4; IsDupOp = true; break;
8392  case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
8393  NumVecs = 2; IsLaneOp = true; break;
8394  case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
8395  NumVecs = 3; IsLaneOp = true; break;
8396  case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
8397  NumVecs = 4; IsLaneOp = true; break;
8398  case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
8399  NumVecs = 2; IsStore = true; IsLaneOp = true; break;
8400  case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
8401  NumVecs = 3; IsStore = true; IsLaneOp = true; break;
8402  case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
8403  NumVecs = 4; IsStore = true; IsLaneOp = true; break;
8404  }
8405 
8406  EVT VecTy;
8407  if (IsStore)
8408  VecTy = N->getOperand(2).getValueType();
8409  else
8410  VecTy = N->getValueType(0);
8411 
8412  // If the increment is a constant, it must match the memory ref size.
8413  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
8414  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
8415  uint32_t IncVal = CInc->getZExtValue();
8416  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
8417  if (IsLaneOp || IsDupOp)
8418  NumBytes /= VecTy.getVectorNumElements();
8419  if (IncVal != NumBytes)
8420  continue;
8421  Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
8422  }
8423  SmallVector<SDValue, 8> Ops;
8424  Ops.push_back(N->getOperand(0)); // Incoming chain
8425  // Load lane and store have vector list as input.
8426  if (IsLaneOp || IsStore)
8427  for (unsigned i = 2; i < AddrOpIdx; ++i)
8428  Ops.push_back(N->getOperand(i));
8429  Ops.push_back(Addr); // Base register
8430  Ops.push_back(Inc);
8431 
8432  // Return Types.
8433  EVT Tys[6];
8434  unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
8435  unsigned n;
8436  for (n = 0; n < NumResultVecs; ++n)
8437  Tys[n] = VecTy;
8438  Tys[n++] = MVT::i64; // Type of write back register
8439  Tys[n] = MVT::Other; // Type of the chain
8440  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
8441 
8442  MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
8443  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
8444  MemInt->getMemoryVT(),
8445  MemInt->getMemOperand());
8446 
8447  // Update the uses.
8448  std::vector<SDValue> NewResults;
8449  for (unsigned i = 0; i < NumResultVecs; ++i) {
8450  NewResults.push_back(SDValue(UpdN.getNode(), i));
8451  }
8452  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
8453  DCI.CombineTo(N, NewResults);
8454  DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
8455 
8456  break;
8457  }
8458  return SDValue();
8459 }
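
The increment check above in isolation (a sketch mirroring the NumBytes computation, not LLVM API): a whole-register ld3 of v4i32 must be post-incremented by 3 * 16 = 48 bytes, while the lane/dup forms only advance by one element per register.

#include <cassert>

static unsigned expectedIncrement(unsigned numVecs, unsigned vecBits,
                                  unsigned numElts, bool laneOrDup) {
  unsigned bytes = numVecs * vecBits / 8; // NumVecs * VecTy size in bytes
  if (laneOrDup)
    bytes /= numElts;                     // lane/dup ops touch one element per vector
  return bytes;
}

int main() {
  assert(expectedIncrement(3, 128, 4, false) == 48); // ld3 {v0.4s-v2.4s}, post #48
  assert(expectedIncrement(3, 128, 4, true)  == 12); // ld3 lane/dup, post #12
  return 0;
}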
8460 
8461 // Checks to see if the value is the prescribed width and returns information
8462 // about its extension mode.
8463 static
8464 bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
8465  ExtType = ISD::NON_EXTLOAD;
8466  switch(V.getNode()->getOpcode()) {
8467  default:
8468  return false;
8469  case ISD::LOAD: {
8470  LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
8471  if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
8472  || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
8473  ExtType = LoadNode->getExtensionType();
8474  return true;
8475  }
8476  return false;
8477  }
8478  case ISD::AssertSext: {
8479  VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
8480  if ((TypeNode->getVT() == MVT::i8 && width == 8)
8481  || (TypeNode->getVT() == MVT::i16 && width == 16)) {
8482  ExtType = ISD::SEXTLOAD;
8483  return true;
8484  }
8485  return false;
8486  }
8487  case ISD::AssertZext: {
8488  VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
8489  if ((TypeNode->getVT() == MVT::i8 && width == 8)
8490  || (TypeNode->getVT() == MVT::i16 && width == 16)) {
8491  ExtType = ISD::ZEXTLOAD;
8492  return true;
8493  }
8494  return false;
8495  }
8496  case ISD::Constant:
8497  case ISD::TargetConstant: {
8498  if (std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
8499  1LL << (width - 1))
8500  return true;
8501  return false;
8502  }
8503  }
8504 
8505  return true;
8506 }
8507 
8508 // This function does a whole lot of voodoo to determine if the tests are
8509 // equivalent without and with a mask. Essentially what happens is that given a
8510 // DAG resembling:
8511 //
8512 // +-------------+ +-------------+ +-------------+ +-------------+
8513 // | Input | | AddConstant | | CompConstant| | CC |
8514 // +-------------+ +-------------+ +-------------+ +-------------+
8515 // | | | |
8516 // V V | +----------+
8517 // +-------------+ +----+ | |
8518 // | ADD | |0xff| | |
8519 // +-------------+ +----+ | |
8520 // | | | |
8521 // V V | |
8522 // +-------------+ | |
8523 // | AND | | |
8524 // +-------------+ | |
8525 // | | |
8526 // +-----+ | |
8527 // | | |
8528 // V V V
8529 // +-------------+
8530 // | CMP |
8531 // +-------------+
8532 //
8533 // The AND node may be safely removed for some combinations of inputs. In
8534 // particular we need to take into account the extension type of the Input,
8535 // the exact values of AddConstant, CompConstant, and CC, along with the nominal
8536 // width of the input (this can work for any width inputs, the above graph is
8537 // specific to 8 bits).
8538 //
8539 // The specific equations were worked out by generating output tables for each
8540 // AArch64CC value in terms of the Input (w0), AddConstant (w1) and CompConstant (w2). The
8541 // problem was simplified by working with 4 bit inputs, which means we only
8542 // needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
8543 // extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
8544 // patterns present in both extensions (0,7). For every distinct set of
8545 // AddConstant and CompConstants bit patterns we can consider the masked and
8546 // unmasked versions to be equivalent if the result of this function is true for
8547 // all 16 distinct bit patterns of the current extension type of Input (w0).
8548 //
8549 // sub w8, w0, w1
8550 // and w10, w8, #0x0f
8551 // cmp w8, w2
8552 // cset w9, AArch64CC
8553 // cmp w10, w2
8554 // cset w11, AArch64CC
8555 // cmp w9, w11
8556 // cset w0, eq
8557 // ret
8558 //
8559 // Since the above function shows when the outputs are equivalent it defines
8560 // when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
8561 // would be expensive to run during compiles. The equations below were written
8562 // in a test harness that confirmed they gave outputs equivalent to the above
8563 // function for all inputs, so they can instead be used to determine if the
8564 // removal is legal.
8565 //
8566 // isEquivalentMaskless() is the code for testing if the AND can be removed,
8567 // factored out of the DAG recognition as the DAG can take several forms.
8568 
8569 static
8570 bool isEquivalentMaskless(unsigned CC, unsigned width,
8571  ISD::LoadExtType ExtType, signed AddConstant,
8572  signed CompConstant) {
8573  // By being careful about our equations and only writing them in terms of
8574  // symbolic values and well known constants (0, 1, -1, MaxUInt) we can
8575  // make them generally applicable to all bit widths.
8576  signed MaxUInt = (1 << width);
8577 
8578  // For the purposes of these comparisons sign extending the type is
8579  // equivalent to zero extending the add and displacing it by half the integer
8580  // width. Provided we are careful and make sure our equations are valid over
8581  // the whole range we can just adjust the input and avoid writing equations
8582  // for sign extended inputs.
8583  if (ExtType == ISD::SEXTLOAD)
8584  AddConstant -= (1 << (width-1));
8585 
8586  switch(CC) {
8587  case AArch64CC::LE:
8588  case AArch64CC::GT: {
8589  if ((AddConstant == 0) ||
8590  (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
8591  (AddConstant >= 0 && CompConstant < 0) ||
8592  (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
8593  return true;
8594  } break;
8595  case AArch64CC::LT:
8596  case AArch64CC::GE: {
8597  if ((AddConstant == 0) ||
8598  (AddConstant >= 0 && CompConstant <= 0) ||
8599  (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
8600  return true;
8601  } break;
8602  case AArch64CC::HI:
8603  case AArch64CC::LS: {
8604  if ((AddConstant >= 0 && CompConstant < 0) ||
8605  (AddConstant <= 0 && CompConstant >= -1 &&
8606  CompConstant < AddConstant + MaxUInt))
8607  return true;
8608  } break;
8609  case AArch64CC::PL:
8610  case AArch64CC::MI: {
8611  if ((AddConstant == 0) ||
8612  (AddConstant > 0 && CompConstant <= 0) ||
8613  (AddConstant < 0 && CompConstant <= AddConstant))
8614  return true;
8615  } break;
8616  case AArch64CC::LO:
8617  case AArch64CC::HS: {
8618  if ((AddConstant >= 0 && CompConstant <= 0) ||
8619  (AddConstant <= 0 && CompConstant >= 0 &&
8620  CompConstant <= AddConstant + MaxUInt))
8621  return true;
8622  } break;
8623  case AArch64CC::EQ:
8624  case AArch64CC::NE: {
8625  if ((AddConstant > 0 && CompConstant < 0) ||
8626  (AddConstant < 0 && CompConstant >= 0 &&
8627  CompConstant < AddConstant + MaxUInt) ||
8628  (AddConstant >= 0 && CompConstant >= 0 &&
8629  CompConstant >= AddConstant) ||
8630  (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
8631 
8632  return true;
8633  } break;
8634  case AArch64CC::VS:
8635  case AArch64CC::VC:
8636  case AArch64CC::AL:
8637  case AArch64CC::NV:
8638  return true;
8639  case AArch64CC::Invalid:
8640  break;
8641  }
8642 
8643  return false;
8644 }
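
A minimal standalone illustration of the underlying idea (it deliberately ignores the constants and condition code that isEquivalentMaskless reasons about): once checkValueWidth has proven the value is an 8-bit zero-extended load, masking it with 0xff is a no-op on the value itself, which is why dropping the AND can be legal at all.

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v <= 0xff; ++v) {
    uint32_t zextLoad = v;                 // value produced by an 8-bit zextload
    assert((zextLoad & 0xff) == zextLoad); // the mask does not change the value
  }
  return 0;
}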
8645 
8646 static
8647 SDValue performCONDCombine(SDNode *N,
8648  TargetLowering::DAGCombinerInfo &DCI,
8649  SelectionDAG &DAG, unsigned CCIndex,
8650  unsigned CmpIndex) {
8651  unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
8652  SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
8653  unsigned CondOpcode = SubsNode->getOpcode();
8654 
8655  if (CondOpcode != AArch64ISD::SUBS)
8656  return SDValue();
8657 
8658  // There is a SUBS feeding this condition. Is it fed by a mask we can
8659  // use?
8660 
8661  SDNode *AndNode = SubsNode->getOperand(0).getNode();
8662  unsigned MaskBits = 0;
8663 
8664  if (AndNode->getOpcode() != ISD::AND)
8665  return SDValue();
8666 
8667  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
8668  uint32_t CNV = CN->getZExtValue();
8669  if (CNV == 255)
8670  MaskBits = 8;
8671  else if (CNV == 65535)
8672  MaskBits = 16;
8673  }
8674 
8675  if (!MaskBits)
8676  return SDValue();
8677 
8678  SDValue AddValue = AndNode->getOperand(0);
8679 
8680  if (AddValue.getOpcode() != ISD::ADD)
8681  return SDValue();
8682 
8683  // The basic dag structure is correct, grab the inputs and validate them.
8684 
8685  SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
8686  SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
8687  SDValue SubsInputValue = SubsNode->getOperand(1);
8688 
8689  // The mask is present and the provenance of all the values is a smaller type,
8690  // let's see if the mask is superfluous.
8691 
8692  if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
8693  !isa<ConstantSDNode>(SubsInputValue.getNode()))
8694  return SDValue();
8695 
8696  ISD::LoadExtType ExtType;
8697 
8698  if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
8699  !checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
8700  !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
8701  return SDValue();
8702 
8703  if(!isEquivalentMaskless(CC, MaskBits, ExtType,
8704  cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
8705  cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
8706  return SDValue();
8707 
8708  // The AND is not necessary, remove it.
8709 
8710  SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
8711  SubsNode->getValueType(1));
8712  SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
8713 
8714  SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
8715  DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
8716 
8717  return SDValue(N, 0);
8718 }
8719 
8720 // Optimize compare with zero and branch.
8721 static SDValue performBRCONDCombine(SDNode *N,
8722  TargetLowering::DAGCombinerInfo &DCI,
8723  SelectionDAG &DAG) {
8724  SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3);
8725  if (NV.getNode())
8726  N = NV.getNode();
8727  SDValue Chain = N->getOperand(0);
8728  SDValue Dest = N->getOperand(1);
8729  SDValue CCVal = N->getOperand(2);
8730  SDValue Cmp = N->getOperand(3);
8731 
8732  assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
8733  unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
8734  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
8735  return SDValue();
8736 
8737  unsigned CmpOpc = Cmp.getOpcode();
8738  if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
8739  return SDValue();
8740 
8741  // Only attempt folding if there is only one use of the flag and no use of the
8742  // value.
8743  if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
8744  return SDValue();
8745 
8746  SDValue LHS = Cmp.getOperand(0);
8747  SDValue RHS = Cmp.getOperand(1);
8748 
8749  assert(LHS.getValueType() == RHS.getValueType() &&
8750  "Expected the value type to be the same for both operands!");
8751  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
8752  return SDValue();
8753 
8754  if (isa<ConstantSDNode>(LHS) && cast<ConstantSDNode>(LHS)->isNullValue())
8755  std::swap(LHS, RHS);
8756 
8757  if (!isa<ConstantSDNode>(RHS) || !cast<ConstantSDNode>(RHS)->isNullValue())
8758  return SDValue();
8759 
8760  if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
8761  LHS.getOpcode() == ISD::SRL)
8762  return SDValue();
8763 
8764  // Fold the compare into the branch instruction.
8765  SDValue BR;
8766  if (CC == AArch64CC::EQ)
8767  BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
8768  else
8769  BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
8770 
8771  // Do not add new nodes to DAG combiner worklist.
8772  DCI.CombineTo(N, BR, false);
8773 
8774  return SDValue();
8775 }
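
A source-level analogue of the pattern this fold targets (illustration only, no claim about the exact code a given compiler emits): a compare against zero feeding a conditional branch, which the backend can typically emit as a single cbz/cbnz instead of subs plus b.eq/b.ne.

#include <cassert>

static int branchOnZero(long x) {
  if (x == 0)   // compare-with-zero + branch: a cbz candidate
    return 1;
  return 0;
}

int main() {
  assert(branchOnZero(0) == 1);
  assert(branchOnZero(3) == 0);
  return 0;
}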
8776 
8777 // vselect (v1i1 setcc) ->
8778 // vselect (v1iXX setcc) (XX is the size of the compared operand type)
8779 // FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
8780 // condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
8781 // such VSELECT.
8782 static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
8783  SDValue N0 = N->getOperand(0);
8784  EVT CCVT = N0.getValueType();
8785 
8786  if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
8787  CCVT.getVectorElementType() != MVT::i1)
8788  return SDValue();
8789 
8790  EVT ResVT = N->getValueType(0);
8791  EVT CmpVT = N0.getOperand(0).getValueType();
8792  // Only combine when the result type is of the same size as the compared
8793  // operands.
8794  if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
8795  return SDValue();
8796 
8797  SDValue IfTrue = N->getOperand(1);
8798  SDValue IfFalse = N->getOperand(2);
8799  SDValue SetCC =
8800  DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
8801  N0.getOperand(0), N0.getOperand(1),
8802  cast<CondCodeSDNode>(N0.getOperand(2))->get());
8803  return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
8804  IfTrue, IfFalse);
8805 }
8806 
8807 /// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
8808 /// the compare-mask instructions rather than going via NZCV, even if LHS and
8809 /// RHS are really scalar. This replaces any scalar setcc in the above pattern
8810 /// with a vector one followed by a DUP shuffle on the result.
8811 static SDValue performSelectCombine(SDNode *N,
8812  TargetLowering::DAGCombinerInfo &DCI) {
8813  SelectionDAG &DAG = DCI.DAG;
8814  SDValue N0 = N->getOperand(0);
8815  EVT ResVT = N->getValueType(0);
8816 
8817  if (N0.getOpcode() != ISD::SETCC)
8818  return SDValue();
8819 
8820  // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
8821  // scalar SetCCResultType. We also don't expect vectors, because we assume
8822  // that selects fed by vector SETCCs are canonicalized to VSELECT.
8823  assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
8824  "Scalar-SETCC feeding SELECT has unexpected result type!");
8825 
8826  // If NumMaskElts == 0, the comparison is larger than select result. The
8827  // largest real NEON comparison is 64-bits per lane, which means the result is
8828  // at most 32-bits and an illegal vector. Just bail out for now.
8829  EVT SrcVT = N0.getOperand(0).getValueType();
8830 
8831  // Don't try to do this optimization when the setcc itself has i1 operands.
8832  // There are no legal vectors of i1, so this would be pointless.
8833  if (SrcVT == MVT::i1)
8834  return SDValue();
8835 
8836  int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
8837  if (!ResVT.isVector() || NumMaskElts == 0)
8838  return SDValue();
8839 
8840  SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
8841  EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
8842 
8843  // Also bail out if the vector CCVT isn't the same size as ResVT.
8844  // This can happen if the SETCC operand size doesn't divide the ResVT size
8845  // (e.g., f64 vs v3f32).
8846  if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
8847  return SDValue();
8848 
8849  // Make sure we didn't create illegal types, if we're not supposed to.
8850  assert(DCI.isBeforeLegalize() ||
8851  DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
8852 
8853  // First perform a vector comparison, where lane 0 is the one we're interested
8854  // in.
8855  SDLoc DL(N0);
8856  SDValue LHS =
8857  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
8858  SDValue RHS =
8859  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
8860  SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
8861 
8862  // Now duplicate the comparison mask we want across all other lanes.
8863  SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
8864  SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask.data());
8865  Mask = DAG.getNode(ISD::BITCAST, DL,
8866  ResVT.changeVectorElementTypeToInteger(), Mask);
8867 
8868  return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
8869 }
8870 
8871 /// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC
8872 /// to match FMIN/FMAX patterns.
8873 static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
8874  // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y".
8875  // Unless the NoNaNsFPMath option is set, be careful about NaNs:
8876  // vmax/vmin return NaN if either operand is a NaN;
8877  // only do the transformation when it matches that behavior.
8878 
8879  SDValue CondLHS = N->getOperand(0);
8880  SDValue CondRHS = N->getOperand(1);
8881  SDValue LHS = N->getOperand(2);
8882  SDValue RHS = N->getOperand(3);
8883  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
8884 
8885  unsigned Opcode;
8886  bool IsReversed;
8887  if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
8888  selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
8889  IsReversed = false; // x CC y ? x : y
8890  } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
8891  selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
8892  IsReversed = true ; // x CC y ? y : x
8893  } else {
8894  return SDValue();
8895  }
8896 
8897  bool IsUnordered = false, IsOrEqual;
8898  switch (CC) {
8899  default:
8900  return SDValue();
8901  case ISD::SETULT:
8902  case ISD::SETULE:
8903  IsUnordered = true;
8904  case ISD::SETOLT:
8905  case ISD::SETOLE:
8906  case ISD::SETLT:
8907  case ISD::SETLE:
8908  IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE);
8909  Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
8910  break;
8911 
8912  case ISD::SETUGT:
8913  case ISD::SETUGE:
8914  IsUnordered = true;
8915  case ISD::SETOGT:
8916  case ISD::SETOGE:
8917  case ISD::SETGT:
8918  case ISD::SETGE:
8919  IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE);
8920  Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
8921  break;
8922  }
8923 
8924  // If LHS is NaN, an ordered comparison will be false and the result will be
8925  // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking
8926  // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
8927  if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
8928  return SDValue();
8929 
8930  // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true,
8931  // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be
8932  // used for unsafe math or if one of the operands is known to be nonzero.
8933  if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
8934  !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
8935  return SDValue();
8936 
8937  return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
8938 }
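
Why the NaN check matters, as a standalone C++ illustration (not LLVM API): for "x < y ? x : y" a NaN on the left makes the comparison false and selects y, whereas, as the comment above notes, vmin/vmax would return NaN, so folding without the isKnownNeverNaN check would change the result.

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double nan = std::numeric_limits<double>::quiet_NaN(), y = 1.0;

  double selectResult = (nan < y) ? nan : y; // comparison is false, so y is chosen
  assert(selectResult == 1.0);

  double fminLikeResult = nan;               // per the comment above, FMIN(NaN, y) is NaN
  assert(std::isnan(fminLikeResult));        // so the fold would differ for NaN inputs
  return 0;
}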
8939 
8940 /// Get rid of unnecessary NVCASTs (that don't change the type).
8941 static SDValue performNVCASTCombine(SDNode *N) {
8942  if (N->getValueType(0) == N->getOperand(0).getValueType())
8943  return N->getOperand(0);
8944 
8945  return SDValue();
8946 }
8947 
8948 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
8949  DAGCombinerInfo &DCI) const {
8950  SelectionDAG &DAG = DCI.DAG;
8951  switch (N->getOpcode()) {
8952  default:
8953  break;
8954  case ISD::ADD:
8955  case ISD::SUB:
8956  return performAddSubLongCombine(N, DCI, DAG);
8957  case ISD::XOR:
8958  return performXorCombine(N, DAG, DCI, Subtarget);
8959  case ISD::MUL:
8960  return performMulCombine(N, DAG, DCI, Subtarget);
8961  case ISD::SINT_TO_FP:
8962  case ISD::UINT_TO_FP:
8963  return performIntToFpCombine(N, DAG, Subtarget);
8964  case ISD::OR:
8965  return performORCombine(N, DCI, Subtarget);
8966  case ISD::INTRINSIC_WO_CHAIN:
8967  return performIntrinsicCombine(N, DCI, Subtarget);
8968  case ISD::ANY_EXTEND:
8969  case ISD::ZERO_EXTEND:
8970  case ISD::SIGN_EXTEND:
8971  return performExtendCombine(N, DCI, DAG);
8972  case ISD::BITCAST:
8973  return performBitcastCombine(N, DCI, DAG);
8974  case ISD::CONCAT_VECTORS:
8975  return performConcatVectorsCombine(N, DCI, DAG);
8976  case ISD::SELECT:
8977  return performSelectCombine(N, DCI);
8978  case ISD::VSELECT:
8979  return performVSelectCombine(N, DCI.DAG);
8980  case ISD::SELECT_CC:
8981  return performSelectCCCombine(N, DCI.DAG);
8982  case ISD::STORE:
8983  return performSTORECombine(N, DCI, DAG, Subtarget);
8984  case AArch64ISD::BRCOND:
8985  return performBRCONDCombine(N, DCI, DAG);
8986  case AArch64ISD::CSEL:
8987  return performCONDCombine(N, DCI, DAG, 2, 3);
8988  case AArch64ISD::DUP:
8989  return performPostLD1Combine(N, DCI, false);
8990  case AArch64ISD::NVCAST:
8991  return performNVCASTCombine(N);
8992  case ISD::INSERT_VECTOR_ELT:
8993  return performPostLD1Combine(N, DCI, true);
8994  case ISD::INTRINSIC_VOID:
8995  case ISD::INTRINSIC_W_CHAIN:
8996  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
8997  case Intrinsic::aarch64_neon_ld2:
8998  case Intrinsic::aarch64_neon_ld3:
8999  case Intrinsic::aarch64_neon_ld4:
9000  case Intrinsic::aarch64_neon_ld1x2:
9001  case Intrinsic::aarch64_neon_ld1x3:
9002  case Intrinsic::aarch64_neon_ld1x4:
9003  case Intrinsic::aarch64_neon_ld2lane:
9004  case Intrinsic::aarch64_neon_ld3lane:
9005  case Intrinsic::aarch64_neon_ld4lane:
9006  case Intrinsic::aarch64_neon_ld2r:
9007  case Intrinsic::aarch64_neon_ld3r:
9008  case Intrinsic::aarch64_neon_ld4r:
9009  case Intrinsic::aarch64_neon_st2:
9010  case Intrinsic::aarch64_neon_st3:
9011  case Intrinsic::aarch64_neon_st4:
9012  case Intrinsic::aarch64_neon_st1x2:
9013  case Intrinsic::aarch64_neon_st1x3:
9014  case Intrinsic::aarch64_neon_st1x4:
9015  case Intrinsic::aarch64_neon_st2lane:
9016  case Intrinsic::aarch64_neon_st3lane:
9017  case Intrinsic::aarch64_neon_st4lane:
9018  return performNEONPostLDSTCombine(N, DCI, DAG);
9019  default:
9020  break;
9021  }
9022  }
9023  return SDValue();
9024 }
9025 
9026 // Check if the return value is used as only a return value, as otherwise
9027 // we can't perform a tail-call. In particular, we need to check for
9028 // target ISD nodes that are returns and any other "odd" constructs
9029 // that the generic analysis code won't necessarily catch.
9030 bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
9031  SDValue &Chain) const {
9032  if (N->getNumValues() != 1)
9033  return false;
9034  if (!N->hasNUsesOfValue(1, 0))
9035  return false;
9036 
9037  SDValue TCChain = Chain;
9038  SDNode *Copy = *N->use_begin();
9039  if (Copy->getOpcode() == ISD::CopyToReg) {
9040  // If the copy has a glue operand, we conservatively assume it isn't safe to
9041  // perform a tail call.
9042  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
9043  MVT::Glue)
9044  return false;
9045  TCChain = Copy->getOperand(0);
9046  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
9047  return false;
9048 
9049  bool HasRet = false;
9050  for (SDNode *Node : Copy->uses()) {
9051  if (Node->getOpcode() != AArch64ISD::RET_FLAG)
9052  return false;
9053  HasRet = true;
9054  }
9055 
9056  if (!HasRet)
9057  return false;
9058 
9059  Chain = TCChain;
9060  return true;
9061 }
9062 
9063 // Return whether an instruction can potentially be optimized to a tail
9064 // call. This will cause the optimizers to attempt to move, or duplicate,
9065 // return instructions to help enable tail call optimizations for this
9066 // instruction.
9067 bool AArch64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
9068  if (!CI->isTailCall())
9069  return false;
9070 
9071  return true;
9072 }
9073 
9074 bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
9075  SDValue &Offset,
9076  ISD::MemIndexedMode &AM,
9077  bool &IsInc,
9078  SelectionDAG &DAG) const {
9079  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
9080  return false;
9081 
9082  Base = Op->getOperand(0);
9083  // All of the indexed addressing mode instructions take a signed
9084  // 9 bit immediate offset.
9085  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
9086  int64_t RHSC = (int64_t)RHS->getZExtValue();
9087  if (RHSC >= 256 || RHSC <= -256)
9088  return false;
9089  IsInc = (Op->getOpcode() == ISD::ADD);
9090  Offset = Op->getOperand(1);
9091  return true;
9092  }
9093  return false;
9094 }
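
The offset test above as a standalone predicate (a sketch mirroring the RHSC bounds check, not LLVM API): pre/post-indexed forms take a signed 9-bit immediate, and the check accepts constant offsets strictly between -256 and 256.

#include <cassert>

static bool isAcceptedIndexedOffset(long long off) {
  return off > -256 && off < 256; // same rejection as RHSC >= 256 || RHSC <= -256
}

int main() {
  assert(isAcceptedIndexedOffset(255) && isAcceptedIndexedOffset(-255));
  assert(!isAcceptedIndexedOffset(256) && !isAcceptedIndexedOffset(-256));
  return 0;
}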
9095 
9096 bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
9097  SDValue &Offset,
9098  ISD::MemIndexedMode &AM,
9099  SelectionDAG &DAG) const {
9100  EVT VT;
9101  SDValue Ptr;
9102  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9103  VT = LD->getMemoryVT();
9104  Ptr = LD->getBasePtr();
9105  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
9106  VT = ST->getMemoryVT();
9107  Ptr = ST->getBasePtr();
9108  } else
9109  return false;
9110 
9111  bool IsInc;
9112  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
9113  return false;
9114  AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
9115  return true;
9116 }
9117 
9118 bool AArch64TargetLowering::getPostIndexedAddressParts(
9119  SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
9120  ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
9121  EVT VT;
9122  SDValue Ptr;
9123  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9124  VT = LD->getMemoryVT();
9125  Ptr = LD->getBasePtr();
9126  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
9127  VT = ST->getMemoryVT();
9128  Ptr = ST->getBasePtr();
9129  } else
9130  return false;
9131 
9132  bool IsInc;
9133  if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
9134  return false;
9135  // Post-indexing updates the base, so it's not a valid transform
9136  // if that's not the same as the load's pointer.
9137  if (Ptr != Base)
9138  return false;
9139  AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
9140  return true;
9141 }
9142 
9143 static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
9144  SelectionDAG &DAG) {
9145  SDLoc DL(N);
9146  SDValue Op = N->getOperand(0);
9147 
9148  if (N->getValueType(0) != MVT::i16 || Op.getValueType() != MVT::f16)
9149  return;
9150 
9151  Op = SDValue(
9152  DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
9153  DAG.getUNDEF(MVT::i32), Op,
9154  DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
9155  0);
9156  Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
9157  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
9158 }
9159 
9160 void AArch64TargetLowering::ReplaceNodeResults(
9161  SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
9162  switch (N->getOpcode()) {
9163  default:
9164  llvm_unreachable("Don't know how to custom expand this");
9165  case ISD::BITCAST:
9166  ReplaceBITCASTResults(N, Results, DAG);
9167  return;
9168  case ISD::FP_TO_UINT:
9169  case ISD::FP_TO_SINT:
9170  assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
9171  // Let normal code take care of it by not adding anything to Results.
9172  return;
9173  }
9174 }
9175 
9176 bool AArch64TargetLowering::useLoadStackGuardNode() const {
9177  return true;
9178 }
9179 
9180 bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
9181  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
9182  // reciprocal if there are three or more FDIVs.
9183  return NumUsers > 2;
9184 }
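
What this heuristic enables, shown with plain scalars (illustrative only): with more than two divisions by the same divisor, the reciprocal is computed once and each division becomes a multiply, which is equal up to rounding (and exactly equal for a power-of-two divisor as below).

#include <cassert>
#include <cmath>

int main() {
  double a = 3.0, b = 5.0, c = 7.0, d = 2.0;
  double direct[3] = {a / d, b / d, c / d};             // three FDIVs by the same divisor

  double recip = 1.0 / d;                               // one division...
  double viaMul[3] = {a * recip, b * recip, c * recip}; // ...then three FMULs

  for (int i = 0; i < 3; ++i)
    assert(std::fabs(direct[i] - viaMul[i]) < 1e-12);
  return 0;
}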
9185 
9186 TargetLoweringBase::LegalizeTypeAction
9187 AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
9188  MVT SVT = VT.getSimpleVT();
9189  // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
9190  // v4i16, v2i32 instead of to promote.
9191  if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
9192  || SVT == MVT::v1f32)
9193  return TypeWidenVector;
9194 
9195  return TargetLoweringBase::getPreferredVectorAction(VT);
9196 }
9197 
9198 // Loads and stores less than 128-bits are already atomic; ones above that
9199 // are doomed anyway, so defer to the default libcall and blame the OS when
9200 // things go wrong.
9201 bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
9202  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
9203  return Size == 128;
9204 }
9205 
9206 // Loads and stores less than 128-bits are already atomic; ones above that
9207 // are doomed anyway, so defer to the default libcall and blame the OS when
9208 // things go wrong.
9209 bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
9210  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
9211  return Size == 128;
9212 }
9213 
9214 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
9215 TargetLoweringBase::AtomicRMWExpansionKind
9216 AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
9217  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9218  return Size <= 128 ? AtomicRMWExpansionKind::LLSC
9219  : AtomicRMWExpansionKind::None;
9220 }
9221 
9222 bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
9223  return true;
9224 }
9225 
9226 Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
9227  AtomicOrdering Ord) const {
9228  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
9229  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
9230  bool IsAcquire = isAtLeastAcquire(Ord);
9231 
9232  // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
9233  // intrinsic must return {i64, i64} and we have to recombine them into a
9234  // single i128 here.
9235  if (ValTy->getPrimitiveSizeInBits() == 128) {
9236  Intrinsic::ID Int =
9237  IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
9238  Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int);
9239 
9240  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
9241  Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
9242 
9243  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
9244  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
9245  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
9246  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
9247  return Builder.CreateOr(
9248  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
9249  }
9250 
9251  Type *Tys[] = { Addr->getType() };
9252  Intrinsic::ID Int =
9253  IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
9254  Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys);
9255 
9256  return Builder.CreateTruncOrBitCast(
9257  Builder.CreateCall(Ldxr, Addr),
9258  cast<PointerType>(Addr->getType())->getElementType());
9259 }
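
The lo/hi recombination above, shown at half width so it stays in portable C++ (two 32-bit halves into one 64-bit value, mirroring the zext/shl/or sequence built for the i128 ldaxp/ldxp result):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t lo = 0xdeadbeefu, hi = 0x01234567u;

  uint64_t val = (uint64_t)lo | ((uint64_t)hi << 32); // zext lo, zext hi, shl 32, or

  assert((uint32_t)val == lo);
  assert((uint32_t)(val >> 32) == hi);
  return 0;
}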
9260 
9261 Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
9262  Value *Val, Value *Addr,
9263  AtomicOrdering Ord) const {
9264  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
9265  bool IsRelease = isAtLeastRelease(Ord);
9266 
9267  // Since the intrinsics must have legal type, the i128 intrinsics take two
9268  // parameters: "i64, i64". We must marshal Val into the appropriate form
9269  // before the call.
9270  if (Val->getType()->getPrimitiveSizeInBits() == 128) {
9271  Intrinsic::ID Int =
9272  IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
9273  Function *Stxr = Intrinsic::getDeclaration(M, Int);
9274  Type *Int64Ty = Type::getInt64Ty(M->getContext());
9275 
9276  Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
9277  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
9278  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
9279  return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
9280  }
9281 
9282  Intrinsic::ID Int =
9283  IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
9284  Type *Tys[] = { Addr->getType() };
9285  Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
9286 
9287  return Builder.CreateCall(Stxr,
9288  {Builder.CreateZExtOrBitCast(
9289  Val, Stxr->getFunctionType()->getParamType(0)),
9290  Addr});
9291 }
9292 
9293 bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
9294  Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
9295  return Ty->isArrayTy();
9296 }
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:111
unsigned getNumOperands() const
arg_iterator arg_end()
Definition: Function.h:480
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
A debug info location.
Definition: DebugLoc.h:34
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB)
transferSuccessorsAndUpdatePHIs - Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor blocks which refer to fromMBB so that they refer to this block.
const SDValue & getOperand(unsigned Num) const
F(f)
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address...
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG)
LoadInst - an instruction for reading from memory.
Definition: Instructions.h:177
static unsigned getDUPLANEOp(EVT EltType)
static MachinePointerInfo getConstantPool()
getConstantPool - Return a MachinePointerInfo record that refers to the constant pool.
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
static bool isAdvSIMDModImmType3(uint64_t Imm)
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:318
AtomicRMWInst - an instruction that atomically reads a memory location, combines it with another valu...
Definition: Instructions.h:674
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG)
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &ArgsFlags, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
Same for subtraction.
Definition: ISDOpcodes.h:231
MachineBasicBlock * EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *BB) const
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:1522
static SDValue tryCombineFixedPointConvert(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static std::error_code getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
const SDValue & getBasePtr() const
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a ldN intrinsic.
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1 at the ...
Definition: ISDOpcodes.h:287
APInt LLVM_ATTRIBUTE_UNUSED_RESULT zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1015
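A brief illustrative sketch of zextOrTrunc behaviour (values are hypothetical, not taken from this file):
  APInt Small(8, 0xFF);                  // 8-bit value 255
  APInt Wide = Small.zextOrTrunc(32);    // zero-extended to the 32-bit value 0x000000FF
  APInt Low = Wide.zextOrTrunc(4);       // truncated to the 4-bit value 0xF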
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:178
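For illustration (hypothetical value, not from this file):
  uint32_t V = 0x00800000;                        // only bit 23 set
  std::size_t LZ = llvm::countLeadingZeros(V);    // LZ == 8, since bits 31..24 are zero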
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG)
static std::pair< SDValue, SDValue > getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG)
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:357
unsigned getResNo() const
get the index which selects a specific result in the SDNode
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
bool bitsLT(EVT VT) const
bitsLT - Return true if this has fewer bits than VT.
Definition: ValueTypes.h:189
bool isUnsignedIntSetCC(CondCode Code)
isUnsignedIntSetCC - Return true if this is a setcc instruction that performs an unsigned comparison ...
Definition: ISDOpcodes.h:843
static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG)
bool isRegLoc() const
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, bool gen_crash_diag=true)
Reports a serious error, calling any installed error handler.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
SDValue getExternalSymbol(const char *Sym, EVT VT)
bool isAllOnesValue() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:172
uint64_t getTypeAllocSizeInBits(Type *Ty) const
Returns the offset in bits between successive objects of the specified type, including alignment padd...
Definition: DataLayout.h:398
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:464
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
SDValue getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
static MachinePointerInfo getFixedStack(int FI, int64_t offset=0)
getFixedStack - Return a MachinePointerInfo record that refers to the the specified FrameIndex...
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:150
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
const Triple & getTargetTriple() const
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address...
bool isKnownNeverZero(SDValue Op) const
Test whether the given SDValue is known to never be positive or negative Zero.
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:200
AtomicRMWExpansionKind
Enum that specifies what a AtomicRMWInst is expanded to, if at all.
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:371
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:115
lazy value info
static bool isAdvSIMDModImmType7(uint64_t Imm)
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG)
BlockAddress - The address of a basic block.
Definition: Constants.h:802
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
static bool isTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:1541
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
cl::opt< bool > EnableAArch64ELFLocalDynamicTLSGeneration("aarch64-elf-ldtls-generation", cl::Hidden, cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false))
static SDValue performCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:79
const HexagonInstrInfo * TII
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG)
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:308
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
Shift and rotation operations.
Definition: ISDOpcodes.h:332
static SDValue performAddSubLongCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
StructType - Class to represent struct types.
Definition: DerivedTypes.h:191
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:69
bool hasInternalLinkage() const
Definition: GlobalValue.h:278
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:283
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, SDLoc dl, bool doesNotReturn=false, bool isReturnValueUsed=true) const
Returns a pair of (return value, chain).
static void advance(T &it, size_t Val)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
unsigned getNumArgOperands() const
getNumArgOperands - Return the number of call arguments.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
SDValue getTargetGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:467
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:161
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1057
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:517
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
Helper structure to keep track of a SET_CC lowered into AArch64 code.
Reg
All possible values of the reg field in the ModR/M byte.
SimpleValueType SimpleTy
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, AArch64CC::CondCode CC, bool NoNans, EVT VT, SDLoc dl, SelectionDAG &DAG)
bool isSized(SmallPtrSetImpl< const Type * > *Visited=nullptr) const
isSized - Return true if it makes sense to take the size of this type.
Definition: Type.h:268
MVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:210
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, SelectionDAG &DAG)
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class is used to represent EVT's, which are used to parameterize some operations.
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:73
Type * getVectorElementType() const
Definition: Type.h:364
#define im(i)
static bool isAdvSIMDModImmType5(uint64_t Imm)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
MO_CONSTPOOL - This flag indicates that a symbol operand represents the address of a constant pool en...
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:591
#define G(x, y, z)
Definition: MD5.cpp:52
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:110
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
AtomicOrdering
Definition: Instructions.h:38
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:581
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:216
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:414
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector...
static const unsigned PerfectShuffleTable[6561+1]
bool isInConsecutiveRegs() const
SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, SDLoc DL)
Return a new CALLSEQ_START node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:637
unsigned getLocReg() const
void setArgumentStackToRestore(unsigned bytes)
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:30
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
bool isKill() const
static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:393
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
static bool isEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseEXT, unsigned &Imm)
LLVMContext & getContext() const
getContext - Return the LLVMContext in which this type was uniqued.
Definition: Type.h:125
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:894
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
SDValue getRegisterMask(const uint32_t *RegMask)
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, unsigned Align=1, bool *Fast=nullptr) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
const AArch64RegisterInfo * getRegisterInfo() const override
bool hasStructRetAttr() const
Determine if the function returns a structure through first pointer argument.
Definition: Function.h:360
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:351
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:191
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
Return an ISD::VECTOR_SHUFFLE node.
bool isMask_64(uint64_t Value)
isMask_64 - This function returns true if the argument is a non-empty sequence of ones starting at th...
Definition: MathExtras.h:335
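For example (illustrative values):
  llvm::isMask_64(0x00FF);   // true:  contiguous ones starting at bit 0
  llvm::isMask_64(0x0FF0);   // false: the run of ones does not start at bit 0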
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: ArrayRef.h:31
bool isFloatingPointTy() const
isFloatingPointTy - Return true if this is one of the six floating point types
Definition: Type.h:159
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:866
const SDValue & getBasePtr() const
static bool isAdvSIMDModImmType2(uint64_t Imm)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:659
MachineBasicBlock * emitPatchPoint(MachineInstr *MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
StoreInst - an instruction for storing to memory.
Definition: Instructions.h:316
Natural vector cast.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:804
const APInt & getAPIntValue() const
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:351
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isArrayTy() const
isArrayTy - True if this is an instance of ArrayType.
Definition: Type.h:213
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:116
EVT getMemoryVT() const
Return the type of the in-memory value.
int64_t getImm() const
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
Returns true if the target can instruction select the specified FP immediate natively.
TargetLoweringBase::AtomicRMWExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all...
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
static const MCPhysReg GPRArgRegs[]
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth=0) const override
computeKnownBitsForTargetNode - Determine which of the bits specified in Mask are known to be either ...
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:142
Type * getElementType() const
Definition: DerivedTypes.h:323
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:134
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:284
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
bool hasPairedLoad(Type *LoadedType, unsigned &RequiredAligment) const override
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
static bool isREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize...
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG)
PointerType - Class to represent pointers.
Definition: DerivedTypes.h:449
const BasicBlock * getBasicBlock() const
getBasicBlock - Return the LLVM basic block that this instance corresponded to originally.
UNDEF - An undefined node.
Definition: ISDOpcodes.h:169
static bool isEssentiallyExtractSubvector(SDValue N)
This class is used to represent ISD::STORE nodes.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:436
static SDValue performBRCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:97
AArch64SetCCInfo AArch64
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:262
TargetInstrInfo - Interface to description of machine instruction set.
LLVM_CONSTEXPR size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:247
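A minimal sketch of typical use with a static register table (the array contents here are hypothetical):
  static const MCPhysReg Regs[] = { AArch64::X0, AArch64::X1 };
  unsigned NumRegs = llvm::array_lengthof(Regs);   // NumRegs == 2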
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:513
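For example (illustrative values):
  APInt Mask = APInt::getHighBitsSet(32, 8);   // 32-bit value 0xFF000000, top 8 bits set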
bool isLegalICmpImmediate(int64_t) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
static bool isAdvSIMDModImmType9(uint64_t Imm)
SDNode * getNode() const
get the SDNode which holds the desired result
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Return true if it's free to truncate a value of type Ty1 to type Ty2.
bundle_iterator< MachineInstr, instr_iterator > iterator
A self-contained host- and target-independent arbitrary-precision floating-point software implementat...
Definition: APFloat.h:122
static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:239
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool hasLoadLinkedStoreConditional() const override
True if AtomicExpandPass should use emitLoadLinked/emitStoreConditional and expand AtomicCmpXchgInst...
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:131
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
#define true
Definition: ConvertUTF.c:66
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:157
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
Patchable call instruction - this instruction represents a call to a constant address, followed by a series of NOPs.
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:533
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address...
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:109
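For illustration (hypothetical value):
  std::size_t TZ = llvm::countTrailingZeros(0x8u);   // TZ == 3, since 0x8 is 0b1000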
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
unsigned getVectorNumElements() const
bool isZExtFree(Type *Ty1, Type *Ty2) const override
Return true if any actual instruction that defines a value of type Ty1 implicitly zero-extends the va...
CodeModel::Model getCodeModel() const
Returns the code model.
MVT - Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:65
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
GenericSetCCInfo Generic
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
Simple binary floating point operators.
Definition: ISDOpcodes.h:237
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool isNonTemporal() const
bool isVectorTy() const
isVectorTy - True if this is an instance of VectorType.
Definition: Type.h:226
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MVT getLocVT() const
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1245
This is an important base class in LLVM.
Definition: Constant.h:41
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE...
Definition: ISDOpcodes.h:607
bool isVector() const
isVector - Return true if this is a vector value type.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:780
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1339
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:267
const Constant * getConstVal() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:273
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
bool isFloatTy() const
isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:143
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
INSERT_SUBREG - This instruction takes three operands: a register that has subregisters, a register providing an insert value, and a subregister index.
Definition: TargetOpcodes.h:49
bool isLittleEndian() const
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info)
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static bool isAdvSIMDModImmType1(uint64_t Imm)
static mvt_range fp_valuetypes()
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool isDesirableToCommuteWithShift(const SDNode *N) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
This class provides iterator support for SDUse operands that use a specific SDNode.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself...
SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:547
SDValue getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
EVT getVT() const
SDValue getTargetConstant(uint64_t Val, SDLoc DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:436
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1273
iterator begin() const
Definition: ArrayRef.h:122
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
opStatus convert(const fltSemantics &, roundingMode, bool *)
APFloat::convert - convert a value of one floating point type to another.
Definition: APFloat.cpp:1972
unsigned getOpcode() const
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:644
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline...
Value * getOperand(unsigned i) const
Definition: User.h:118
Value * getPointerOperand()
Definition: Instructions.h:284
static bool isAdvSIMDModImmType8(uint64_t Imm)
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
static mvt_range vector_valuetypes()
arg_iterator arg_begin()
Definition: Function.h:472
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:249
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:598
static void changeFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:129
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isVolatile() const
const SDValue & getValue() const
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG)
performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC to match FMIN/FMAX patterns...
SDValue getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:338
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
BuildMI - Builder interface.
Helper structure to be able to read SetCC information.
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:468
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address...
EVT - Extended Value Type.
Definition: ValueTypes.h:31
bool isIntN(unsigned N, int64_t x)
isIntN - Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:321
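For example (illustrative values):
  llvm::isIntN(8, 127);   // true:  fits in a signed 8-bit integer
  llvm::isIntN(8, 128);   // false: outside the int8_t range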
bool isPointerTy() const
isPointerTy - True if this is an instance of PointerType.
Definition: Type.h:217
std::vector< ArgListEntry > ArgListTy
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
const APFloat & getValueAPF() const
unsigned getNextStackOffset() const
static bool isSingletonEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
CallInst * CreateCall(Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="")
Definition: IRBuilder.h:1467
This structure contains all information that is necessary for lowering calls.
PointerType * getPointerTo(unsigned AddrSpace=0)
getPointerTo - Return a pointer to the current type.
Definition: Type.cpp:764
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:283
static SDValue GenerateTBL(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static cl::opt< bool > EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden, cl::desc("Allow AArch64 (or (shift)(shift))->extract"), cl::init(true))
const uint32_t * getTLSCallPreservedMask() const
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
MachinePointerInfo - This class contains a discriminated union of information about pointers in memor...
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:386
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
unsigned char ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static SDValue performSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool needsCustom() const
const MachinePointerInfo & getPointerInfo() const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
unsigned getByValAlign() const
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, bool &FromHi)
An EXTR instruction is made up of two shifts, ORed together.
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
static cl::opt< bool > EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, cl::desc("Allow AArch64 SLI/SRI formation"), cl::init(false))
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
ArrayRef< int > getMask() const
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.cpp:749
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:694
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:484
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:478
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:281
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG)
bool isPosZero() const
Definition: APFloat.h:438
EXTRACT_SUBREG - This instruction takes two operands: a register that has subregisters, and a subregister index.
Definition: TargetOpcodes.h:41
SDValue getNOT(SDLoc DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
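A minimal sketch of how a combine might use getNOT, assuming DAG is the SelectionDAG, N an SDNode being combined, and Op an i32 operand (all hypothetical here):
  SDValue NotOp = DAG.getNOT(SDLoc(N), Op, MVT::i32);   // builds (xor Op, -1)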
CCState - This class holds information needed while lowering arguments and return values...
bool MaskAndBranchFoldingIsLegal
MaskAndBranchFoldingIsLegal - Indicates if the target supports folding a mask of a single bit...
static unsigned getIntrinsicID(const SDNode *N)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1192
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override
Returns the target specific optimal type for load and store operations as a result of memset...
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:273
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:388
unsigned getVectorNumElements() const
Definition: Type.cpp:212
void setExceptionPointerRegister(unsigned R)
If set to a physical register, this sets the register that receives the exception address on entry to...
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
StructType::get - This static method is the primary way to create a literal StructType.
Definition: Type.cpp:404
bool isInvariant() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:179
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override
EVT is not used in-tree, but is used by out-of-tree targets.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:57
SDValue getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI)
unsigned logBase2() const
Definition: APInt.h:1521
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1253
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
Instruction * user_back()
user_back - Specialize the methods defined in Value, as we know that an instruction can only be used ...
Definition: Instruction.h:69
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck)
Provides information about what library functions are available for the current target.
static bool isLegalArithImmed(uint64_t C)
CCValAssign - Represent assignment of one arg/retval to a location.
static unsigned getExtFactor(SDValue &V)
getExtFactor - Determine the adjustment factor for the position when generating an "extract from vect...
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:548
An SDNode that represents everything that will be needed to construct a MachineInstr.
static bool isAllConstantBuildVector(const SDValue &PotentialBVec, uint64_t &ConstVal)
const SDValue & getChain() const
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:335
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:266
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG)
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, SDLoc dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:582
Helper structure to keep track of ISD::SET_CC operands.
MachineFrameInfo * getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool is64BitVector() const
is64BitVector - Return true if this is a 64-bit vector type.
Represents one node in the SelectionDAG.
bool isProfitableToHoist(Instruction *I) const override
Check if it is profitable to hoist an instruction in then/else to if.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
getSetCCInverse - Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operat...
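For example (illustrative use):
  ISD::CondCode Inv = ISD::getSetCCInverse(ISD::SETLT, /*isInteger=*/true);   // Inv == ISD::SETGE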
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:289
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
static MachinePointerInfo getStack(int64_t Offset)
getStack - stack pointer relative access.
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(NoStrictAlign), cl::values(clEnumValN(StrictAlign,"aarch64-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"aarch64-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result)
A SELECT_CC operation is really some kind of max or min if both values being compared are...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a stN intrinsic.
R Default(const T &Value) const
Definition: StringSwitch.h:111
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
static mvt_range integer_valuetypes()
void dump() const
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
static bool isZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1261
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Selects the correct CCAssignFn for a given CallingConvention value.
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
VectorType - Class to represent vector types.
Definition: DerivedTypes.h:362
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns true if the given (atomic) store should be expanded by the IR-level AtomicExpand pass into an...
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Class for arbitrary precision integers.
Definition: APInt.h:73
bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:302
void setExceptionSelectorRegister(unsigned R)
If set to a physical register, this sets the register that receives the exception typeid on entry to ...
iterator_range< use_iterator > uses()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:342
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
int64_t getSExtValue() const
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC usable with the vector...
op_iterator op_begin() const
bool isIntegerTy() const
isIntegerTy - True if this is an instance of IntegerType.
Definition: Type.h:193
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:383
bool isPowerOf2_64(uint64_t Value)
isPowerOf2_64 - This function returns true if the argument is a power of two > 0 (64 bit edition...
Definition: MathExtras.h:360
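For example (illustrative values):
  llvm::isPowerOf2_64(64);   // true
  llvm::isPowerOf2_64(0);    // false: zero is excluded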
bool isMemLoc() const
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:386
static SDValue performNVCASTCombine(SDNode *N)
Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:238
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1277
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:481
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:250
uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:609
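For example (illustrative values):
  llvm::RoundUpToAlignment(13, 8);   // 16
  llvm::RoundUpToAlignment(16, 8);   // 16, already a multiple of 8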
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const
Return the preferred vector type legalization action.
bool isTargetDarwin() const
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG)
iterator_range< value_op_iterator > op_values() const
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1890
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:497
static SDValue performPostLD1Combine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, bool IsLaneOp)
Target-specific DAG combine function for post-increment LD1 (lane) and post-increment LD1R...
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static Constant * getSequentialMask(IRBuilder<> &Builder, unsigned Start, unsigned NumElts)
Get a mask consisting of sequential integers starting from Start.
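A rough sketch, assuming Builder is an in-scope IRBuilder<> (hypothetical here); the helper produces a constant i32 vector usable as a shufflevector mask:
  Constant *Mask = getSequentialMask(Builder, /*Start=*/2, /*NumElts=*/4);   // roughly <i32 2, i32 3, i32 4, i32 5>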
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:823
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:542
static mvt_range all_valuetypes()
SimpleValueType Iteration.
static MachinePointerInfo getGOT()
getGOT - Return a MachinePointerInfo record that refers to a GOT entry.
Representation of each machine instruction.
Definition: MachineInstr.h:51
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:603
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St)
Replace a splat of a scalar to a vector store by scalar stores of the scalar value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:372
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:516
uint64_t getConstantOperandVal(unsigned i) const
bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns true if the given (atomic) load should be expanded by the IR-level AtomicExpand pass into a l...
bool isOSBinFormatELF() const
Tests whether the OS uses the ELF binary format.
Definition: Triple.h:479
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG)
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:321
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:134
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:217
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:142
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:196
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:401
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:518
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:233
void ReplaceAllUsesWith(SDValue From, SDValue Op)
Modify anything using 'From' to use 'To' instead.
#define I(x, y, z)
Definition: MD5.cpp:54
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG)
#define N
ArrayRef< SDUse > ops() const
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
FunctionType * getFunctionType() const
Definition: Function.cpp:227
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:651
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page...
unsigned MaxStoresPerMemmoveOptSize
Maximum number of store instructions that may be substituted for a call to memmove, used for functions with OpSize attribute.
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions w...
Value * emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type...
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
bool isTailCall() const
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
op_iterator op_end() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
VectorType * getType() const
getType - Overload to return most specific vector type.
MachineSDNode * getMachineNode(unsigned Opcode, SDLoc dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
Same for multiplication.
Definition: ISDOpcodes.h:234
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDLoc dl, SelectionDAG &DAG)
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:512
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
bool isAtLeastRelease(AtomicOrdering Ord)
Returns true if the ordering is at least as strong as release (i.e.
Definition: Instructions.h:64
AArch64CC::CondCode CC
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, const AllocaInst *Alloca=nullptr)
Create a new statically sized stack object, returning a nonnegative identifier to represent it...
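For illustration (hypothetical values; DAG and a pointer-sized PtrVT are assumed to be in scope), a caller can reserve an 8-byte, 8-aligned spill slot and wrap the returned index in a frame-index node:
  // Create an 8-byte stack slot and refer to it from the DAG.
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  int FI = MFI->CreateStackObject(/*Size=*/8, /*Alignment=*/8, /*isSS=*/false);
  SDValue Slot = DAG.getFrameIndex(FI, PtrVT);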
EVT getValueType() const
Return the ValueType of the referenced return value.
void setBytesInStackArgArea(unsigned bytes)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
SDValue getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isTarget=false, bool isOpaque=false)
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:135
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool isLegalAddImmediate(int64_t) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
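A hedged sketch of an AArch64-style check (not necessarily the exact body in this file): the ADD/SUB immediate encoding holds a 12-bit unsigned value, optionally shifted left by 12, and add/sub share the encoding so only the magnitude matters:
  // Sketch: legal if |Immed| fits in 12 bits, or in 12 bits shifted left by 12.
  bool sketchIsLegalAddImmediate(int64_t Immed) {
    uint64_t Mag = Immed < 0 ? -(uint64_t)Immed : (uint64_t)Immed;
    return (Mag >> 12) == 0 || ((Mag & 0xfff) == 0 && (Mag >> 24) == 0);
  }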
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:279
SDValue getSelect(SDLoc DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:739
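Tying a few of these DAG helpers together (getConstant, getSetCC, getSelect), a typical lowering fragment with hypothetical values, assuming DAG, DL and an i64 operand X are in scope, might read:
  // Select 1 when X == 0, otherwise 0 (illustrative only).
  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
  SDValue One  = DAG.getConstant(1, DL, MVT::i64);
  SDValue Cmp  = DAG.getSetCC(DL, MVT::i1, X, Zero, ISD::SETEQ);
  SDValue Res  = DAG.getSelect(DL, MVT::i64, Cmp, One, Zero);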
unsigned getReg() const
getReg - Returns the register number.
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:105
void insert(iterator MBBI, MachineBasicBlock *MBB)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC)
changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 CC
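An abridged, hedged sketch of such a mapping (a few cases only; the real helper covers the full ISD::CondCode set):
  static AArch64CC::CondCode sketchChangeIntCCToAArch64CC(ISD::CondCode CC) {
    switch (CC) {
    case ISD::SETEQ:  return AArch64CC::EQ;
    case ISD::SETNE:  return AArch64CC::NE;
    case ISD::SETLT:  return AArch64CC::LT;  // signed less-than
    case ISD::SETULT: return AArch64CC::LO;  // unsigned less-than
    case ISD::SETGT:  return AArch64CC::GT;
    case ISD::SETUGT: return AArch64CC::HI;
    default:          llvm_unreachable("condition code not handled in sketch");
    }
  }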
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo)
Check whether or not Op is a SET_CC operation, either a generic or an AArch64 lowered one...
void setReturnAddressIsTaken(bool s)
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:94
unsigned getAlignment() const
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
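Note that this signature takes a DataLayout; assuming a TargetLowering reference TLI and a DataLayout DL are in scope, a caller would typically write:
  // Pointer-sized MVT for address computations.
  MVT PtrVT  = TLI.getPointerTy(DL);            // default address space 0
  MVT PtrVT1 = TLI.getPointerTy(DL, /*AS=*/1);  // explicit address space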
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
SDValue getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
getPrimitiveSizeInBits - Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:121
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:365
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1189
LLVM Value Representation.
Definition: Value.h:69
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:240
SDValue getRegister(unsigned Reg, EVT VT)
unsigned getOpcode() const
getOpcode() returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:112
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
const AArch64InstrInfo * getInstrInfo() const override
static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static VectorType * get(Type *ElementType, unsigned NumElements)
VectorType::get - This static method is the primary way to construct an VectorType.
Definition: Type.cpp:713
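For example (with a hypothetical LLVMContext Ctx in scope), the IR type <4 x i32> is built as:
  // Construct the IR vector type <4 x i32>.
  VectorType *V4I32 = VectorType::get(Type::getInt32Ty(Ctx), 4);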
SDValue getValueType(EVT)
Disable implicit floating point insts.
Definition: Attributes.h:87
Value * emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
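A hedged sketch of such a check (names local to the sketch): treat Op as a BUILD_VECTOR, require that it splats a single constant no wider than the element size, and report that constant as the shift amount:
  static bool sketchGetVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
    BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
    if (!BVN)
      return false;
    APInt SplatBits, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    if (!BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
                              ElementBits) ||
        SplatBitSize > ElementBits)
      return false;
    Cnt = SplatBits.getSExtValue();
    return true;
  }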
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:653
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned)
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
uint64_t getTypeSizeInBits(Type *Ty) const
getTypeSizeInBits - Return the number of bits needed to hold a value of the specified type.
Definition: DataLayout.h:507
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
BasicBlockListType::iterator iterator
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:287
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
#define DEBUG(X)
Definition: Debug.h:92
Primary interface to the complete machine description for the target machine.
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow...
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:365
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
unsigned MaxStoresPerMemsetOptSize
Maximum number of store operations that may be substituted for the call to memset, used for functions with the OptSize attribute.
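These limits are plain unsigned fields on TargetLoweringBase and are normally assigned in a target's constructor; the values below are purely illustrative, not the ones this file actually uses:
  // Illustrative tuning only; real targets pick these from measurements.
  MaxStoresPerMemset         = 8;
  MaxStoresPerMemsetOptSize  = 4;
  MaxStoresPerMemcpy         = 4;
  MaxStoresPerMemcpyOptSize  = 2;
  MaxStoresPerMemmoveOptSize = 2;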
static bool isConcatMask(ArrayRef< int > Mask, EVT VT, bool SplitLHS)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:434
SDValue getSetCC(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:726
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1734
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:188
unsigned getLocMemOffset() const
MVT getVectorElementType() const
static bool isVolatile(Instruction *Inst)
Conversion operators.
Definition: ISDOpcodes.h:380
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:338
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:134
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:389
bool isUIntN(unsigned N, uint64_t x)
isUIntN - Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:315
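For example, an addressing-mode check can use it to ask whether an offset fits a 12-bit unsigned immediate field (offset values hypothetical):
  bool FitsImm12 = isUIntN(12, 4095); // true: 4095 fits in 12 unsigned bits
  bool Overflows = isUIntN(12, 4096); // false: 4096 does not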
unsigned getAlignment() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
addReg - Add a new virtual register operand...
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
bool isShuffleMaskLegal(const SmallVectorImpl< int > &M, EVT VT) const override
isShuffleMaskLegal - Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool is64BitVector() const
is64BitVector - Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:130
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:506
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
Value * getPointerOperand()
Definition: Instructions.h:409
void addSuccessor(MachineBasicBlock *succ, uint32_t weight=0)
addSuccessor - Add succ as a successor of this MachineBasicBlock.
unsigned Log2_64(uint64_t Value)
Log2_64 - This function returns the floor log base 2 of the specified value, -1 if the value is zero...
Definition: MathExtras.h:474
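For instance, a known power-of-two alignment converts to a shift amount:
  unsigned AlignShift = Log2_64(16); // 4, since 2^4 == 16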
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:309
static bool isSplatMask(const int *Mask, EVT VT)
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits)
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:203
SDValue getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget=false)
SDValue getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:265
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG)
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
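Targets call this from their TargetLowering constructor; a hedged example (types chosen arbitrarily) marking pre- and post-incremented i64 stores as legal:
  // Advertise pre/post-indexed i64 stores as directly selectable.
  setIndexedStoreAction(ISD::PRE_INC,  MVT::i64, Legal);
  setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);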
gep_type_iterator gep_type_begin(const User *GEP)
uint64_t getZExtValue() const
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:761
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:314
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:225
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, SDLoc DL) const
SoftenSetCCOperands - Soften the operands of a comparison.
This class is used to represent ISD::LOAD nodes.
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:527
Function must be optimized for size first.
Definition: Attributes.h:80