LLVM  4.0.0
AArch64ISelLowering.cpp
1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the AArch64TargetLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
16 #include "AArch64ISelLowering.h"
17 #include "AArch64PerfectShuffle.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/ArrayRef.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "llvm/ADT/StringSwitch.h"
30 #include "llvm/ADT/Triple.h"
31 #include "llvm/ADT/Twine.h"
45 #include "llvm/IR/Attributes.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DataLayout.h"
48 #include "llvm/IR/DebugLoc.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Function.h"
52 #include "llvm/IR/GlobalValue.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/Module.h"
58 #include "llvm/IR/OperandTraits.h"
59 #include "llvm/IR/Type.h"
60 #include "llvm/IR/Use.h"
61 #include "llvm/IR/Value.h"
62 #include "llvm/MC/MCRegisterInfo.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CodeGen.h"
66 #include "llvm/Support/Compiler.h"
67 #include "llvm/Support/Debug.h"
75 #include <algorithm>
76 #include <bitset>
77 #include <cassert>
78 #include <cctype>
79 #include <cstdint>
80 #include <cstdlib>
81 #include <iterator>
82 #include <limits>
83 #include <tuple>
84 #include <utility>
85 #include <vector>
86 
87 using namespace llvm;
88 
89 #define DEBUG_TYPE "aarch64-lower"
90 
91 STATISTIC(NumTailCalls, "Number of tail calls");
92 STATISTIC(NumShiftInserts, "Number of vector shift inserts");
93 
94 static cl::opt<bool>
95 EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
96  cl::desc("Allow AArch64 SLI/SRI formation"),
97  cl::init(false));
98 
99 // FIXME: The necessary dtprel relocations don't seem to be supported
100 // well in the GNU bfd and gold linkers at the moment. Therefore, by
101 // default, for now, fall back to GeneralDynamic code generation.
103  "aarch64-elf-ldtls-generation", cl::Hidden,
104  cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
105  cl::init(false));
106 
107 /// Value type used for condition codes.
108 static const MVT MVT_CC = MVT::i32;
109 
111  const AArch64Subtarget &STI)
112  : TargetLowering(TM), Subtarget(&STI) {
113  // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
114  // we have to make something up. Arbitrarily, choose ZeroOrOne.
116  // When comparing vectors the result sets the different elements in the
117  // vector to all-one or all-zero.
119 
120  // Set up the register classes.
121  addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
122  addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
123 
124  if (Subtarget->hasFPARMv8()) {
125  addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
126  addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
127  addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
128  addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
129  }
130 
131  if (Subtarget->hasNEON()) {
132  addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
133  addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
134  // Someone set us up the NEON.
135  addDRTypeForNEON(MVT::v2f32);
136  addDRTypeForNEON(MVT::v8i8);
137  addDRTypeForNEON(MVT::v4i16);
138  addDRTypeForNEON(MVT::v2i32);
139  addDRTypeForNEON(MVT::v1i64);
140  addDRTypeForNEON(MVT::v1f64);
141  addDRTypeForNEON(MVT::v4f16);
142 
143  addQRTypeForNEON(MVT::v4f32);
144  addQRTypeForNEON(MVT::v2f64);
145  addQRTypeForNEON(MVT::v16i8);
146  addQRTypeForNEON(MVT::v8i16);
147  addQRTypeForNEON(MVT::v4i32);
148  addQRTypeForNEON(MVT::v2i64);
149  addQRTypeForNEON(MVT::v8f16);
150  }
151 
152  // Compute derived properties from the register classes
154 
155  // Provide all sorts of operation actions
179 
183 
187 
188  // Custom lowering hooks are needed for XOR
189  // to fold it into CSINC/CSINV.
192 
193  // Virtually no operation on f128 is legal, but LLVM can't expand them when
194  // there's a valid register class, so we need custom operations in most cases.
216 
217  // Lowering for many of the conversions is actually specified by the non-f128
218  // type. The LowerXXX function will be trivial when f128 isn't involved.
233 
234  // Variable arguments.
239 
240  // Variable-sized objects.
244 
245  // Constant pool entries
247 
248  // BlockAddress
250 
251  // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
260 
261  // AArch64 lacks both left-rotate and popcount instructions.
264  for (MVT VT : MVT::vector_valuetypes()) {
267  }
268 
269  // AArch64 doesn't have {U|S}MUL_LOHI.
272 
275 
278  for (MVT VT : MVT::vector_valuetypes()) {
281  }
288 
289  // Custom lower Add/Sub/Mul with overflow.
302 
311 
312  // f16 is a storage-only type, always promote it to f32.
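  // Illustrative sketch (not part of the original source): with f16 promoted
  // to f32, a scalar operation such as "fadd half %a, %b" is legalized by
  // doing the arithmetic in f32 and rounding back, roughly:
  //   fcvt s0, h0        ; extend both operands to f32
  //   fcvt s1, h1
  //   fadd s0, s0, s1    ; compute in f32
  //   fcvt h0, s0        ; round the result back to f16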
347 
348  // v4f16 is also a storage-only type, so promote it to v4f32 when that is
349  // known to be safe.
362 
363  // Expand all other v4f16 operations.
364  // FIXME: We could generate better code by promoting some operations to
365  // a pair of v4f32s
392 
393 
394  // v8f16 is also a storage-only type, so expand it.
426 
427  // AArch64 has implementations of a lot of rounding-like FP operations.
428  for (MVT Ty : {MVT::f32, MVT::f64}) {
439  }
440 
442 
444 
445  // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
446  // This requires the Performance Monitors extension.
447  if (Subtarget->hasPerfMon())
449 
450  if (Subtarget->isTargetMachO()) {
451  // For iOS, we don't want the normal expansion of a libcall to
452  // sincos. We want to issue a libcall to __sincos_stret to avoid memory
453  // traffic.
456  } else {
459  }
460 
461  // Make floating-point constants legal for the large code model, so they don't
462  // become loads from the constant pool.
463  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
466  }
467 
468  // AArch64 does not have floating-point extending loads, i1 sign-extending
469  // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
470  for (MVT VT : MVT::fp_valuetypes()) {
475  }
476  for (MVT VT : MVT::integer_valuetypes())
478 
486 
489 
490  // Indexed loads and stores are supported.
491  for (unsigned im = (unsigned)ISD::PRE_INC;
507  }
508 
509  // Trap.
511 
512  // We combine OR nodes for bitfield operations.
514 
515  // Vector add and sub nodes may conceal a high-half opportunity.
516  // Also, try to fold ADD into CSINC/CSINV.
523 
527 
529 
536  if (Subtarget->supportsAddressTopByteIgnored())
538 
540 
543 
548 
552 
554 
556 
557  // Enable TBZ/TBNZ
559  EnableExtLdPromotion = true;
560 
561  // Set required alignment.
563  // Set preferred alignments.
566 
567  // Only change the limit for entries in a jump table if specified by
568  // the subtarget, but not at the command line.
569  unsigned MaxJT = STI.getMaximumJumpTableSize();
570  if (MaxJT && getMaximumJumpTableSize() == 0)
572 
573  setHasExtractBitsInsn(true);
574 
576 
577  if (Subtarget->hasNEON()) {
578  // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
579  // silliness like this:
605 
611 
613 
614  // AArch64 doesn't have direct vector ->f32 conversion instructions for
615  // elements smaller than i32, so promote the input to i32 first.
620  // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
621  // -> v8f16 conversions.
626  // Similarly, there is no direct i32 -> f64 vector conversion instruction.
631  // Or, direct i32 -> f16 vector conversion. Set it to custom, so the
632  // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
635 
638 
647 
648  // AArch64 doesn't have MUL.2d:
650  // Custom handling for some quad-vector types to detect MULL.
654 
657  // Likewise, narrowing and extending vector loads/stores aren't handled
658  // directly.
659  for (MVT VT : MVT::vector_valuetypes()) {
661 
666 
668 
669  for (MVT InnerVT : MVT::vector_valuetypes()) {
670  setTruncStoreAction(VT, InnerVT, Expand);
671  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
672  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
673  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
674  }
675  }
676 
677  // AArch64 has implementations of a lot of rounding-like FP operations.
678  for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
685  }
686  }
687 
689 }
690 
691 void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
692  if (VT == MVT::v2f32 || VT == MVT::v4f16) {
695 
698  } else if (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16) {
701 
704  }
705 
706  // Mark vector float intrinsics as expand.
707  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
717 
718  // But we do support custom-lowering for FCOPYSIGN.
720  }
721 
734 
738  for (MVT InnerVT : MVT::all_valuetypes())
739  setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
740 
741  // CNT supports only B element sizes.
742  if (VT != MVT::v8i8 && VT != MVT::v16i8)
744 
750 
753 
754  // [SU][MIN|MAX] are available for all NEON types apart from i64.
755  if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
756  for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
757  setOperationAction(Opcode, VT, Legal);
758 
759  // F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!).
760  if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::f16)
761  for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
763  setOperationAction(Opcode, VT, Legal);
764 
765  if (Subtarget->isLittleEndian()) {
766  for (unsigned im = (unsigned)ISD::PRE_INC;
770  }
771  }
772 }
773 
774 void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
775  addRegisterClass(VT, &AArch64::FPR64RegClass);
776  addTypeForNEON(VT, MVT::v2i32);
777 }
778 
779 void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
780  addRegisterClass(VT, &AArch64::FPR128RegClass);
781  addTypeForNEON(VT, MVT::v4i32);
782 }
783 
785  EVT VT) const {
786  if (!VT.isVector())
787  return MVT::i32;
789 }
790 
791 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
792 /// Mask are known to be either zero or one and return them in the
793 /// KnownZero/KnownOne bitsets.
795  const SDValue Op, APInt &KnownZero, APInt &KnownOne,
796  const SelectionDAG &DAG, unsigned Depth) const {
797  switch (Op.getOpcode()) {
798  default:
799  break;
800  case AArch64ISD::CSEL: {
801  APInt KnownZero2, KnownOne2;
802  DAG.computeKnownBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1);
803  DAG.computeKnownBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1);
804  KnownZero &= KnownZero2;
805  KnownOne &= KnownOne2;
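    // Worked example (illustrative): if operand 0 has KnownZero = 0xFFFF0000
    // and operand 1 has KnownZero = 0xFF000000, only the bits known zero in
    // both operands survive, so the CSEL result gets KnownZero = 0xFF000000;
    // the same intersection is applied to KnownOne.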
806  break;
807  }
808  case ISD::INTRINSIC_W_CHAIN: {
809  ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
810  Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
811  switch (IntID) {
812  default: return;
813  case Intrinsic::aarch64_ldaxr:
814  case Intrinsic::aarch64_ldxr: {
815  unsigned BitWidth = KnownOne.getBitWidth();
816  EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
817  unsigned MemBits = VT.getScalarSizeInBits();
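      // For example (illustrative): ldxrb zero-extends an i8 location, so
      // MemBits == 8 and every bit above the low 8 bits of the result is
      // known to be zero.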
818  KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
819  return;
820  }
821  }
822  break;
823  }
825  case ISD::INTRINSIC_VOID: {
826  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
827  switch (IntNo) {
828  default:
829  break;
830  case Intrinsic::aarch64_neon_umaxv:
831  case Intrinsic::aarch64_neon_uminv: {
832  // Figure out the datatype of the vector operand. The UMINV instruction
833  // will zero extend the result, so we can mark as known zero all the
834  // bits larger than the element datatype. 32-bit or larger doesn't need
835  // this as those are legal types and will be handled by isel directly.
836  MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
837  unsigned BitWidth = KnownZero.getBitWidth();
838  if (VT == MVT::v8i8 || VT == MVT::v16i8) {
839  assert(BitWidth >= 8 && "Unexpected width!");
840  APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
841  KnownZero |= Mask;
842  } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
843  assert(BitWidth >= 16 && "Unexpected width!");
844  APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
845  KnownZero |= Mask;
846  }
847  break;
848  } break;
849  }
850  }
851  }
852 }
853 
855  EVT) const {
856  return MVT::i64;
857 }
858 
860  unsigned AddrSpace,
861  unsigned Align,
862  bool *Fast) const {
863  if (Subtarget->requiresStrictAlign())
864  return false;
865 
866  if (Fast) {
867  // Some CPUs are fine with unaligned stores except for 128-bit ones.
868  *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
869  // See comments in performSTORECombine() for more details about
870  // these conditions.
871 
872  // Code that uses clang vector extensions can mark that it
873  // wants unaligned accesses to be treated as fast by
874  // underspecifying alignment to be 1 or 2.
875  Align <= 2 ||
876 
877  // Disregard v2i64. Memcpy lowering produces those and splitting
878  // them regresses performance on micro-benchmarks and olden/bh.
879  VT == MVT::v2i64;
880  }
881  return true;
882 }
883 
884 FastISel *
886  const TargetLibraryInfo *libInfo) const {
887  return AArch64::createFastISel(funcInfo, libInfo);
888 }
889 
890 const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
891  switch ((AArch64ISD::NodeType)Opcode) {
892  case AArch64ISD::FIRST_NUMBER: break;
893  case AArch64ISD::CALL: return "AArch64ISD::CALL";
894  case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
895  case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
896  case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
897  case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
898  case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
899  case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
900  case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
901  case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
902  case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
903  case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
904  case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
905  case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
906  case AArch64ISD::ADC: return "AArch64ISD::ADC";
907  case AArch64ISD::SBC: return "AArch64ISD::SBC";
908  case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
909  case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
910  case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
911  case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
912  case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
913  case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
914  case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
915  case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
916  case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
917  case AArch64ISD::DUP: return "AArch64ISD::DUP";
918  case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
919  case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
920  case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
921  case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
922  case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
923  case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
924  case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
925  case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
926  case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
927  case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
928  case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
929  case AArch64ISD::BICi: return "AArch64ISD::BICi";
930  case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
931  case AArch64ISD::BSL: return "AArch64ISD::BSL";
932  case AArch64ISD::NEG: return "AArch64ISD::NEG";
933  case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
934  case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
935  case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
936  case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
937  case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
938  case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
939  case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
940  case AArch64ISD::REV16: return "AArch64ISD::REV16";
941  case AArch64ISD::REV32: return "AArch64ISD::REV32";
942  case AArch64ISD::REV64: return "AArch64ISD::REV64";
943  case AArch64ISD::EXT: return "AArch64ISD::EXT";
944  case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
945  case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
946  case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
947  case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
948  case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
949  case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
950  case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
951  case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
952  case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
953  case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
954  case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
955  case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
956  case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
957  case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
958  case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
959  case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
960  case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
961  case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
962  case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
963  case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
964  case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
965  case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
966  case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
967  case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
968  case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
969  case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
970  case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
971  case AArch64ISD::NOT: return "AArch64ISD::NOT";
972  case AArch64ISD::BIT: return "AArch64ISD::BIT";
973  case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
974  case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
975  case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
976  case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
977  case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
978  case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
979  case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
980  case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
981  case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
982  case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
983  case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
984  case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
985  case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
986  case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
987  case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
988  case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
989  case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
990  case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
991  case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
992  case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
993  case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
994  case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
995  case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
996  case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
997  case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
998  case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
999  case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1000  case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1001  case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1002  case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1003  case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1004  case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1005  case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1006  case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1007  case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1008  case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1009  case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1010  case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1011  case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1012  case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1013  case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1014  case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1015  case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1016  case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1017  }
1018  return nullptr;
1019 }
1020 
1023  MachineBasicBlock *MBB) const {
1024  // We materialise the F128CSEL pseudo-instruction as some control flow and a
1025  // phi node:
1026 
1027  // OrigBB:
1028  // [... previous instrs leading to comparison ...]
1029  // b.ne TrueBB
1030  // b EndBB
1031  // TrueBB:
1032  // ; Fallthrough
1033  // EndBB:
1034  // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1035 
1036  MachineFunction *MF = MBB->getParent();
1037  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1038  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1039  DebugLoc DL = MI.getDebugLoc();
1040  MachineFunction::iterator It = ++MBB->getIterator();
1041 
1042  unsigned DestReg = MI.getOperand(0).getReg();
1043  unsigned IfTrueReg = MI.getOperand(1).getReg();
1044  unsigned IfFalseReg = MI.getOperand(2).getReg();
1045  unsigned CondCode = MI.getOperand(3).getImm();
1046  bool NZCVKilled = MI.getOperand(4).isKill();
1047 
1048  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1049  MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1050  MF->insert(It, TrueBB);
1051  MF->insert(It, EndBB);
1052 
1053  // Transfer rest of current basic-block to EndBB
1054  EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1055  MBB->end());
1056  EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1057 
1058  BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1059  BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1060  MBB->addSuccessor(TrueBB);
1061  MBB->addSuccessor(EndBB);
1062 
1063  // TrueBB falls through to the end.
1064  TrueBB->addSuccessor(EndBB);
1065 
1066  if (!NZCVKilled) {
1067  TrueBB->addLiveIn(AArch64::NZCV);
1068  EndBB->addLiveIn(AArch64::NZCV);
1069  }
1070 
1071  BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1072  .addReg(IfTrueReg)
1073  .addMBB(TrueBB)
1074  .addReg(IfFalseReg)
1075  .addMBB(MBB);
1076 
1077  MI.eraseFromParent();
1078  return EndBB;
1079 }
1080 
1082  MachineInstr &MI, MachineBasicBlock *BB) const {
1083  switch (MI.getOpcode()) {
1084  default:
1085 #ifndef NDEBUG
1086  MI.dump();
1087 #endif
1088  llvm_unreachable("Unexpected instruction for custom inserter!");
1089 
1090  case AArch64::F128CSEL:
1091  return EmitF128CSEL(MI, BB);
1092 
1093  case TargetOpcode::STACKMAP:
1094  case TargetOpcode::PATCHPOINT:
1095  return emitPatchPoint(MI, BB);
1096  }
1097 }
1098 
1099 //===----------------------------------------------------------------------===//
1100 // AArch64 Lowering private implementation.
1101 //===----------------------------------------------------------------------===//
1102 
1103 //===----------------------------------------------------------------------===//
1104 // Lowering Code
1105 //===----------------------------------------------------------------------===//
1106 
1107 /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1108 /// CC
1110  switch (CC) {
1111  default:
1112  llvm_unreachable("Unknown condition code!");
1113  case ISD::SETNE:
1114  return AArch64CC::NE;
1115  case ISD::SETEQ:
1116  return AArch64CC::EQ;
1117  case ISD::SETGT:
1118  return AArch64CC::GT;
1119  case ISD::SETGE:
1120  return AArch64CC::GE;
1121  case ISD::SETLT:
1122  return AArch64CC::LT;
1123  case ISD::SETLE:
1124  return AArch64CC::LE;
1125  case ISD::SETUGT:
1126  return AArch64CC::HI;
1127  case ISD::SETUGE:
1128  return AArch64CC::HS;
1129  case ISD::SETULT:
1130  return AArch64CC::LO;
1131  case ISD::SETULE:
1132  return AArch64CC::LS;
1133  }
1134 }
1135 
1136 /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1139  AArch64CC::CondCode &CondCode2) {
1140  CondCode2 = AArch64CC::AL;
1141  switch (CC) {
1142  default:
1143  llvm_unreachable("Unknown FP condition!");
1144  case ISD::SETEQ:
1145  case ISD::SETOEQ:
1146  CondCode = AArch64CC::EQ;
1147  break;
1148  case ISD::SETGT:
1149  case ISD::SETOGT:
1150  CondCode = AArch64CC::GT;
1151  break;
1152  case ISD::SETGE:
1153  case ISD::SETOGE:
1154  CondCode = AArch64CC::GE;
1155  break;
1156  case ISD::SETOLT:
1157  CondCode = AArch64CC::MI;
1158  break;
1159  case ISD::SETOLE:
1160  CondCode = AArch64CC::LS;
1161  break;
1162  case ISD::SETONE:
1163  CondCode = AArch64CC::MI;
1164  CondCode2 = AArch64CC::GT;
1165  break;
1166  case ISD::SETO:
1167  CondCode = AArch64CC::VC;
1168  break;
1169  case ISD::SETUO:
1170  CondCode = AArch64CC::VS;
1171  break;
1172  case ISD::SETUEQ:
1173  CondCode = AArch64CC::EQ;
1174  CondCode2 = AArch64CC::VS;
1175  break;
1176  case ISD::SETUGT:
1177  CondCode = AArch64CC::HI;
1178  break;
1179  case ISD::SETUGE:
1180  CondCode = AArch64CC::PL;
1181  break;
1182  case ISD::SETLT:
1183  case ISD::SETULT:
1184  CondCode = AArch64CC::LT;
1185  break;
1186  case ISD::SETLE:
1187  case ISD::SETULE:
1188  CondCode = AArch64CC::LE;
1189  break;
1190  case ISD::SETNE:
1191  case ISD::SETUNE:
1192  CondCode = AArch64CC::NE;
1193  break;
1194  }
1195 }
1196 
1197 /// Convert a DAG fp condition code to an AArch64 CC.
1198 /// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1199 /// should be AND'ed instead of OR'ed.
1202  AArch64CC::CondCode &CondCode2) {
1203  CondCode2 = AArch64CC::AL;
1204  switch (CC) {
1205  default:
1206  changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1207  assert(CondCode2 == AArch64CC::AL);
1208  break;
1209  case ISD::SETONE:
1210  // (a one b)
1211  // == ((a olt b) || (a ogt b))
1212  // == ((a ord b) && (a une b))
1213  CondCode = AArch64CC::VC;
1214  CondCode2 = AArch64CC::NE;
1215  break;
1216  case ISD::SETUEQ:
1217  // (a ueq b)
1218  // == ((a uno b) || (a oeq b))
1219  // == ((a ule b) && (a uge b))
1220  CondCode = AArch64CC::PL;
1221  CondCode2 = AArch64CC::LE;
1222  break;
1223  }
1224 }
1225 
1226 /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1227 /// CC usable with the vector instructions. Fewer operations are available
1228 /// without a real NZCV register, so we have to use less efficient combinations
1229 /// to get the same effect.
1232  AArch64CC::CondCode &CondCode2,
1233  bool &Invert) {
1234  Invert = false;
1235  switch (CC) {
1236  default:
1237  // Mostly the scalar mappings work fine.
1238  changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1239  break;
1240  case ISD::SETUO:
1241  Invert = true;
1243  case ISD::SETO:
1244  CondCode = AArch64CC::MI;
1245  CondCode2 = AArch64CC::GE;
1246  break;
1247  case ISD::SETUEQ:
1248  case ISD::SETULT:
1249  case ISD::SETULE:
1250  case ISD::SETUGT:
1251  case ISD::SETUGE:
1252  // All of the compare-mask comparisons are ordered, but we can switch
1253  // between the two by a double inversion. E.g. ULE == !OGT.
1254  Invert = true;
1255  changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
1256  break;
1257  }
1258 }
1259 
1260 static bool isLegalArithImmed(uint64_t C) {
1261  // Matches AArch64DAGToDAGISel::SelectArithImmed().
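  // Worked examples (illustrative): 0xFFF fits in the low 12 bits and
  // 0xABC000 == 0xABC << 12 fits in the shifted form, so both are legal;
  // 0x1001 has bits set both inside and outside a single 12-bit field and
  // is rejected.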
1262  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1263 }
1264 
1266  const SDLoc &dl, SelectionDAG &DAG) {
1267  EVT VT = LHS.getValueType();
1268 
1269  if (VT.isFloatingPoint()) {
1270  assert(VT != MVT::f128);
1271  if (VT == MVT::f16) {
1272  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1273  RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1274  VT = MVT::f32;
1275  }
1276  return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1277  }
1278 
1279  // The CMP instruction is just an alias for SUBS, and representing it as
1280  // SUBS means that it's possible to get CSE with subtract operations.
1281  // A later phase can perform the optimization of setting the destination
1282  // register to WZR/XZR if it ends up being unused.
1283  unsigned Opcode = AArch64ISD::SUBS;
1284 
1285  if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
1286  (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1287  // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on
1288  // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
1289  // can be set differently by this operation. It comes down to whether
1290  // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1291  // everything is fine. If not then the optimization is wrong. Thus general
1292  // comparisons are only valid if op2 != 0.
1293 
1294  // So, finally, the only LLVM-native comparisons that don't mention C and V
1295  // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1296  // the absence of information about op2.
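    // Illustrative sketch (not from the original source): for
    // "icmp eq i32 %x, (sub 0, %y)" this folds the negation into the compare,
    // so instead of
    //   neg w8, w1
    //   cmp w0, w8
    // the backend can emit
    //   cmn w0, w1
    // which is only safe here because EQ/NE do not read the C and V flags.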
1297  Opcode = AArch64ISD::ADDS;
1298  RHS = RHS.getOperand(1);
1299  } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1300  !isUnsignedIntSetCC(CC)) {
1301  // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1302  // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1303  // of the signed comparisons.
1304  Opcode = AArch64ISD::ANDS;
1305  RHS = LHS.getOperand(1);
1306  LHS = LHS.getOperand(0);
1307  }
1308 
1309  return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1310  .getValue(1);
1311 }
1312 
1313 /// \defgroup AArch64CCMP CMP;CCMP matching
1314 ///
1315 /// These functions deal with the formation of CMP;CCMP;... sequences.
1316 /// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1317 /// a comparison. They set the NZCV flags to a predefined value if their
1318 /// predicate is false. This allows us to express arbitrary conjunctions, for
1319 /// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B))))"
1320 /// expressed as:
1321 /// cmp A
1322 /// ccmp B, inv(CB), CA
1323 /// check for CB flags
1324 ///
1325 /// In general we can create code for arbitrary "... (and (and A B) C)"
1326 /// sequences. We can also implement some "or" expressions, because "(or A B)"
1327 /// is equivalent to "not (and (not A) (not B))" and we can implement some
1328 /// negation operations:
1329 /// We can negate the results of a single comparison by inverting the flags
1330 /// used when the predicate fails and inverting the flags tested in the next
1331 /// instruction; We can also negate the results of the whole previous
1332 /// conditional compare sequence by inverting the flags tested in the next
1333 /// instruction. However there is no way to negate the result of a partial
1334 /// sequence.
1335 ///
1336 /// Therefore on encountering an "or" expression we can negate the subtree on
1337 /// one side and have to be able to push the negate to the leaves of the subtree
1338 /// on the other side (see also the comments in code). As complete example:
1339 /// "or (or (setCA (cmp A)) (setCB (cmp B)))
1340 /// (and (setCC (cmp C)) (setCD (cmp D)))"
1341 /// is transformed to
1342 /// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
1343 /// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1344 /// and implemented as:
1345 /// cmp C
1346 /// ccmp D, inv(CD), CC
1347 /// ccmp A, CA, inv(CD)
1348 /// ccmp B, CB, inv(CA)
1349 /// check for CB flags
1350 /// A counterexample is "or (and A B) (and C D)" which cannot be implemented
1351 /// by conditional compare sequences.
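/// As an illustrative sketch (not taken verbatim from generated code), the C
/// expression "a == 0 && b == 5" can be selected as:
///   cmp  w0, #0
///   ccmp w1, #5, #0, eq   ; if a == 0 compare b with 5, else set NZCV to 0000
///   cset w0, eq
/// where the #0 NZCV immediate is chosen so that the final eq test also fails
/// whenever the first comparison already failed.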
1352 /// @{
1353 
1354 /// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1356  ISD::CondCode CC, SDValue CCOp,
1358  AArch64CC::CondCode OutCC,
1359  const SDLoc &DL, SelectionDAG &DAG) {
1360  unsigned Opcode = 0;
1361  if (LHS.getValueType().isFloatingPoint()) {
1362  assert(LHS.getValueType() != MVT::f128);
1363  if (LHS.getValueType() == MVT::f16) {
1364  LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1365  RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1366  }
1367  Opcode = AArch64ISD::FCCMP;
1368  } else if (RHS.getOpcode() == ISD::SUB) {
1369  SDValue SubOp0 = RHS.getOperand(0);
1370  if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1371  // See emitComparison() on why we can only do this for SETEQ and SETNE.
1372  Opcode = AArch64ISD::CCMN;
1373  RHS = RHS.getOperand(1);
1374  }
1375  }
1376  if (Opcode == 0)
1377  Opcode = AArch64ISD::CCMP;
1378 
1379  SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1381  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1382  SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1383  return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1384 }
1385 
1386 /// Returns true if @p Val is a tree of AND/OR/SETCC operations.
1387 /// CanPushNegate is set to true if we can push a negate operation through
1388 /// the tree in a way that we are left with AND operations and negate operations
1389 /// at the leaves only, i.e. "not (or (or x y) z)" can be changed to
1390 /// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
1391 /// brought into such a form.
1392 static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate,
1393  unsigned Depth = 0) {
1394  if (!Val.hasOneUse())
1395  return false;
1396  unsigned Opcode = Val->getOpcode();
1397  if (Opcode == ISD::SETCC) {
1398  if (Val->getOperand(0).getValueType() == MVT::f128)
1399  return false;
1400  CanNegate = true;
1401  return true;
1402  }
1403  // Protect against exponential runtime and stack overflow.
1404  if (Depth > 6)
1405  return false;
1406  if (Opcode == ISD::AND || Opcode == ISD::OR) {
1407  SDValue O0 = Val->getOperand(0);
1408  SDValue O1 = Val->getOperand(1);
1409  bool CanNegateL;
1410  if (!isConjunctionDisjunctionTree(O0, CanNegateL, Depth+1))
1411  return false;
1412  bool CanNegateR;
1413  if (!isConjunctionDisjunctionTree(O1, CanNegateR, Depth+1))
1414  return false;
1415 
1416  if (Opcode == ISD::OR) {
1417  // For an OR expression we need to be able to negate at least one side or
1418  // we cannot do the transformation at all.
1419  if (!CanNegateL && !CanNegateR)
1420  return false;
1421  // We can however change a (not (or x y)) to (and (not x) (not y)) if we
1422  // can negate the x and y subtrees.
1423  CanNegate = CanNegateL && CanNegateR;
1424  } else {
1425  // If the operands are OR expressions then we finally need to negate their
1426  // outputs. We can only do that for the operand emitted last, by negating
1427  // OutCC, not for both operands.
1428  bool NeedsNegOutL = O0->getOpcode() == ISD::OR;
1429  bool NeedsNegOutR = O1->getOpcode() == ISD::OR;
1430  if (NeedsNegOutL && NeedsNegOutR)
1431  return false;
1432  // We cannot negate an AND operation (it would become an OR).
1433  CanNegate = false;
1434  }
1435  return true;
1436  }
1437  return false;
1438 }
1439 
1440 /// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1441 /// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1442 /// Tries to transform the given i1 producing node @p Val to a series of compare
1443 /// and conditional compare operations. @returns an NZCV flags producing node
1444 /// and sets @p OutCC to the flags that should be tested or returns SDValue() if
1445 /// transformation was not possible.
1446 /// On recursive invocations @p PushNegate may be set to true to have negation
1447 /// effects pushed to the tree leaves; @p Predicate is an NZCV flag predicate
1448 /// for the comparisons in the current subtree; @p Depth limits the search
1449 /// depth to avoid stack overflow.
1451  AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1453  // We're at a tree leaf, produce a conditional comparison operation.
1454  unsigned Opcode = Val->getOpcode();
1455  if (Opcode == ISD::SETCC) {
1456  SDValue LHS = Val->getOperand(0);
1457  SDValue RHS = Val->getOperand(1);
1458  ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1459  bool isInteger = LHS.getValueType().isInteger();
1460  if (Negate)
1461  CC = getSetCCInverse(CC, isInteger);
1462  SDLoc DL(Val);
1463  // Determine OutCC and handle FP special case.
1464  if (isInteger) {
1465  OutCC = changeIntCCToAArch64CC(CC);
1466  } else {
1468  AArch64CC::CondCode ExtraCC;
1469  changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1470  // Some floating point conditions can't be tested with a single condition
1471  // code. Construct an additional comparison in this case.
1472  if (ExtraCC != AArch64CC::AL) {
1473  SDValue ExtraCmp;
1474  if (!CCOp.getNode())
1475  ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1476  else
1477  ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1478  ExtraCC, DL, DAG);
1479  CCOp = ExtraCmp;
1480  Predicate = ExtraCC;
1481  }
1482  }
1483 
1484  // Produce a normal comparison if we are first in the chain
1485  if (!CCOp)
1486  return emitComparison(LHS, RHS, CC, DL, DAG);
1487  // Otherwise produce a ccmp.
1488  return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
1489  DAG);
1490  }
1491  assert((Opcode == ISD::AND || (Opcode == ISD::OR && Val->hasOneUse())) &&
1492  "Valid conjunction/disjunction tree");
1493 
1494  // Check if both sides can be transformed.
1495  SDValue LHS = Val->getOperand(0);
1496  SDValue RHS = Val->getOperand(1);
1497 
1498  // In case of an OR we need to negate our operands and the result.
1499  // (A v B) <=> not(not(A) ^ not(B))
1500  bool NegateOpsAndResult = Opcode == ISD::OR;
1501  // We can negate the results of all previous operations by inverting the
1502  // predicate flags giving us a free negation for one side. The other side
1503  // must be negatable by itself.
1504  if (NegateOpsAndResult) {
1505  // See which side we can negate.
1506  bool CanNegateL;
1507  bool isValidL = isConjunctionDisjunctionTree(LHS, CanNegateL);
1508  assert(isValidL && "Valid conjunction/disjunction tree");
1509  (void)isValidL;
1510 
1511 #ifndef NDEBUG
1512  bool CanNegateR;
1513  bool isValidR = isConjunctionDisjunctionTree(RHS, CanNegateR);
1514  assert(isValidR && "Valid conjunction/disjunction tree");
1515  assert((CanNegateL || CanNegateR) && "Valid conjunction/disjunction tree");
1516 #endif
1517 
1518  // Order the side which we cannot negate to RHS so we can emit it first.
1519  if (!CanNegateL)
1520  std::swap(LHS, RHS);
1521  } else {
1522  bool NeedsNegOutL = LHS->getOpcode() == ISD::OR;
1523  assert((!NeedsNegOutL || RHS->getOpcode() != ISD::OR) &&
1524  "Valid conjunction/disjunction tree");
1525  // Order the side where we need to negate the output flags to RHS so it
1526  // gets emitted first.
1527  if (NeedsNegOutL)
1528  std::swap(LHS, RHS);
1529  }
1530 
1531  // Emit RHS. If we want to negate the tree we only need to push a negate
1532  // through if we are already in a PushNegate case, otherwise we can negate
1533  // the "flags to test" afterwards.
1534  AArch64CC::CondCode RHSCC;
1535  SDValue CmpR = emitConjunctionDisjunctionTreeRec(DAG, RHS, RHSCC, Negate,
1536  CCOp, Predicate);
1537  if (NegateOpsAndResult && !Negate)
1538  RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1539  // Emit LHS. We may need to negate it.
1540  SDValue CmpL = emitConjunctionDisjunctionTreeRec(DAG, LHS, OutCC,
1541  NegateOpsAndResult, CmpR,
1542  RHSCC);
1543  // If we transformed an OR to an AND then we have to negate the result
1544  // (or absorb the Negate parameter).
1545  if (NegateOpsAndResult && !Negate)
1546  OutCC = AArch64CC::getInvertedCondCode(OutCC);
1547  return CmpL;
1548 }
1549 
1550 /// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1551 /// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1552 /// \see emitConjunctionDisjunctionTreeRec().
1554  AArch64CC::CondCode &OutCC) {
1555  bool CanNegate;
1556  if (!isConjunctionDisjunctionTree(Val, CanNegate))
1557  return SDValue();
1558 
1559  return emitConjunctionDisjunctionTreeRec(DAG, Val, OutCC, false, SDValue(),
1560  AArch64CC::AL);
1561 }
1562 
1563 /// @}
1564 
1566  SDValue &AArch64cc, SelectionDAG &DAG,
1567  const SDLoc &dl) {
1568  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1569  EVT VT = RHS.getValueType();
1570  uint64_t C = RHSC->getZExtValue();
1571  if (!isLegalArithImmed(C)) {
1572  // Constant does not fit, try adjusting it by one?
1573  switch (CC) {
1574  default:
1575  break;
1576  case ISD::SETLT:
1577  case ISD::SETGE:
1578  if ((VT == MVT::i32 && C != 0x80000000 &&
1579  isLegalArithImmed((uint32_t)(C - 1))) ||
1580  (VT == MVT::i64 && C != 0x80000000ULL &&
1581  isLegalArithImmed(C - 1ULL))) {
1582  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1583  C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1584  RHS = DAG.getConstant(C, dl, VT);
1585  }
1586  break;
1587  case ISD::SETULT:
1588  case ISD::SETUGE:
1589  if ((VT == MVT::i32 && C != 0 &&
1590  isLegalArithImmed((uint32_t)(C - 1))) ||
1591  (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
1592  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1593  C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1594  RHS = DAG.getConstant(C, dl, VT);
1595  }
1596  break;
1597  case ISD::SETLE:
1598  case ISD::SETGT:
1599  if ((VT == MVT::i32 && C != INT32_MAX &&
1600  isLegalArithImmed((uint32_t)(C + 1))) ||
1601  (VT == MVT::i64 && C != INT64_MAX &&
1602  isLegalArithImmed(C + 1ULL))) {
1603  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1604  C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1605  RHS = DAG.getConstant(C, dl, VT);
1606  }
1607  break;
1608  case ISD::SETULE:
1609  case ISD::SETUGT:
1610  if ((VT == MVT::i32 && C != UINT32_MAX &&
1611  isLegalArithImmed((uint32_t)(C + 1))) ||
1612  (VT == MVT::i64 && C != UINT64_MAX &&
1613  isLegalArithImmed(C + 1ULL))) {
1614  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1615  C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1616  RHS = DAG.getConstant(C, dl, VT);
1617  }
1618  break;
1619  }
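      // Worked example (illustrative): "x s< 4097" cannot encode 4097, which
      // has bits set both inside and outside a 12-bit field, but it is
      // equivalent to "x s<= 4096", and 4096 == 0x1 << 12 is a legal shifted
      // arithmetic immediate.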
1620  }
1621  }
1622  SDValue Cmp;
1623  AArch64CC::CondCode AArch64CC;
1624  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
1625  const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
1626 
1627  // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
1628  // For the i8 operand, the largest immediate is 255, so this can be easily
1629  // encoded in the compare instruction. For the i16 operand, however, the
1630  // largest immediate cannot be encoded in the compare.
1631  // Therefore, use a sign extending load and cmn to avoid materializing the
1632  // -1 constant. For example,
1633  // movz w1, #65535
1634  // ldrh w0, [x0, #0]
1635  // cmp w0, w1
1636  // >
1637  // ldrsh w0, [x0, #0]
1638  // cmn w0, #1
1639  // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
1640  // if and only if (sext LHS) == (sext RHS). The checks are in place to
1641  // ensure both the LHS and RHS are truly zero extended and to make sure the
1642  // transformation is profitable.
1643  if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
1644  cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
1645  cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
1646  LHS.getNode()->hasNUsesOfValue(1, 0)) {
1647  int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
1648  if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
1649  SDValue SExt =
1650  DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
1651  DAG.getValueType(MVT::i16));
1652  Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
1653  RHS.getValueType()),
1654  CC, dl, DAG);
1655  AArch64CC = changeIntCCToAArch64CC(CC);
1656  }
1657  }
1658 
1659  if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
1660  if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
1661  if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
1662  AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
1663  }
1664  }
1665  }
1666 
1667  if (!Cmp) {
1668  Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
1669  AArch64CC = changeIntCCToAArch64CC(CC);
1670  }
1671  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
1672  return Cmp;
1673 }
1674 
1675 static std::pair<SDValue, SDValue>
1677  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
1678  "Unsupported value type");
1679  SDValue Value, Overflow;
1680  SDLoc DL(Op);
1681  SDValue LHS = Op.getOperand(0);
1682  SDValue RHS = Op.getOperand(1);
1683  unsigned Opc = 0;
1684  switch (Op.getOpcode()) {
1685  default:
1686  llvm_unreachable("Unknown overflow instruction!");
1687  case ISD::SADDO:
1688  Opc = AArch64ISD::ADDS;
1689  CC = AArch64CC::VS;
1690  break;
1691  case ISD::UADDO:
1692  Opc = AArch64ISD::ADDS;
1693  CC = AArch64CC::HS;
1694  break;
1695  case ISD::SSUBO:
1696  Opc = AArch64ISD::SUBS;
1697  CC = AArch64CC::VS;
1698  break;
1699  case ISD::USUBO:
1700  Opc = AArch64ISD::SUBS;
1701  CC = AArch64CC::LO;
1702  break;
1703  // Multiply needs a little bit of extra work.
1704  case ISD::SMULO:
1705  case ISD::UMULO: {
1706  CC = AArch64CC::NE;
1707  bool IsSigned = Op.getOpcode() == ISD::SMULO;
1708  if (Op.getValueType() == MVT::i32) {
1709  unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1710  // For a 32 bit multiply with overflow check we want the instruction
1711  // selector to generate a widening multiply (SMADDL/UMADDL). For that we
1712  // need to generate the following pattern:
1713  // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
1714  LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
1715  RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
1716  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1717  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
1718  DAG.getConstant(0, DL, MVT::i64));
1719  // On AArch64 the upper 32 bits are always zero extended for a 32 bit
1720  // operation. We need to clear out the upper 32 bits, because we used a
1721  // widening multiply that wrote all 64 bits. In the end this should be a
1722  // noop.
1723  Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
1724  if (IsSigned) {
1725  // The signed overflow check requires more than just a simple check for
1726  // any bit set in the upper 32 bits of the result. These bits could be
1727  // just the sign bits of a negative number. To perform the overflow
1728  // check, we arithmetic shift right the low 32 bits of the result by
1729  // 31 bits and compare that value to the upper 32 bits.
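        // Worked example (illustrative): for %a == 0x40000000 and %b == 4 the
        // widened product is 0x100000000, so the low half is 0, the high half
        // is 1, and (0 >> 31) != 1 reports overflow; for %a == -1 and %b == 1
        // the product is all-ones, the low half shifted right by 31 equals
        // the high half (0xFFFFFFFF), and no overflow is reported.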
1730  SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
1731  DAG.getConstant(32, DL, MVT::i64));
1732  UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
1733  SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
1734  DAG.getConstant(31, DL, MVT::i64));
1735  // It is important that LowerBits is last, otherwise the arithmetic
1736  // shift will not be folded into the compare (SUBS).
1737  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
1738  Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1739  .getValue(1);
1740  } else {
1741  // The overflow check for unsigned multiply is easy. We only need to
1742  // check if any of the upper 32 bits are set. This can be done with a
1743  // CMP (shifted register). For that we need to generate the following
1744  // pattern:
1745  // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
1746  SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
1747  DAG.getConstant(32, DL, MVT::i64));
1748  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1749  Overflow =
1750  DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1751  DAG.getConstant(0, DL, MVT::i64),
1752  UpperBits).getValue(1);
1753  }
1754  break;
1755  }
1756  assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
1757  // For the 64 bit multiply
1758  Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1759  if (IsSigned) {
1760  SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
1761  SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
1762  DAG.getConstant(63, DL, MVT::i64));
1763  // It is important that LowerBits is last, otherwise the arithmetic
1764  // shift will not be folded into the compare (SUBS).
1765  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1766  Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1767  .getValue(1);
1768  } else {
1769  SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
1770  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1771  Overflow =
1772  DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1773  DAG.getConstant(0, DL, MVT::i64),
1774  UpperBits).getValue(1);
1775  }
1776  break;
1777  }
1778  } // switch (...)
1779 
1780  if (Opc) {
1781  SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
1782 
1783  // Emit the AArch64 operation with overflow check.
1784  Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
1785  Overflow = Value.getValue(1);
1786  }
1787  return std::make_pair(Value, Overflow);
1788 }
1789 
1790 SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
1791  RTLIB::Libcall Call) const {
1792  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
1793  return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
1794 }
1795 
1797  SDValue Sel = Op.getOperand(0);
1798  SDValue Other = Op.getOperand(1);
1799 
1800  // If neither operand is a SELECT_CC, give up.
1801  if (Sel.getOpcode() != ISD::SELECT_CC)
1802  std::swap(Sel, Other);
1803  if (Sel.getOpcode() != ISD::SELECT_CC)
1804  return Op;
1805 
1806  // The folding we want to perform is:
1807  // (xor x, (select_cc a, b, cc, 0, -1) )
1808  // -->
1809  // (csel x, (xor x, -1), cc ...)
1810  //
1811  // The latter will get matched to a CSINV instruction.
1812 
1813  ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
1814  SDValue LHS = Sel.getOperand(0);
1815  SDValue RHS = Sel.getOperand(1);
1816  SDValue TVal = Sel.getOperand(2);
1817  SDValue FVal = Sel.getOperand(3);
1818  SDLoc dl(Sel);
1819 
1820  // FIXME: This could be generalized to non-integer comparisons.
1821  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
1822  return Op;
1823 
1824  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
1825  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
1826 
1827  // The values aren't constants, this isn't the pattern we're looking for.
1828  if (!CFVal || !CTVal)
1829  return Op;
1830 
1831  // We can commute the SELECT_CC by inverting the condition. This
1832  // might be needed to make this fit into a CSINV pattern.
1833  if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
1834  std::swap(TVal, FVal);
1835  std::swap(CTVal, CFVal);
1836  CC = ISD::getSetCCInverse(CC, true);
1837  }
1838 
1839  // If the constants line up, perform the transform!
1840  if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
1841  SDValue CCVal;
1842  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
1843 
1844  FVal = Other;
1845  TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
1846  DAG.getConstant(-1ULL, dl, Other.getValueType()));
1847 
1848  return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
1849  CCVal, Cmp);
1850  }
1851 
1852  return Op;
1853 }
1854 
1856  EVT VT = Op.getValueType();
1857 
1858  // Let legalize expand this if it isn't a legal type yet.
1859  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
1860  return SDValue();
1861 
1862  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
1863 
1864  unsigned Opc;
1865  bool ExtraOp = false;
1866  switch (Op.getOpcode()) {
1867  default:
1868  llvm_unreachable("Invalid code");
1869  case ISD::ADDC:
1870  Opc = AArch64ISD::ADDS;
1871  break;
1872  case ISD::SUBC:
1873  Opc = AArch64ISD::SUBS;
1874  break;
1875  case ISD::ADDE:
1876  Opc = AArch64ISD::ADCS;
1877  ExtraOp = true;
1878  break;
1879  case ISD::SUBE:
1880  Opc = AArch64ISD::SBCS;
1881  ExtraOp = true;
1882  break;
1883  }
1884 
1885  if (!ExtraOp)
1886  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
1887  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
1888  Op.getOperand(2));
1889 }
1890 
1892  // Let legalize expand this if it isn't a legal type yet.
1894  return SDValue();
1895 
1896  SDLoc dl(Op);
1898  // The actual operation that sets the overflow or carry flag.
1899  SDValue Value, Overflow;
1900  std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
1901 
1902  // We use 0 and 1 as false and true values.
1903  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
1904  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
1905 
1906  // We use an inverted condition, because the conditional select is inverted
1907  // too. This will allow it to be selected to a single instruction:
1908  // CSINC Wd, WZR, WZR, invert(cond).
1909  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
1910  Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
1911  CCVal, Overflow);
1912 
1913  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
1914  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
1915 }
1916 
1917 // Prefetch operands are:
1918 // 1: Address to prefetch
1919 // 2: bool isWrite
1920 // 3: int locality (0 = no locality ... 3 = extreme locality)
1921 // 4: bool isDataCache
1923  SDLoc DL(Op);
1924  unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
1925  unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
1926  unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
1927 
1928  bool IsStream = !Locality;
1929  // When the locality number is set
1930  if (Locality) {
1931  // The front-end should have filtered out the out-of-range values
1932  assert(Locality <= 3 && "Prefetch locality out-of-range");
1933  // The locality degree is the opposite of the cache speed.
1934  // Put the number the other way around.
1935  // The encoding starts at 0 for level 1
1936  Locality = 3 - Locality;
1937  }
1938 
1939  // Build the mask value encoding the expected behavior.
1940  unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1941  (!IsData << 3) | // IsDataCache bit
1942  (Locality << 1) | // Cache level bits
1943  (unsigned)IsStream; // Stream bit
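  // Worked examples (illustrative): a read prefetch of the data cache with
  // locality 3 gives IsWrite = 0, (!IsData << 3) = 0, Locality = 3 -> 0 and
  // IsStream = 0, so PrfOp == 0b00000 (PLDL1KEEP); a write prefetch of the
  // data cache with locality 1 gives PrfOp == 0b10100 (PSTL3KEEP).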
1944  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
1945  DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
1946 }
1947 
1948 SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
1949  SelectionDAG &DAG) const {
1950  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
1951 
1952  RTLIB::Libcall LC;
1954 
1955  return LowerF128Call(Op, DAG, LC);
1956 }
1957 
1958 SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
1959  SelectionDAG &DAG) const {
1960  if (Op.getOperand(0).getValueType() != MVT::f128) {
1961  // It's legal except when f128 is involved
1962  return Op;
1963  }
1964 
1965  RTLIB::Libcall LC;
1967 
1968  // FP_ROUND node has a second operand indicating whether it is known to be
1969  // precise. That doesn't take part in the LibCall so we can't directly use
1970  // LowerF128Call.
1971  SDValue SrcVal = Op.getOperand(0);
1972  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
1973  SDLoc(Op)).first;
1974 }
1975 
1977  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
1978  // Any additional optimization in this function should be recorded
1979  // in the cost tables.
1980  EVT InVT = Op.getOperand(0).getValueType();
1981  EVT VT = Op.getValueType();
1982  unsigned NumElts = InVT.getVectorNumElements();
1983 
1984  // f16 vectors are promoted to f32 before a conversion.
1985  if (InVT.getVectorElementType() == MVT::f16) {
1986  MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
1987  SDLoc dl(Op);
1988  return DAG.getNode(
1989  Op.getOpcode(), dl, Op.getValueType(),
1990  DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
1991  }
1992 
1993  if (VT.getSizeInBits() < InVT.getSizeInBits()) {
1994  SDLoc dl(Op);
1995  SDValue Cv =
1996  DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
1997  Op.getOperand(0));
1998  return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
1999  }
2000 
2001  if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2002  SDLoc dl(Op);
2003  MVT ExtVT =
2004  MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2005  VT.getVectorNumElements());
2006  SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2007  return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2008  }
2009 
2010  // Type changing conversions are illegal.
2011  return Op;
2012 }
2013 
2014 SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2015  SelectionDAG &DAG) const {
2016  if (Op.getOperand(0).getValueType().isVector())
2017  return LowerVectorFP_TO_INT(Op, DAG);
2018 
2019  // f16 conversions are promoted to f32.
2020  if (Op.getOperand(0).getValueType() == MVT::f16) {
2021  SDLoc dl(Op);
2022  return DAG.getNode(
2023  Op.getOpcode(), dl, Op.getValueType(),
2024  DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
2025  }
2026 
2027  if (Op.getOperand(0).getValueType() != MVT::f128) {
2028  // It's legal except when f128 is involved
2029  return Op;
2030  }
2031 
2032  RTLIB::Libcall LC;
2033  if (Op.getOpcode() == ISD::FP_TO_SINT)
2034  LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2035  else
2036  LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2037 
2038  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2039  return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
2040 }
2041 
2042 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2043  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2044  // Any additional optimization in this function should be recorded
2045  // in the cost tables.
2046  EVT VT = Op.getValueType();
2047  SDLoc dl(Op);
2048  SDValue In = Op.getOperand(0);
2049  EVT InVT = In.getValueType();
2050 
2051  if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2052  MVT CastVT =
2053  MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2054  InVT.getVectorNumElements());
2055  In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2056  return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2057  }
2058 
2059  if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2060  unsigned CastOpc =
2061  Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2062  EVT CastVT = VT.changeVectorElementTypeToInteger();
2063  In = DAG.getNode(CastOpc, dl, CastVT, In);
2064  return DAG.getNode(Op.getOpcode(), dl, VT, In);
2065  }
2066 
2067  return Op;
2068 }
2069 
2070 SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2071  SelectionDAG &DAG) const {
2072  if (Op.getValueType().isVector())
2073  return LowerVectorINT_TO_FP(Op, DAG);
2074 
2075  // f16 conversions are promoted to f32.
2076  if (Op.getValueType() == MVT::f16) {
2077  SDLoc dl(Op);
2078  return DAG.getNode(
2079  ISD::FP_ROUND, dl, MVT::f16,
2080  DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
2081  DAG.getIntPtrConstant(0, dl));
2082  }
2083 
2084  // i128 conversions are libcalls.
2085  if (Op.getOperand(0).getValueType() == MVT::i128)
2086  return SDValue();
2087 
2088  // Other conversions are legal, unless it's to the completely software-based
2089  // fp128.
2090  if (Op.getValueType() != MVT::f128)
2091  return Op;
2092 
2093  RTLIB::Libcall LC;
2094  if (Op.getOpcode() == ISD::SINT_TO_FP)
2095  LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2096  else
2097  LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2098 
2099  return LowerF128Call(Op, DAG, LC);
2100 }
2101 
2102 SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2103  SelectionDAG &DAG) const {
2104  // For iOS, we want to call an alternative entry point: __sincos_stret,
2105  // which returns the values in two S / D registers.
2106  SDLoc dl(Op);
2107  SDValue Arg = Op.getOperand(0);
2108  EVT ArgVT = Arg.getValueType();
2109  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2110 
2111  ArgListTy Args;
2112  ArgListEntry Entry;
2113 
2114  Entry.Node = Arg;
2115  Entry.Ty = ArgTy;
2116  Entry.isSExt = false;
2117  Entry.isZExt = false;
2118  Args.push_back(Entry);
2119 
2120  const char *LibcallName =
2121  (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
2122  SDValue Callee =
2123  DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2124 
2125  StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
2126  TargetLowering::CallLoweringInfo CLI(DAG);
2127  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
2128  .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2129 
2130  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2131  return CallResult.first;
2132 }
2133 
2134 static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2135  if (Op.getValueType() != MVT::f16)
2136  return SDValue();
2137 
2138  assert(Op.getOperand(0).getValueType() == MVT::i16);
2139  SDLoc DL(Op);
2140 
2141  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2142  Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2143  return SDValue(
2144  DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2145  DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2146  0);
2147 }
2148 
2149 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2150  if (OrigVT.getSizeInBits() >= 64)
2151  return OrigVT;
2152 
2153  assert(OrigVT.isSimple() && "Expecting a simple value type");
2154 
2155  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2156  switch (OrigSimpleTy) {
2157  default: llvm_unreachable("Unexpected Vector Type");
2158  case MVT::v2i8:
2159  case MVT::v2i16:
2160  return MVT::v2i32;
2161  case MVT::v4i8:
2162  return MVT::v4i16;
2163  }
2164 }
2165 
2166 static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2167  const EVT &OrigTy,
2168  const EVT &ExtTy,
2169  unsigned ExtOpcode) {
2170  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2171  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2172  // 64-bits we need to insert a new extension so that it will be 64-bits.
2173  assert(ExtTy.is128BitVector() && "Unexpected extension size");
2174  if (OrigTy.getSizeInBits() >= 64)
2175  return N;
2176 
2177  // Must extend size to at least 64 bits to be used as an operand for VMULL.
2178  EVT NewVT = getExtensionTo64Bits(OrigTy);
2179 
2180  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2181 }
2182 
2183 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2184  bool isSigned) {
2185  EVT VT = N->getValueType(0);
2186 
2187  if (N->getOpcode() != ISD::BUILD_VECTOR)
2188  return false;
2189 
2190  for (const SDValue &Elt : N->op_values()) {
2191  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2192  unsigned EltSize = VT.getScalarSizeInBits();
2193  unsigned HalfSize = EltSize / 2;
2194  if (isSigned) {
2195  if (!isIntN(HalfSize, C->getSExtValue()))
2196  return false;
2197  } else {
2198  if (!isUIntN(HalfSize, C->getZExtValue()))
2199  return false;
2200  }
2201  continue;
2202  }
2203  return false;
2204  }
2205 
2206  return true;
2207 }
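// Illustrative sketch (not part of the original file): the "fits in half the element
// width" test that isExtendedBUILD_VECTOR applies to each constant, restated for a
// plain integer. fitsInHalfWidth is a hypothetical helper mirroring isIntN / isUIntN.
static bool fitsInHalfWidth(long long V, unsigned EltBits, bool Signed) {
  unsigned Half = EltBits / 2;                         // e.g. 8 for a v8i16 element
  if (Signed)
    return V >= -(1LL << (Half - 1)) && V < (1LL << (Half - 1));
  return V >= 0 && (unsigned long long)V < (1ULL << Half);
}
// A v8i16 BUILD_VECTOR whose constants all satisfy fitsInHalfWidth(C, 16, false)
// behaves like an implicit zext of a v8i8 operand and can feed a UMULL.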
2208 
2209 static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2210  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2211  return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2212  N->getOperand(0)->getValueType(0),
2213  N->getValueType(0),
2214  N->getOpcode());
2215 
2216  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2217  EVT VT = N->getValueType(0);
2218  SDLoc dl(N);
2219  unsigned EltSize = VT.getScalarSizeInBits() / 2;
2220  unsigned NumElts = VT.getVectorNumElements();
2221  MVT TruncVT = MVT::getIntegerVT(EltSize);
2222  SmallVector<SDValue, 8> Ops;
2223  for (unsigned i = 0; i != NumElts; ++i) {
2224  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2225  const APInt &CInt = C->getAPIntValue();
2226  // Element types smaller than 32 bits are not legal, so use i32 elements.
2227  // The values are implicitly truncated so sext vs. zext doesn't matter.
2228  Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2229  }
2230  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2231 }
2232 
2233 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2234  if (N->getOpcode() == ISD::SIGN_EXTEND)
2235  return true;
2236  if (isExtendedBUILD_VECTOR(N, DAG, true))
2237  return true;
2238  return false;
2239 }
2240 
2241 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2242  if (N->getOpcode() == ISD::ZERO_EXTEND)
2243  return true;
2244  if (isExtendedBUILD_VECTOR(N, DAG, false))
2245  return true;
2246  return false;
2247 }
2248 
2249 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2250  unsigned Opcode = N->getOpcode();
2251  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2252  SDNode *N0 = N->getOperand(0).getNode();
2253  SDNode *N1 = N->getOperand(1).getNode();
2254  return N0->hasOneUse() && N1->hasOneUse() &&
2255  isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2256  }
2257  return false;
2258 }
2259 
2260 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2261  unsigned Opcode = N->getOpcode();
2262  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2263  SDNode *N0 = N->getOperand(0).getNode();
2264  SDNode *N1 = N->getOperand(1).getNode();
2265  return N0->hasOneUse() && N1->hasOneUse() &&
2266  isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2267  }
2268  return false;
2269 }
2270 
2271 SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
2272  // Multiplications are only custom-lowered for 128-bit vectors so that
2273  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2274  EVT VT = Op.getValueType();
2275  assert(VT.is128BitVector() && VT.isInteger() &&
2276  "unexpected type for custom-lowering ISD::MUL");
2277  SDNode *N0 = Op.getOperand(0).getNode();
2278  SDNode *N1 = Op.getOperand(1).getNode();
2279  unsigned NewOpc = 0;
2280  bool isMLA = false;
2281  bool isN0SExt = isSignExtended(N0, DAG);
2282  bool isN1SExt = isSignExtended(N1, DAG);
2283  if (isN0SExt && isN1SExt)
2284  NewOpc = AArch64ISD::SMULL;
2285  else {
2286  bool isN0ZExt = isZeroExtended(N0, DAG);
2287  bool isN1ZExt = isZeroExtended(N1, DAG);
2288  if (isN0ZExt && isN1ZExt)
2289  NewOpc = AArch64ISD::UMULL;
2290  else if (isN1SExt || isN1ZExt) {
2291  // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2292  // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2293  if (isN1SExt && isAddSubSExt(N0, DAG)) {
2294  NewOpc = AArch64ISD::SMULL;
2295  isMLA = true;
2296  } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2297  NewOpc = AArch64ISD::UMULL;
2298  isMLA = true;
2299  } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2300  std::swap(N0, N1);
2301  NewOpc = AArch64ISD::UMULL;
2302  isMLA = true;
2303  }
2304  }
2305 
2306  if (!NewOpc) {
2307  if (VT == MVT::v2i64)
2308  // Fall through to expand this. It is not legal.
2309  return SDValue();
2310  else
2311  // Other vector multiplications are legal.
2312  return Op;
2313  }
2314  }
2315 
2316  // Legalize to a S/UMULL instruction
2317  SDLoc DL(Op);
2318  SDValue Op0;
2319  SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2320  if (!isMLA) {
2321  Op0 = skipExtensionForVectorMULL(N0, DAG);
2322  assert(Op0.getValueType().is64BitVector() &&
2323  Op1.getValueType().is64BitVector() &&
2324  "unexpected types for extended operands to VMULL");
2325  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2326  }
2327  // Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
2328  // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
2329  // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
2330  SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2331  SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2332  EVT Op1VT = Op1.getValueType();
2333  return DAG.getNode(N0->getOpcode(), DL, VT,
2334  DAG.getNode(NewOpc, DL, VT,
2335  DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2336  DAG.getNode(NewOpc, DL, VT,
2337  DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2338 }
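// Illustrative sketch (not part of the original file): the scalar identity the MLA
// path above relies on. Distributing the widening multiply over the extended add is
// exact modulo the wide type, which is why (zext A + zext B) * zext C can be split
// into two UMULLs feeding an accumulate.
static bool mulDistributesOverWideAdd(unsigned char A, unsigned char B,
                                      unsigned char C) {
  unsigned short Fused = (unsigned short)(((unsigned)A + B) * C);
  unsigned short Split = (unsigned short)((unsigned)A * C + (unsigned)B * C);
  return Fused == Split; // always true: both sides are equal modulo 2^16
}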
2339 
2340 SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2341  SelectionDAG &DAG) const {
2342  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2343  SDLoc dl(Op);
2344  switch (IntNo) {
2345  default: return SDValue(); // Don't custom lower most intrinsics.
2346  case Intrinsic::thread_pointer: {
2347  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2348  return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2349  }
2350  case Intrinsic::aarch64_neon_smax:
2351  return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2352  Op.getOperand(1), Op.getOperand(2));
2353  case Intrinsic::aarch64_neon_umax:
2354  return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2355  Op.getOperand(1), Op.getOperand(2));
2356  case Intrinsic::aarch64_neon_smin:
2357  return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2358  Op.getOperand(1), Op.getOperand(2));
2359  case Intrinsic::aarch64_neon_umin:
2360  return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2361  Op.getOperand(1), Op.getOperand(2));
2362  }
2363 }
2364 
2365 SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
2366  SelectionDAG &DAG) const {
2367  switch (Op.getOpcode()) {
2368  default:
2369  llvm_unreachable("unimplemented operand");
2370  return SDValue();
2371  case ISD::BITCAST:
2372  return LowerBITCAST(Op, DAG);
2373  case ISD::GlobalAddress:
2374  return LowerGlobalAddress(Op, DAG);
2375  case ISD::GlobalTLSAddress:
2376  return LowerGlobalTLSAddress(Op, DAG);
2377  case ISD::SETCC:
2378  return LowerSETCC(Op, DAG);
2379  case ISD::BR_CC:
2380  return LowerBR_CC(Op, DAG);
2381  case ISD::SELECT:
2382  return LowerSELECT(Op, DAG);
2383  case ISD::SELECT_CC:
2384  return LowerSELECT_CC(Op, DAG);
2385  case ISD::JumpTable:
2386  return LowerJumpTable(Op, DAG);
2387  case ISD::ConstantPool:
2388  return LowerConstantPool(Op, DAG);
2389  case ISD::BlockAddress:
2390  return LowerBlockAddress(Op, DAG);
2391  case ISD::VASTART:
2392  return LowerVASTART(Op, DAG);
2393  case ISD::VACOPY:
2394  return LowerVACOPY(Op, DAG);
2395  case ISD::VAARG:
2396  return LowerVAARG(Op, DAG);
2397  case ISD::ADDC:
2398  case ISD::ADDE:
2399  case ISD::SUBC:
2400  case ISD::SUBE:
2401  return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
2402  case ISD::SADDO:
2403  case ISD::UADDO:
2404  case ISD::SSUBO:
2405  case ISD::USUBO:
2406  case ISD::SMULO:
2407  case ISD::UMULO:
2408  return LowerXALUO(Op, DAG);
2409  case ISD::FADD:
2410  return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
2411  case ISD::FSUB:
2412  return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
2413  case ISD::FMUL:
2414  return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
2415  case ISD::FDIV:
2416  return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
2417  case ISD::FP_ROUND:
2418  return LowerFP_ROUND(Op, DAG);
2419  case ISD::FP_EXTEND:
2420  return LowerFP_EXTEND(Op, DAG);
2421  case ISD::FRAMEADDR:
2422  return LowerFRAMEADDR(Op, DAG);
2423  case ISD::RETURNADDR:
2424  return LowerRETURNADDR(Op, DAG);
2425  case ISD::INSERT_VECTOR_ELT:
2426  return LowerINSERT_VECTOR_ELT(Op, DAG);
2427  case ISD::EXTRACT_VECTOR_ELT:
2428  return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2429  case ISD::BUILD_VECTOR:
2430  return LowerBUILD_VECTOR(Op, DAG);
2431  case ISD::VECTOR_SHUFFLE:
2432  return LowerVECTOR_SHUFFLE(Op, DAG);
2433  case ISD::EXTRACT_SUBVECTOR:
2434  return LowerEXTRACT_SUBVECTOR(Op, DAG);
2435  case ISD::SRA:
2436  case ISD::SRL:
2437  case ISD::SHL:
2438  return LowerVectorSRA_SRL_SHL(Op, DAG);
2439  case ISD::SHL_PARTS:
2440  return LowerShiftLeftParts(Op, DAG);
2441  case ISD::SRL_PARTS:
2442  case ISD::SRA_PARTS:
2443  return LowerShiftRightParts(Op, DAG);
2444  case ISD::CTPOP:
2445  return LowerCTPOP(Op, DAG);
2446  case ISD::FCOPYSIGN:
2447  return LowerFCOPYSIGN(Op, DAG);
2448  case ISD::AND:
2449  return LowerVectorAND(Op, DAG);
2450  case ISD::OR:
2451  return LowerVectorOR(Op, DAG);
2452  case ISD::XOR:
2453  return LowerXOR(Op, DAG);
2454  case ISD::PREFETCH:
2455  return LowerPREFETCH(Op, DAG);
2456  case ISD::SINT_TO_FP:
2457  case ISD::UINT_TO_FP:
2458  return LowerINT_TO_FP(Op, DAG);
2459  case ISD::FP_TO_SINT:
2460  case ISD::FP_TO_UINT:
2461  return LowerFP_TO_INT(Op, DAG);
2462  case ISD::FSINCOS:
2463  return LowerFSINCOS(Op, DAG);
2464  case ISD::MUL:
2465  return LowerMUL(Op, DAG);
2466  case ISD::INTRINSIC_WO_CHAIN:
2467  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2468  }
2469 }
2470 
2471 //===----------------------------------------------------------------------===//
2472 // Calling Convention Implementation
2473 //===----------------------------------------------------------------------===//
2474 
2475 #include "AArch64GenCallingConv.inc"
2476 
2477 /// Selects the correct CCAssignFn for a given CallingConvention value.
2478 CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2479  bool IsVarArg) const {
2480  switch (CC) {
2481  default:
2482  llvm_unreachable("Unsupported calling convention.");
2483  case CallingConv::WebKit_JS:
2484  return CC_AArch64_WebKit_JS;
2485  case CallingConv::GHC:
2486  return CC_AArch64_GHC;
2487  case CallingConv::C:
2488  case CallingConv::Fast:
2489  case CallingConv::PreserveMost:
2490  case CallingConv::CXX_FAST_TLS:
2491  case CallingConv::Swift:
2492  if (!Subtarget->isTargetDarwin())
2493  return CC_AArch64_AAPCS;
2494  return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
2495  }
2496 }
2497 
2498 CCAssignFn *
2499 AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
2500  return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2501  : RetCC_AArch64_AAPCS;
2502 }
2503 
2504 SDValue AArch64TargetLowering::LowerFormalArguments(
2505  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2506  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2507  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2508  MachineFunction &MF = DAG.getMachineFunction();
2509  MachineFrameInfo &MFI = MF.getFrameInfo();
2510 
2511  // Assign locations to all of the incoming arguments.
2512  SmallVector<CCValAssign, 16> ArgLocs;
2513  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2514  *DAG.getContext());
2515 
2516  // At this point, Ins[].VT may already be promoted to i32. To correctly
2517  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
2518  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
2519  // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
2520  // we use a special version of AnalyzeFormalArguments to pass in ValVT and
2521  // LocVT.
2522  unsigned NumArgs = Ins.size();
2523  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
2524  unsigned CurArgIdx = 0;
2525  for (unsigned i = 0; i != NumArgs; ++i) {
2526  MVT ValVT = Ins[i].VT;
2527  if (Ins[i].isOrigArg()) {
2528  std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
2529  CurArgIdx = Ins[i].getOrigArgIndex();
2530 
2531  // Get type of the original argument.
2532  EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
2533  /*AllowUnknown*/ true);
2534  MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
2535  // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
2536  if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
2537  ValVT = MVT::i8;
2538  else if (ActualMVT == MVT::i16)
2539  ValVT = MVT::i16;
2540  }
2541  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
2542  bool Res =
2543  AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
2544  assert(!Res && "Call operand has unhandled type");
2545  (void)Res;
2546  }
2547  assert(ArgLocs.size() == Ins.size());
2548  SmallVector<SDValue, 16> ArgValues;
2549  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2550  CCValAssign &VA = ArgLocs[i];
2551 
2552  if (Ins[i].Flags.isByVal()) {
2553  // Byval is used for HFAs in the PCS, but the system should work in a
2554  // non-compliant manner for larger structs.
2555  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2556  int Size = Ins[i].Flags.getByValSize();
2557  unsigned NumRegs = (Size + 7) / 8;
2558 
2559  // FIXME: This works on big-endian for composite byvals, which are the common
2560  // case. It should also work for fundamental types too.
2561  unsigned FrameIdx =
2562  MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
2563  SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
2564  InVals.push_back(FrameIdxN);
2565 
2566  continue;
2567  }
2568 
2569  if (VA.isRegLoc()) {
2570  // Arguments stored in registers.
2571  EVT RegVT = VA.getLocVT();
2572 
2573  SDValue ArgValue;
2574  const TargetRegisterClass *RC;
2575 
2576  if (RegVT == MVT::i32)
2577  RC = &AArch64::GPR32RegClass;
2578  else if (RegVT == MVT::i64)
2579  RC = &AArch64::GPR64RegClass;
2580  else if (RegVT == MVT::f16)
2581  RC = &AArch64::FPR16RegClass;
2582  else if (RegVT == MVT::f32)
2583  RC = &AArch64::FPR32RegClass;
2584  else if (RegVT == MVT::f64 || RegVT.is64BitVector())
2585  RC = &AArch64::FPR64RegClass;
2586  else if (RegVT == MVT::f128 || RegVT.is128BitVector())
2587  RC = &AArch64::FPR128RegClass;
2588  else
2589  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2590 
2591  // Transform the arguments in physical registers into virtual ones.
2592  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2593  ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
2594 
2595  // If this is an 8, 16 or 32-bit value, it is really passed promoted
2596  // to 64 bits. Insert an assert[sz]ext to capture this, then
2597  // truncate to the right size.
2598  switch (VA.getLocInfo()) {
2599  default:
2600  llvm_unreachable("Unknown loc info!");
2601  case CCValAssign::Full:
2602  break;
2603  case CCValAssign::BCvt:
2604  ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
2605  break;
2606  case CCValAssign::AExt:
2607  case CCValAssign::SExt:
2608  case CCValAssign::ZExt:
2609  // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
2610  // nodes after our lowering.
2611  assert(RegVT == Ins[i].VT && "incorrect register location selected");
2612  break;
2613  }
2614 
2615  InVals.push_back(ArgValue);
2616 
2617  } else { // VA.isRegLoc()
2618  assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
2619  unsigned ArgOffset = VA.getLocMemOffset();
2620  unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
2621 
2622  uint32_t BEAlign = 0;
2623  if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
2624  !Ins[i].Flags.isInConsecutiveRegs())
2625  BEAlign = 8 - ArgSize;
2626 
2627  int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
2628 
2629  // Create load nodes to retrieve arguments from the stack.
2630  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2631  SDValue ArgValue;
2632 
2633  // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
2634  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
2635  MVT MemVT = VA.getValVT();
2636 
2637  switch (VA.getLocInfo()) {
2638  default:
2639  break;
2640  case CCValAssign::BCvt:
2641  MemVT = VA.getLocVT();
2642  break;
2643  case CCValAssign::SExt:
2644  ExtType = ISD::SEXTLOAD;
2645  break;
2646  case CCValAssign::ZExt:
2647  ExtType = ISD::ZEXTLOAD;
2648  break;
2649  case CCValAssign::AExt:
2650  ExtType = ISD::EXTLOAD;
2651  break;
2652  }
2653 
2654  ArgValue = DAG.getExtLoad(
2655  ExtType, DL, VA.getLocVT(), Chain, FIN,
2656  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
2657  MemVT);
2658 
2659  InVals.push_back(ArgValue);
2660  }
2661  }
2662 
2663  // varargs
2664  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2665  if (isVarArg) {
2666  if (!Subtarget->isTargetDarwin()) {
2667  // The AAPCS variadic function ABI is identical to the non-variadic
2668  // one. As a result there may be more arguments in registers and we should
2669  // save them for future reference.
2670  saveVarArgRegisters(CCInfo, DAG, DL, Chain);
2671  }
2672 
2673  // This will point to the next argument passed via stack.
2674  unsigned StackOffset = CCInfo.getNextStackOffset();
2675  // We currently pass all varargs at 8-byte alignment.
2676  StackOffset = ((StackOffset + 7) & ~7);
2677  FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
2678  }
2679 
2680  unsigned StackArgSize = CCInfo.getNextStackOffset();
2681  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
2682  if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
2683  // This is a non-standard ABI so by fiat I say we're allowed to make full
2684  // use of the stack area to be popped, which must be aligned to 16 bytes in
2685  // any case:
2686  StackArgSize = alignTo(StackArgSize, 16);
2687 
2688  // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
2689  // a multiple of 16.
2690  FuncInfo->setArgumentStackToRestore(StackArgSize);
2691 
2692  // This realignment carries over to the available bytes below. Our own
2693  // callers will guarantee the space is free by giving an aligned value to
2694  // CALLSEQ_START.
2695  }
2696  // Even if we're not expected to free up the space, it's useful to know how
2697  // much is there while considering tail calls (because we can reuse it).
2698  FuncInfo->setBytesInStackArgArea(StackArgSize);
2699 
2700  return Chain;
2701 }
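// Illustrative sketch (not part of the original file): the big-endian stack-slot
// adjustment (BEAlign) used above. On big-endian AArch64 a small stack argument
// occupies the high-address end of its 8-byte slot, so its load offset is bumped.
// beAdjustedArgOffset is a hypothetical helper name.
static unsigned beAdjustedArgOffset(unsigned SlotOffset, unsigned ArgSizeInBytes,
                                    bool LittleEndian, bool InConsecutiveRegs) {
  unsigned BEAlign = 0;
  if (!LittleEndian && ArgSizeInBytes < 8 && !InConsecutiveRegs)
    BEAlign = 8 - ArgSizeInBytes;  // e.g. a 4-byte arg sits 4 bytes into the slot
  return SlotOffset + BEAlign;
}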
2702 
2703 void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
2704  SelectionDAG &DAG,
2705  const SDLoc &DL,
2706  SDValue &Chain) const {
2707  MachineFunction &MF = DAG.getMachineFunction();
2708  MachineFrameInfo &MFI = MF.getFrameInfo();
2709  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2710  auto PtrVT = getPointerTy(DAG.getDataLayout());
2711 
2712  SmallVector<SDValue, 8> MemOps;
2713 
2714  static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
2715  AArch64::X3, AArch64::X4, AArch64::X5,
2716  AArch64::X6, AArch64::X7 };
2717  static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
2718  unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
2719 
2720  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
2721  int GPRIdx = 0;
2722  if (GPRSaveSize != 0) {
2723  GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
2724 
2725  SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
2726 
2727  for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
2728  unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
2729  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
2730  SDValue Store = DAG.getStore(
2731  Val.getValue(1), DL, Val, FIN,
2732  MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
2733  MemOps.push_back(Store);
2734  FIN =
2735  DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
2736  }
2737  }
2738  FuncInfo->setVarArgsGPRIndex(GPRIdx);
2739  FuncInfo->setVarArgsGPRSize(GPRSaveSize);
2740 
2741  if (Subtarget->hasFPARMv8()) {
2742  static const MCPhysReg FPRArgRegs[] = {
2743  AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
2744  AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
2745  static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
2746  unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
2747 
2748  unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
2749  int FPRIdx = 0;
2750  if (FPRSaveSize != 0) {
2751  FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
2752 
2753  SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
2754 
2755  for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
2756  unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
2757  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
2758 
2759  SDValue Store = DAG.getStore(
2760  Val.getValue(1), DL, Val, FIN,
2761  MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
2762  MemOps.push_back(Store);
2763  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2764  DAG.getConstant(16, DL, PtrVT));
2765  }
2766  }
2767  FuncInfo->setVarArgsFPRIndex(FPRIdx);
2768  FuncInfo->setVarArgsFPRSize(FPRSaveSize);
2769  }
2770 
2771  if (!MemOps.empty()) {
2772  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
2773  }
2774 }
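// Illustrative sketch (not part of the original file): the size of the AAPCS variadic
// register save areas computed above, given how many of X0-X7 / Q0-Q7 the fixed
// arguments already consumed. The helper names are hypothetical.
static unsigned gprSaveAreaSize(unsigned FirstVariadicGPR) {
  return 8 * (8 - FirstVariadicGPR);   // one 8-byte slot per remaining X register
}
static unsigned fprSaveAreaSize(unsigned FirstVariadicFPR) {
  return 16 * (8 - FirstVariadicFPR);  // one 16-byte slot per remaining Q register
}
// e.g. for f(const char *fmt, ...), fmt uses X0, so 56 bytes of GPRs and (with
// FPARMv8) 128 bytes of FPRs are spilled for later retrieval by va_arg.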
2775 
2776 /// LowerCallResult - Lower the result values of a call into the
2777 /// appropriate copies out of appropriate physical registers.
2778 SDValue AArch64TargetLowering::LowerCallResult(
2779  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2780  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2781  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2782  SDValue ThisVal) const {
2783  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
2784  ? RetCC_AArch64_WebKit_JS
2785  : RetCC_AArch64_AAPCS;
2786  // Assign locations to each value returned by this call.
2787  SmallVector<CCValAssign, 16> RVLocs;
2788  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2789  *DAG.getContext());
2790  CCInfo.AnalyzeCallResult(Ins, RetCC);
2791 
2792  // Copy all of the result registers out of their specified physreg.
2793  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2794  CCValAssign VA = RVLocs[i];
2795 
2796  // Pass 'this' value directly from the argument to return value, to avoid
2797  // reg unit interference
2798  if (i == 0 && isThisReturn) {
2799  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
2800  "unexpected return calling convention register assignment");
2801  InVals.push_back(ThisVal);
2802  continue;
2803  }
2804 
2805  SDValue Val =
2806  DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
2807  Chain = Val.getValue(1);
2808  InFlag = Val.getValue(2);
2809 
2810  switch (VA.getLocInfo()) {
2811  default:
2812  llvm_unreachable("Unknown loc info!");
2813  case CCValAssign::Full:
2814  break;
2815  case CCValAssign::BCvt:
2816  Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
2817  break;
2818  }
2819 
2820  InVals.push_back(Val);
2821  }
2822 
2823  return Chain;
2824 }
2825 
2826 /// Return true if the calling convention is one that we can guarantee TCO for.
2827 static bool canGuaranteeTCO(CallingConv::ID CC) {
2828  return CC == CallingConv::Fast;
2829 }
2830 
2831 /// Return true if we might ever do TCO for calls with this calling convention.
2832 static bool mayTailCallThisCC(CallingConv::ID CC) {
2833  switch (CC) {
2834  case CallingConv::C:
2835  case CallingConv::PreserveMost:
2836  case CallingConv::Swift:
2837  return true;
2838  default:
2839  return canGuaranteeTCO(CC);
2840  }
2841 }
2842 
2843 bool AArch64TargetLowering::isEligibleForTailCallOptimization(
2844  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2845  const SmallVectorImpl<ISD::OutputArg> &Outs,
2846  const SmallVectorImpl<SDValue> &OutVals,
2847  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
2848  if (!mayTailCallThisCC(CalleeCC))
2849  return false;
2850 
2851  MachineFunction &MF = DAG.getMachineFunction();
2852  const Function *CallerF = MF.getFunction();
2853  CallingConv::ID CallerCC = CallerF->getCallingConv();
2854  bool CCMatch = CallerCC == CalleeCC;
2855 
2856  // Byval parameters hand the function a pointer directly into the stack area
2857  // we want to reuse during a tail call. Working around this *is* possible (see
2858  // X86) but less efficient and uglier in LowerCall.
2859  for (Function::const_arg_iterator i = CallerF->arg_begin(),
2860  e = CallerF->arg_end();
2861  i != e; ++i)
2862  if (i->hasByValAttr())
2863  return false;
2864 
2865  if (getTargetMachine().Options.GuaranteedTailCallOpt)
2866  return canGuaranteeTCO(CalleeCC) && CCMatch;
2867 
2868  // Externally-defined functions with weak linkage should not be
2869  // tail-called on AArch64 when the OS does not support dynamic
2870  // pre-emption of symbols, as the AAELF spec requires normal calls
2871  // to undefined weak functions to be replaced with a NOP or jump to the
2872  // next instruction. The behaviour of branch instructions in this
2873  // situation (as used for tail calls) is implementation-defined, so we
2874  // cannot rely on the linker replacing the tail call with a return.
2875  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2876  const GlobalValue *GV = G->getGlobal();
2877  const Triple &TT = getTargetMachine().getTargetTriple();
2878  if (GV->hasExternalWeakLinkage() &&
2879  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2880  return false;
2881  }
2882 
2883  // Now we search for cases where we can use a tail call without changing the
2884  // ABI. Sibcall is used in some places (particularly gcc) to refer to this
2885  // concept.
2886 
2887  // I want anyone implementing a new calling convention to think long and hard
2888  // about this assert.
2889  assert((!isVarArg || CalleeCC == CallingConv::C) &&
2890  "Unexpected variadic calling convention");
2891 
2892  LLVMContext &C = *DAG.getContext();
2893  if (isVarArg && !Outs.empty()) {
2894  // At least two cases here: if caller is fastcc then we can't have any
2895  // memory arguments (we'd be expected to clean up the stack afterwards). If
2896  // caller is C then we could potentially use its argument area.
2897 
2898  // FIXME: for now we take the most conservative of these in both cases:
2899  // disallow all variadic memory operands.
2900  SmallVector<CCValAssign, 16> ArgLocs;
2901  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2902 
2903  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
2904  for (const CCValAssign &ArgLoc : ArgLocs)
2905  if (!ArgLoc.isRegLoc())
2906  return false;
2907  }
2908 
2909  // Check that the call results are passed in the same way.
2910  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2911  CCAssignFnForCall(CalleeCC, isVarArg),
2912  CCAssignFnForCall(CallerCC, isVarArg)))
2913  return false;
2914  // The callee has to preserve all registers the caller needs to preserve.
2915  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
2916  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2917  if (!CCMatch) {
2918  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2919  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2920  return false;
2921  }
2922 
2923  // Nothing more to check if the callee is taking no arguments
2924  if (Outs.empty())
2925  return true;
2926 
2927  SmallVector<CCValAssign, 16> ArgLocs;
2928  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2929 
2930  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2931 
2932  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2933 
2934  // If the stack arguments for this call do not fit into our own save area then
2935  // the call cannot be made tail.
2936  if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
2937  return false;
2938 
2939  const MachineRegisterInfo &MRI = MF.getRegInfo();
2940  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2941  return false;
2942 
2943  return true;
2944 }
2945 
2946 SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
2947  SelectionDAG &DAG,
2948  MachineFrameInfo &MFI,
2949  int ClobberedFI) const {
2950  SmallVector<SDValue, 8> ArgChains;
2951  int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
2952  int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
2953 
2954  // Include the original chain at the beginning of the list. When this is
2955  // used by target LowerCall hooks, this helps legalize find the
2956  // CALLSEQ_BEGIN node.
2957  ArgChains.push_back(Chain);
2958 
2959  // Add a chain value for each stack-argument load that overlaps the clobbered object.
2960  for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
2961  UE = DAG.getEntryNode().getNode()->use_end();
2962  U != UE; ++U)
2963  if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
2964  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
2965  if (FI->getIndex() < 0) {
2966  int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
2967  int64_t InLastByte = InFirstByte;
2968  InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
2969 
2970  if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
2971  (FirstByte <= InFirstByte && InFirstByte <= LastByte))
2972  ArgChains.push_back(SDValue(L, 1));
2973  }
2974 
2975  // Build a tokenfactor for all the chains.
2976  return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
2977 }
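// Illustrative sketch (not part of the original file): the closed-interval overlap
// test used above to decide whether an incoming stack object must be loaded before
// the outgoing argument store clobbers it.
static bool byteRangesOverlap(long long FirstA, long long LastA,
                              long long FirstB, long long LastB) {
  return (FirstB <= FirstA && FirstA <= LastB) ||
         (FirstA <= FirstB && FirstB <= LastA);
}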
2978 
2979 bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
2980  bool TailCallOpt) const {
2981  return CallCC == CallingConv::Fast && TailCallOpt;
2982 }
2983 
2984 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
2985 /// and add input and output parameter nodes.
2986 SDValue
2987 AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
2988  SmallVectorImpl<SDValue> &InVals) const {
2989  SelectionDAG &DAG = CLI.DAG;
2990  SDLoc &DL = CLI.DL;
2991  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
2992  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
2993  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
2994  SDValue Chain = CLI.Chain;
2995  SDValue Callee = CLI.Callee;
2996  bool &IsTailCall = CLI.IsTailCall;
2997  CallingConv::ID CallConv = CLI.CallConv;
2998  bool IsVarArg = CLI.IsVarArg;
2999 
3000  MachineFunction &MF = DAG.getMachineFunction();
3001  bool IsThisReturn = false;
3002 
3003  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3004  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3005  bool IsSibCall = false;
3006 
3007  if (IsTailCall) {
3008  // Check if it's really possible to do a tail call.
3009  IsTailCall = isEligibleForTailCallOptimization(
3010  Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3011  if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
3012  report_fatal_error("failed to perform tail call elimination on a call "
3013  "site marked musttail");
3014 
3015  // A sibling call is one where we're under the usual C ABI and not planning
3016  // to change that but can still do a tail call:
3017  if (!TailCallOpt && IsTailCall)
3018  IsSibCall = true;
3019 
3020  if (IsTailCall)
3021  ++NumTailCalls;
3022  }
3023 
3024  // Analyze operands of the call, assigning locations to each operand.
3025  SmallVector<CCValAssign, 16> ArgLocs;
3026  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
3027  *DAG.getContext());
3028 
3029  if (IsVarArg) {
3030  // Handle fixed and variable vector arguments differently.
3031  // Variable vector arguments always go into memory.
3032  unsigned NumArgs = Outs.size();
3033 
3034  for (unsigned i = 0; i != NumArgs; ++i) {
3035  MVT ArgVT = Outs[i].VT;
3036  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3037  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
3038  /*IsVarArg=*/ !Outs[i].IsFixed);
3039  bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
3040  assert(!Res && "Call operand has unhandled type");
3041  (void)Res;
3042  }
3043  } else {
3044  // At this point, Outs[].VT may already be promoted to i32. To correctly
3045  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3046  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3047  // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
3048  // we use a special version of AnalyzeCallOperands to pass in ValVT and
3049  // LocVT.
3050  unsigned NumArgs = Outs.size();
3051  for (unsigned i = 0; i != NumArgs; ++i) {
3052  MVT ValVT = Outs[i].VT;
3053  // Get type of the original argument.
3054  EVT ActualVT = getValueType(DAG.getDataLayout(),
3055  CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
3056  /*AllowUnknown*/ true);
3057  MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
3058  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3059  // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3060  if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3061  ValVT = MVT::i8;
3062  else if (ActualMVT == MVT::i16)
3063  ValVT = MVT::i16;
3064 
3065  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3066  bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
3067  assert(!Res && "Call operand has unhandled type");
3068  (void)Res;
3069  }
3070  }
3071 
3072  // Get a count of how many bytes are to be pushed on the stack.
3073  unsigned NumBytes = CCInfo.getNextStackOffset();
3074 
3075  if (IsSibCall) {
3076  // Since we're not changing the ABI to make this a tail call, the memory
3077  // operands are already available in the caller's incoming argument space.
3078  NumBytes = 0;
3079  }
3080 
3081  // FPDiff is the byte offset of the call's argument area from the callee's.
3082  // Stores to callee stack arguments will be placed in FixedStackSlots offset
3083  // by this amount for a tail call. In a sibling call it must be 0 because the
3084  // caller will deallocate the entire stack and the callee still expects its
3085  // arguments to begin at SP+0. Completely unused for non-tail calls.
3086  int FPDiff = 0;
3087 
3088  if (IsTailCall && !IsSibCall) {
3089  unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
3090 
3091  // Since callee will pop argument stack as a tail call, we must keep the
3092  // popped size 16-byte aligned.
3093  NumBytes = alignTo(NumBytes, 16);
3094 
3095  // FPDiff will be negative if this tail call requires more space than we
3096  // would automatically have in our incoming argument space. Positive if we
3097  // can actually shrink the stack.
3098  FPDiff = NumReusableBytes - NumBytes;
3099 
3100  // The stack pointer must be 16-byte aligned at all times it's used for a
3101  // memory operation, which in practice means at *all* times and in
3102  // particular across call boundaries. Therefore our own arguments started at
3103  // a 16-byte aligned SP and the delta applied for the tail call should
3104  // satisfy the same constraint.
3105  assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
3106  }
3107 
3108  // Adjust the stack pointer for the new arguments...
3109  // These operations are automatically eliminated by the prolog/epilog pass
3110  if (!IsSibCall)
3111  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, DL,
3112  true),
3113  DL);
3114 
3115  SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
3116  getPointerTy(DAG.getDataLayout()));
3117 
3118  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3119  SmallVector<SDValue, 8> MemOpChains;
3120  auto PtrVT = getPointerTy(DAG.getDataLayout());
3121 
3122  // Walk the register/memloc assignments, inserting copies/loads.
3123  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
3124  ++i, ++realArgIdx) {
3125  CCValAssign &VA = ArgLocs[i];
3126  SDValue Arg = OutVals[realArgIdx];
3127  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3128 
3129  // Promote the value if needed.
3130  switch (VA.getLocInfo()) {
3131  default:
3132  llvm_unreachable("Unknown loc info!");
3133  case CCValAssign::Full:
3134  break;
3135  case CCValAssign::SExt:
3136  Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
3137  break;
3138  case CCValAssign::ZExt:
3139  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3140  break;
3141  case CCValAssign::AExt:
3142  if (Outs[realArgIdx].ArgVT == MVT::i1) {
3143  // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
3144  Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3145  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
3146  }
3147  Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
3148  break;
3149  case CCValAssign::BCvt:
3150  Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3151  break;
3152  case CCValAssign::FPExt:
3153  Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
3154  break;
3155  }
3156 
3157  if (VA.isRegLoc()) {
3158  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
3159  Outs[0].VT == MVT::i64) {
3160  assert(VA.getLocVT() == MVT::i64 &&
3161  "unexpected calling convention register assignment");
3162  assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
3163  "unexpected use of 'returned'");
3164  IsThisReturn = true;
3165  }
3166  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3167  } else {
3168  assert(VA.isMemLoc());
3169 
3170  SDValue DstAddr;
3171  MachinePointerInfo DstInfo;
3172 
3173  // FIXME: This works on big-endian for composite byvals, which are the
3174  // common case. It should also work for fundamental types too.
3175  uint32_t BEAlign = 0;
3176  unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
3177  : VA.getValVT().getSizeInBits();
3178  OpSize = (OpSize + 7) / 8;
3179  if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
3180  !Flags.isInConsecutiveRegs()) {
3181  if (OpSize < 8)
3182  BEAlign = 8 - OpSize;
3183  }
3184  unsigned LocMemOffset = VA.getLocMemOffset();
3185  int32_t Offset = LocMemOffset + BEAlign;
3186  SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3187  PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3188 
3189  if (IsTailCall) {
3190  Offset = Offset + FPDiff;
3191  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3192 
3193  DstAddr = DAG.getFrameIndex(FI, PtrVT);
3194  DstInfo =
3195  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
3196 
3197  // Make sure any stack arguments overlapping with where we're storing
3198  // are loaded before this eventual operation. Otherwise they'll be
3199  // clobbered.
3200  Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
3201  } else {
3202  SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3203 
3204  DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3205  DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
3206  LocMemOffset);
3207  }
3208 
3209  if (Outs[i].Flags.isByVal()) {
3210  SDValue SizeNode =
3211  DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
3212  SDValue Cpy = DAG.getMemcpy(
3213  Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
3214  /*isVol = */ false, /*AlwaysInline = */ false,
3215  /*isTailCall = */ false,
3216  DstInfo, MachinePointerInfo());
3217 
3218  MemOpChains.push_back(Cpy);
3219  } else {
3220  // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
3221  // promoted to a legal register type i32, we should truncate Arg back to
3222  // i1/i8/i16.
3223  if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
3224  VA.getValVT() == MVT::i16)
3225  Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
3226 
3227  SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
3228  MemOpChains.push_back(Store);
3229  }
3230  }
3231  }
3232 
3233  if (!MemOpChains.empty())
3234  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3235 
3236  // Build a sequence of copy-to-reg nodes chained together with token chain
3237  // and flag operands which copy the outgoing args into the appropriate regs.
3238  SDValue InFlag;
3239  for (auto &RegToPass : RegsToPass) {
3240  Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
3241  RegToPass.second, InFlag);
3242  InFlag = Chain.getValue(1);
3243  }
3244 
3245  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
3246  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
3247  // node so that legalize doesn't hack it.
3248  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
3249  Subtarget->isTargetMachO()) {
3250  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3251  const GlobalValue *GV = G->getGlobal();
3252  bool InternalLinkage = GV->hasInternalLinkage();
3253  if (InternalLinkage)
3254  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
3255  else {
3256  Callee =
3257  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
3258  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3259  }
3260  } else if (ExternalSymbolSDNode *S =
3261  dyn_cast<ExternalSymbolSDNode>(Callee)) {
3262  const char *Sym = S->getSymbol();
3263  Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
3264  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3265  }
3266  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3267  const GlobalValue *GV = G->getGlobal();
3268  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
3269  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3270  const char *Sym = S->getSymbol();
3271  Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
3272  }
3273 
3274  // We don't usually want to end the call-sequence here because we would tidy
3275  // the frame up *after* the call, however in the ABI-changing tail-call case
3276  // we've carefully laid out the parameters so that when sp is reset they'll be
3277  // in the correct location.
3278  if (IsTailCall && !IsSibCall) {
3279  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3280  DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
3281  InFlag = Chain.getValue(1);
3282  }
3283 
3284  std::vector<SDValue> Ops;
3285  Ops.push_back(Chain);
3286  Ops.push_back(Callee);
3287 
3288  if (IsTailCall) {
3289  // Each tail call may have to adjust the stack by a different amount, so
3290  // this information must travel along with the operation for eventual
3291  // consumption by emitEpilogue.
3292  Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
3293  }
3294 
3295  // Add argument registers to the end of the list so that they are known live
3296  // into the call.
3297  for (auto &RegToPass : RegsToPass)
3298  Ops.push_back(DAG.getRegister(RegToPass.first,
3299  RegToPass.second.getValueType()));
3300 
3301  // Add a register mask operand representing the call-preserved registers.
3302  const uint32_t *Mask;
3303  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3304  if (IsThisReturn) {
3305  // For 'this' returns, use the X0-preserving mask if applicable
3306  Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
3307  if (!Mask) {
3308  IsThisReturn = false;
3309  Mask = TRI->getCallPreservedMask(MF, CallConv);
3310  }
3311  } else
3312  Mask = TRI->getCallPreservedMask(MF, CallConv);
3313 
3314  assert(Mask && "Missing call preserved mask for calling convention");
3315  Ops.push_back(DAG.getRegisterMask(Mask));
3316 
3317  if (InFlag.getNode())
3318  Ops.push_back(InFlag);
3319 
3320  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3321 
3322  // If we're doing a tail call, use a TC_RETURN here rather than an
3323  // actual call instruction.
3324  if (IsTailCall) {
3325  MF.getFrameInfo().setHasTailCall();
3326  return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
3327  }
3328 
3329  // Returns a chain and a flag for retval copy to use.
3330  Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
3331  InFlag = Chain.getValue(1);
3332 
3333  uint64_t CalleePopBytes =
3334  DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
3335 
3336  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3337  DAG.getIntPtrConstant(CalleePopBytes, DL, true),
3338  InFlag, DL);
3339  if (!Ins.empty())
3340  InFlag = Chain.getValue(1);
3341 
3342  // Handle result values, copying them out of physregs into vregs that we
3343  // return.
3344  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
3345  InVals, IsThisReturn,
3346  IsThisReturn ? OutVals[0] : SDValue());
3347 }
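// Illustrative sketch (not part of the original file): the FPDiff computation used in
// LowerCall above for ABI-changing tail calls. A negative result means the callee
// needs more argument stack than the caller's own incoming area provides.
static int tailCallFPDiff(unsigned BytesInCallerArgArea, unsigned CalleeArgBytes) {
  unsigned NumBytes = (CalleeArgBytes + 15) & ~15u; // popped size stays 16-byte aligned
  return (int)BytesInCallerArgArea - (int)NumBytes;
}
// e.g. tailCallFPDiff(32, 48) == -16: the callee's stack arguments start 16 bytes
// below where the caller's own incoming arguments started.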
3348 
3349 bool AArch64TargetLowering::CanLowerReturn(
3350  CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3351  const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3352  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3353  ? RetCC_AArch64_WebKit_JS
3354  : RetCC_AArch64_AAPCS;
3355  SmallVector<CCValAssign, 16> RVLocs;
3356  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3357  return CCInfo.CheckReturn(Outs, RetCC);
3358 }
3359 
3360 SDValue
3361 AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3362  bool isVarArg,
3363  const SmallVectorImpl<ISD::OutputArg> &Outs,
3364  const SmallVectorImpl<SDValue> &OutVals,
3365  const SDLoc &DL, SelectionDAG &DAG) const {
3366  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3367  ? RetCC_AArch64_WebKit_JS
3368  : RetCC_AArch64_AAPCS;
3369  SmallVector<CCValAssign, 16> RVLocs;
3370  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3371  *DAG.getContext());
3372  CCInfo.AnalyzeReturn(Outs, RetCC);
3373 
3374  // Copy the result values into the output registers.
3375  SDValue Flag;
3376  SmallVector<SDValue, 4> RetOps(1, Chain);
3377  for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
3378  ++i, ++realRVLocIdx) {
3379  CCValAssign &VA = RVLocs[i];
3380  assert(VA.isRegLoc() && "Can only return in registers!");
3381  SDValue Arg = OutVals[realRVLocIdx];
3382 
3383  switch (VA.getLocInfo()) {
3384  default:
3385  llvm_unreachable("Unknown loc info!");
3386  case CCValAssign::Full:
3387  if (Outs[i].ArgVT == MVT::i1) {
3388  // AAPCS requires i1 to be zero-extended to i8 by the producer of the
3389  // value. This is strictly redundant on Darwin (which uses "zeroext
3390  // i1"), but will be optimised out before ISel.
3391  Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3392  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3393  }
3394  break;
3395  case CCValAssign::BCvt:
3396  Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3397  break;
3398  }
3399 
3400  Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
3401  Flag = Chain.getValue(1);
3402  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3403  }
3404  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3405  const MCPhysReg *I =
3406  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3407  if (I) {
3408  for (; *I; ++I) {
3409  if (AArch64::GPR64RegClass.contains(*I))
3410  RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3411  else if (AArch64::FPR64RegClass.contains(*I))
3412  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3413  else
3414  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3415  }
3416  }
3417 
3418  RetOps[0] = Chain; // Update chain.
3419 
3420  // Add the flag if we have it.
3421  if (Flag.getNode())
3422  RetOps.push_back(Flag);
3423 
3424  return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
3425 }
3426 
3427 //===----------------------------------------------------------------------===//
3428 // Other Lowering Code
3429 //===----------------------------------------------------------------------===//
3430 
3431 SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
3432  SelectionDAG &DAG) const {
3433  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3434  SDLoc DL(Op);
3435  const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
3436  const GlobalValue *GV = GN->getGlobal();
3437  unsigned char OpFlags =
3438  Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
3439 
3440  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
3441  "unexpected offset in global node");
3442 
3443  // This also catches the large code model case for Darwin.
3444  if ((OpFlags & AArch64II::MO_GOT) != 0) {
3445  SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
3446  // FIXME: Once remat is capable of dealing with instructions with register
3447  // operands, expand this into two nodes instead of using a wrapper node.
3448  return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
3449  }
3450 
3451  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
3452  const unsigned char MO_NC = AArch64II::MO_NC;
3453  return DAG.getNode(
3454  AArch64ISD::WrapperLarge, DL, PtrVT,
3455  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3),
3456  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC),
3457  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC),
3458  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC));
3459  } else {
3460  // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and
3461  // the only correct model on Darwin.
3462  SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3463  OpFlags | AArch64II::MO_PAGE);
3464  unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
3465  SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags);
3466 
3467  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
3468  return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
3469  }
3470 }
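// Illustrative sketch (not part of the original file): the page / page-offset split
// that the ADRP + ADDlow pair above materializes for the small code model.
static unsigned long long pageOf(unsigned long long Addr) {
  return Addr & ~0xfffULL;        // what ADRP computes, PC-relative, +/- 4GiB range
}
static unsigned long long pageOffsetOf(unsigned long long Addr) {
  return Addr & 0xfffULL;         // what the :lo12: ADD immediate supplies
}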
3471 
3472 /// \brief Convert a TLS address reference into the correct sequence of loads
3473 /// and calls to compute the variable's address (for Darwin, currently) and
3474 /// return an SDValue containing the final node.
3475 
3476 /// Darwin only has one TLS scheme which must be capable of dealing with the
3477 /// fully general situation, in the worst case. This means:
3478 /// + "extern __thread" declaration.
3479 /// + Defined in a possibly unknown dynamic library.
3480 ///
3481 /// The general system is that each __thread variable has a [3 x i64] descriptor
3482 /// which contains information used by the runtime to calculate the address. The
3483 /// only part of this the compiler needs to know about is the first xword, which
3484 /// contains a function pointer that must be called with the address of the
3485 /// entire descriptor in "x0".
3486 ///
3487 /// Since this descriptor may be in a different unit, in general even the
3488 /// descriptor must be accessed via an indirect load. The "ideal" code sequence
3489 /// is:
3490 /// adrp x0, _var@TLVPPAGE
3491 /// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
3492 /// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
3493 /// ; the function pointer
3494 /// blr x1 ; Uses descriptor address in x0
3495 /// ; Address of _var is now in x0.
3496 ///
3497 /// If the address of _var's descriptor *is* known to the linker, then it can
3498 /// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
3499 /// a slight efficiency gain.
3500 SDValue
3501 AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
3502  SelectionDAG &DAG) const {
3503  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
3504 
3505  SDLoc DL(Op);
3506  MVT PtrVT = getPointerTy(DAG.getDataLayout());
3507  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3508 
3509  SDValue TLVPAddr =
3510  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3511  SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
3512 
3513  // The first entry in the descriptor is a function pointer that we must call
3514  // to obtain the address of the variable.
3515  SDValue Chain = DAG.getEntryNode();
3516  SDValue FuncTLVGet = DAG.getLoad(
3517  MVT::i64, DL, Chain, DescAddr,
3518  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
3519  /* Alignment = */ 8,
3520  MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant |
3521  MachineMemOperand::MODereferenceable);
3522  Chain = FuncTLVGet.getValue(1);
3523 
3524  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
3525  MFI.setAdjustsStack(true);
3526 
3527  // TLS calls preserve all registers except those that absolutely must be
3528  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3529  // silly).
3530  const uint32_t *Mask =
3531  Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
3532 
3533  // Finally, we can make the call. This is just a degenerate version of a
3534  // normal AArch64 call node: x0 takes the address of the descriptor, and
3535  // returns the address of the variable in this thread.
3536  Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
3537  Chain =
3538  DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3539  Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
3540  DAG.getRegisterMask(Mask), Chain.getValue(1));
3541  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
3542 }
3543 
3544 /// When accessing thread-local variables under either the general-dynamic or
3545 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
3546 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
3547 /// is a function pointer to carry out the resolution.
3548 ///
3549 /// The sequence is:
3550 /// adrp x0, :tlsdesc:var
3551 /// ldr x1, [x0, #:tlsdesc_lo12:var]
3552 /// add x0, x0, #:tlsdesc_lo12:var
3553 /// .tlsdesccall var
3554 /// blr x1
3555 /// (TPIDR_EL0 offset now in x0)
3556 ///
3557 /// The above sequence must be produced unscheduled, to enable the linker to
3558 /// optimize/relax this sequence.
3559 /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
3560 /// above sequence, and expanded really late in the compilation flow, to ensure
3561 /// the sequence is produced as per above.
3562 SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
3563  const SDLoc &DL,
3564  SelectionDAG &DAG) const {
3565  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3566 
3567  SDValue Chain = DAG.getEntryNode();
3568  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3569 
3570  Chain =
3571  DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
3572  SDValue Glue = Chain.getValue(1);
3573 
3574  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
3575 }
3576 
3577 SDValue
3578 AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
3579  SelectionDAG &DAG) const {
3580  assert(Subtarget->isTargetELF() && "This function expects an ELF target");
3581  assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
3582  "ELF TLS only supported in small memory model");
3583  // Different choices can be made for the maximum size of the TLS area for a
3584  // module. For the small address model, the default TLS size is 16MiB and the
3585  // maximum TLS size is 4GiB.
3586  // FIXME: add -mtls-size command line option and make it control the 16MiB
3587  // vs. 4GiB code sequence generation.
3588  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3589 
3590  TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
3591 
3592  if (DAG.getTarget().Options.EmulatedTLS)
3593  return LowerToTLSEmulatedModel(GA, DAG);
3594 
3595  if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
3596  if (Model == TLSModel::LocalDynamic)
3597  Model = TLSModel::GeneralDynamic;
3598  }
3599 
3600  SDValue TPOff;
3601  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3602  SDLoc DL(Op);
3603  const GlobalValue *GV = GA->getGlobal();
3604 
3605  SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
3606 
3607  if (Model == TLSModel::LocalExec) {
3608  SDValue HiVar = DAG.getTargetGlobalAddress(
3609  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
3610  SDValue LoVar = DAG.getTargetGlobalAddress(
3611  GV, DL, PtrVT, 0,
3612  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3613 
3614  SDValue TPWithOff_lo =
3615  SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
3616  HiVar,
3617  DAG.getTargetConstant(0, DL, MVT::i32)),
3618  0);
3619  SDValue TPWithOff =
3620  SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
3621  LoVar,
3622  DAG.getTargetConstant(0, DL, MVT::i32)),
3623  0);
3624  return TPWithOff;
3625  } else if (Model == TLSModel::InitialExec) {
3626  TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3627  TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
3628  } else if (Model == TLSModel::LocalDynamic) {
3629  // Local-dynamic accesses proceed in two phases. First, a general-dynamic
3630  // TLS descriptor call against the special symbol _TLS_MODULE_BASE_
3631  // calculates the beginning of the module's TLS region; this is followed by
3632  // a DTPREL offset calculation.
3633 
3634  // These accesses will need deduplicating if there's more than one.
3635  AArch64FunctionInfo *MFI =
3636  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
3637  MFI->incNumLocalDynamicTLSAccesses();
3638 
3639  // The call needs a relocation too for linker relaxation. It doesn't make
3640  // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
3641  // the address.
3642  SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
3643  AArch64II::MO_TLS);
3644 
3645  // Now we can calculate the offset from TPIDR_EL0 to this module's
3646  // thread-local area.
3647  TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
3648 
3649  // Now use :dtprel_whatever: operations to calculate this variable's offset
3650  // in its thread-storage area.
3651  SDValue HiVar = DAG.getTargetGlobalAddress(
3652  GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
3653  SDValue LoVar = DAG.getTargetGlobalAddress(
3654  GV, DL, MVT::i64, 0,
3655  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3656 
3657  TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
3658  DAG.getTargetConstant(0, DL, MVT::i32)),
3659  0);
3660  TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
3661  DAG.getTargetConstant(0, DL, MVT::i32)),
3662  0);
3663  } else if (Model == TLSModel::GeneralDynamic) {
3664  // The call needs a relocation too for linker relaxation. It doesn't make
3665  // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
3666  // the address.
3667  SDValue SymAddr =
3668  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3669 
3670  // Finally we can make a call to calculate the offset from tpidr_el0.
3671  TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
3672  } else
3673  llvm_unreachable("Unsupported ELF TLS access model");
3674 
3675  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
3676 }
3677 
3678 SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
3679  SelectionDAG &DAG) const {
3680  if (Subtarget->isTargetDarwin())
3681  return LowerDarwinGlobalTLSAddress(Op, DAG);
3682  else if (Subtarget->isTargetELF())
3683  return LowerELFGlobalTLSAddress(Op, DAG);
3684 
3685  llvm_unreachable("Unexpected platform trying to use TLS");
3686 }
3687 
3688 SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3689  SDValue Chain = Op.getOperand(0);
3690  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3691  SDValue LHS = Op.getOperand(2);
3692  SDValue RHS = Op.getOperand(3);
3693  SDValue Dest = Op.getOperand(4);
3694  SDLoc dl(Op);
3695 
3696  // Handle f128 first, since lowering it will result in comparing the return
3697  // value of a libcall against zero, which is just what the rest of LowerBR_CC
3698  // is expecting to deal with.
3699  if (LHS.getValueType() == MVT::f128) {
3700  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
3701 
3702  // If softenSetCCOperands returned a scalar, we need to compare the result
3703  // against zero to select between true and false values.
3704  if (!RHS.getNode()) {
3705  RHS = DAG.getConstant(0, dl, LHS.getValueType());
3706  CC = ISD::SETNE;
3707  }
3708  }
3709 
3710  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
3711  // instruction.
3712  unsigned Opc = LHS.getOpcode();
3713  if (LHS.getResNo() == 1 && isOneConstant(RHS) &&
3714  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3715  Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
3716  assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
3717  "Unexpected condition code.");
3718  // Only lower legal XALUO ops.
3719  if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
3720  return SDValue();
3721 
3722  // The actual operation with overflow check.
3723  AArch64CC::CondCode OFCC;
3724  SDValue Value, Overflow;
3725  std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
3726 
3727  if (CC == ISD::SETNE)
3728  OFCC = getInvertedCondCode(OFCC);
3729  SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
3730 
3731  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
3732  Overflow);
3733  }
3734 
3735  if (LHS.getValueType().isInteger()) {
3736  assert((LHS.getValueType() == RHS.getValueType()) &&
3737  (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
3738 
3739  // If the RHS of the comparison is zero, we can potentially fold this
3740  // to a specialized branch.
3741  const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
3742  if (RHSC && RHSC->getZExtValue() == 0) {
3743  if (CC == ISD::SETEQ) {
3744  // See if we can use a TBZ to fold in an AND as well.
3745  // TBZ has a smaller branch displacement than CBZ. If the offset is
3746  // out of bounds, a late MI-layer pass rewrites branches.
3747  // 403.gcc is an example that hits this case.
3748  if (LHS.getOpcode() == ISD::AND &&
3749  isa<ConstantSDNode>(LHS.getOperand(1)) &&
3750  isPowerOf2_64(LHS.getConstantOperandVal(1))) {
3751  SDValue Test = LHS.getOperand(0);
3752  uint64_t Mask = LHS.getConstantOperandVal(1);
3753  return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
3754  DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
3755  Dest);
3756  }
3757 
3758  return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
3759  } else if (CC == ISD::SETNE) {
3760  // See if we can use a TBZ to fold in an AND as well.
3761  // TBZ has a smaller branch displacement than CBZ. If the offset is
3762  // out of bounds, a late MI-layer pass rewrites branches.
3763  // 403.gcc is an example that hits this case.
3764  if (LHS.getOpcode() == ISD::AND &&
3765  isa<ConstantSDNode>(LHS.getOperand(1)) &&
3766  isPowerOf2_64(LHS.getConstantOperandVal(1))) {
3767  SDValue Test = LHS.getOperand(0);
3768  uint64_t Mask = LHS.getConstantOperandVal(1);
3769  return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
3770  DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
3771  Dest);
3772  }
3773 
3774  return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
3775  } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
3776  // Don't combine AND since emitComparison converts the AND to an ANDS
3777  // (a.k.a. TST) and the test in the test bit and branch instruction
3778  // becomes redundant. This would also increase register pressure.
3779  uint64_t Mask = LHS.getValueSizeInBits() - 1;
3780  return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
3781  DAG.getConstant(Mask, dl, MVT::i64), Dest);
3782  }
3783  }
3784  if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
3785  LHS.getOpcode() != ISD::AND) {
3786  // Don't combine AND since emitComparison converts the AND to an ANDS
3787  // (a.k.a. TST) and the test in the test bit and branch instruction
3788  // becomes redundant. This would also increase register pressure.
3789  uint64_t Mask = LHS.getValueSizeInBits() - 1;
3790  return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
3791  DAG.getConstant(Mask, dl, MVT::i64), Dest);
3792  }
3793 
3794  SDValue CCVal;
3795  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
3796  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
3797  Cmp);
3798  }
3799 
3800  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
3801 
3802  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
3803  // clean. Some of them require two branches to implement.
3804  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
3805  AArch64CC::CondCode CC1, CC2;
3806  changeFPCCToAArch64CC(CC, CC1, CC2);
3807  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
3808  SDValue BR1 =
3809  DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
3810  if (CC2 != AArch64CC::AL) {
3811  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
3812  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
3813  Cmp);
3814  }
3815 
3816  return BR1;
3817 }
3818 
3819 SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
3820  SelectionDAG &DAG) const {
3821  EVT VT = Op.getValueType();
3822  SDLoc DL(Op);
3823 
3824  SDValue In1 = Op.getOperand(0);
3825  SDValue In2 = Op.getOperand(1);
3826  EVT SrcVT = In2.getValueType();
3827 
3828  if (SrcVT.bitsLT(VT))
3829  In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
3830  else if (SrcVT.bitsGT(VT))
3831  In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
3832 
3833  EVT VecVT;
3834  EVT EltVT;
3835  uint64_t EltMask;
3836  SDValue VecVal1, VecVal2;
3837  if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
3838  EltVT = MVT::i32;
3839  VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
3840  EltMask = 0x80000000ULL;
3841 
3842  if (!VT.isVector()) {
3843  VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
3844  DAG.getUNDEF(VecVT), In1);
3845  VecVal2 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
3846  DAG.getUNDEF(VecVT), In2);
3847  } else {
3848  VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
3849  VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
3850  }
3851  } else if (VT == MVT::f64 || VT == MVT::v2f64) {
3852  EltVT = MVT::i64;
3853  VecVT = MVT::v2i64;
3854 
3855  // We want to materialize a mask with the high bit set, but the AdvSIMD
3856  // immediate moves cannot materialize that in a single instruction for
3857  // 64-bit elements. Instead, materialize zero and then negate it.
3858  EltMask = 0;
3859 
3860  if (!VT.isVector()) {
3861  VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
3862  DAG.getUNDEF(VecVT), In1);
3863  VecVal2 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
3864  DAG.getUNDEF(VecVT), In2);
3865  } else {
3866  VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
3867  VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
3868  }
3869  } else {
3870  llvm_unreachable("Invalid type for copysign!");
3871  }
3872 
3873  SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
3874 
3875  // If we couldn't materialize the mask above, then the mask vector will be
3876  // the zero vector, and we need to negate it here.
3877  if (VT == MVT::f64 || VT == MVT::v2f64) {
3878  BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
3879  BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
3880  BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
3881  }
3882 
3883  SDValue Sel =
3884  DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
3885 
3886  if (VT == MVT::f32)
3887  return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
3888  else if (VT == MVT::f64)
3889  return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
3890  else
3891  return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
3892 }
3893 
3894 SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
3895  if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
3896  Attribute::NoImplicitFloat))
3897  return SDValue();
3898 
3899  if (!Subtarget->hasNEON())
3900  return SDValue();
3901 
3902  // While there is no integer popcount instruction, it can
3903  // be more efficiently lowered to the following sequence that uses
3904  // AdvSIMD registers/instructions as long as the copies to/from
3905  // the AdvSIMD registers are cheap.
3906  // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
3907  // CNT V0.8B, V0.8B // 8xbyte pop-counts
3908  // ADDV B0, V0.8B // sum 8xbyte pop-counts
3909  // UMOV X0, V0.B[0] // copy byte result back to integer reg
3910  SDValue Val = Op.getOperand(0);
3911  SDLoc DL(Op);
3912  EVT VT = Op.getValueType();
3913 
3914  if (VT == MVT::i32)
3915  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
3916  Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
3917 
3918  SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
3919  SDValue UaddLV = DAG.getNode(
3920  ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3921  DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
3922 
3923  if (VT == MVT::i64)
3924  UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
3925  return UaddLV;
3926 }
3927 
3928 SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3929 
3930  if (Op.getValueType().isVector())
3931  return LowerVSETCC(Op, DAG);
3932 
3933  SDValue LHS = Op.getOperand(0);
3934  SDValue RHS = Op.getOperand(1);
3935  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3936  SDLoc dl(Op);
3937 
3938  // We chose ZeroOrOneBooleanContents, so use zero and one.
3939  EVT VT = Op.getValueType();
3940  SDValue TVal = DAG.getConstant(1, dl, VT);
3941  SDValue FVal = DAG.getConstant(0, dl, VT);
3942 
3943  // Handle f128 first, since one possible outcome is a normal integer
3944  // comparison which gets picked up by the next if statement.
3945  if (LHS.getValueType() == MVT::f128) {
3946  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
3947 
3948  // If softenSetCCOperands returned a scalar, use it.
3949  if (!RHS.getNode()) {
3950  assert(LHS.getValueType() == Op.getValueType() &&
3951  "Unexpected setcc expansion!");
3952  return LHS;
3953  }
3954  }
3955 
3956  if (LHS.getValueType().isInteger()) {
3957  SDValue CCVal;
3958  SDValue Cmp =
3959  getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
3960 
3961  // Note that we inverted the condition above, so we reverse the order of
3962  // the true and false operands here. This will allow the setcc to be
3963  // matched to a single CSINC instruction.
3964  return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
3965  }
3966 
3967  // Now we know we're dealing with FP values.
3968  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
3969 
3970  // For FP values we need to perform an FCMP + CSEL sequence. Go ahead
3971  // and do the comparison.
3972  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
3973 
3974  AArch64CC::CondCode CC1, CC2;
3975  changeFPCCToAArch64CC(CC, CC1, CC2);
3976  if (CC2 == AArch64CC::AL) {
3977  changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
3978  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
3979 
3980  // Note that we inverted the condition above, so we reverse the order of
3981  // the true and false operands here. This will allow the setcc to be
3982  // matched to a single CSINC instruction.
3983  return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
3984  } else {
3985  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
3986  // totally clean. Some of them require two CSELs to implement. As is in
3987  // this case, we emit the first CSEL and then emit a second using the output
3988  // of the first as the RHS. We're effectively OR'ing the two CC's together.
3989 
3990  // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
3991  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
3992  SDValue CS1 =
3993  DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
3994 
3995  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
3996  return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
3997  }
3998 }
3999 
4000 SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
4001  SDValue RHS, SDValue TVal,
4002  SDValue FVal, const SDLoc &dl,
4003  SelectionDAG &DAG) const {
4004  // Handle f128 first, because it will result in a comparison of some RTLIB
4005  // call result against zero.
4006  if (LHS.getValueType() == MVT::f128) {
4007  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4008 
4009  // If softenSetCCOperands returned a scalar, we need to compare the result
4010  // against zero to select between true and false values.
4011  if (!RHS.getNode()) {
4012  RHS = DAG.getConstant(0, dl, LHS.getValueType());
4013  CC = ISD::SETNE;
4014  }
4015  }
4016 
4017  // Also handle f16, for which we need to do an f32 comparison.
4018  if (LHS.getValueType() == MVT::f16) {
4019  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
4020  RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
4021  }
4022 
4023  // Next, handle integers.
4024  if (LHS.getValueType().isInteger()) {
4025  assert((LHS.getValueType() == RHS.getValueType()) &&
4026  (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4027 
4028  unsigned Opcode = AArch64ISD::CSEL;
4029 
4030  // If both the TVal and the FVal are constants, see if we can swap them in
4031  // order to form a CSINV or CSINC out of them.
4032  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
4033  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
4034 
4035  if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
4036  std::swap(TVal, FVal);
4037  std::swap(CTVal, CFVal);
4038  CC = ISD::getSetCCInverse(CC, true);
4039  } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
4040  std::swap(TVal, FVal);
4041  std::swap(CTVal, CFVal);
4042  CC = ISD::getSetCCInverse(CC, true);
4043  } else if (TVal.getOpcode() == ISD::XOR) {
4044  // If TVal is a NOT we want to swap TVal and FVal so that we can match
4045  // with a CSINV rather than a CSEL.
4046  if (isAllOnesConstant(TVal.getOperand(1))) {
4047  std::swap(TVal, FVal);
4048  std::swap(CTVal, CFVal);
4049  CC = ISD::getSetCCInverse(CC, true);
4050  }
4051  } else if (TVal.getOpcode() == ISD::SUB) {
4052  // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
4053  // that we can match with a CSNEG rather than a CSEL.
4054  if (isNullConstant(TVal.getOperand(0))) {
4055  std::swap(TVal, FVal);
4056  std::swap(CTVal, CFVal);
4057  CC = ISD::getSetCCInverse(CC, true);
4058  }
4059  } else if (CTVal && CFVal) {
4060  const int64_t TrueVal = CTVal->getSExtValue();
4061  const int64_t FalseVal = CFVal->getSExtValue();
4062  bool Swap = false;
4063 
4064  // If both TVal and FVal are constants, see if FVal is the
4065  // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
4066  // instead of a CSEL in that case.
4067  if (TrueVal == ~FalseVal) {
4068  Opcode = AArch64ISD::CSINV;
4069  } else if (TrueVal == -FalseVal) {
4070  Opcode = AArch64ISD::CSNEG;
4071  } else if (TVal.getValueType() == MVT::i32) {
4072  // If our operands are only 32-bit wide, make sure we use 32-bit
4073  // arithmetic for the check whether we can use CSINC. This ensures that
4074  // the addition in the check will wrap around properly in case there is
4075  // an overflow (which would not be the case if we do the check with
4076  // 64-bit arithmetic).
4077  const uint32_t TrueVal32 = CTVal->getZExtValue();
4078  const uint32_t FalseVal32 = CFVal->getZExtValue();
4079 
4080  if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
4081  Opcode = AArch64ISD::CSINC;
4082 
4083  if (TrueVal32 > FalseVal32) {
4084  Swap = true;
4085  }
4086  }
4087  // 64-bit check whether we can use CSINC.
4088  } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
4089  Opcode = AArch64ISD::CSINC;
4090 
4091  if (TrueVal > FalseVal) {
4092  Swap = true;
4093  }
4094  }
4095 
4096  // Swap TVal and FVal if necessary.
4097  if (Swap) {
4098  std::swap(TVal, FVal);
4099  std::swap(CTVal, CFVal);
4100  CC = ISD::getSetCCInverse(CC, true);
4101  }
4102 
4103  if (Opcode != AArch64ISD::CSEL) {
4104  // Drop FVal since we can get its value by simply inverting/negating
4105  // TVal.
4106  FVal = TVal;
4107  }
4108  }
4109 
4110  // Avoid materializing a constant when possible by reusing a known value in
4111  // a register. However, don't perform this optimization if the known value
4112  // is one, zero or negative one in the case of a CSEL. We can always
4113  // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
4114  // FVal, respectively.
4115  ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
4116  if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
4117  !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
4118  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4119  // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
4120  // "a != C ? x : a" to avoid materializing C.
4121  if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
4122  TVal = LHS;
4123  else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
4124  FVal = LHS;
4125  } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
4126  assert (CTVal && CFVal && "Expected constant operands for CSNEG.");
4127  // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
4128  // avoid materializing C.
4129  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4130  if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
4131  Opcode = AArch64ISD::CSINV;
4132  TVal = LHS;
4133  FVal = DAG.getConstant(0, dl, FVal.getValueType());
4134  }
4135  }
4136 
4137  SDValue CCVal;
4138  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4139 
4140  EVT VT = TVal.getValueType();
4141  return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
4142  }
4143 
4144  // Now we know we're dealing with FP values.
4145  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
4146  assert(LHS.getValueType() == RHS.getValueType());
4147  EVT VT = TVal.getValueType();
4148  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4149 
4150  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4151  // clean. Some of them require two CSELs to implement.
4152  AArch64CC::CondCode CC1, CC2;
4153  changeFPCCToAArch64CC(CC, CC1, CC2);
4154 
4155  if (DAG.getTarget().Options.UnsafeFPMath) {
4156  // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
4157  // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
4158  ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
4159  if (RHSVal && RHSVal->isZero()) {
4160  ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
4161  ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
4162 
4163  if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
4164  CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
4165  TVal = LHS;
4166  else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
4167  CFVal && CFVal->isZero() &&
4168  FVal.getValueType() == LHS.getValueType())
4169  FVal = LHS;
4170  }
4171  }
4172 
4173  // Emit first, and possibly only, CSEL.
4174  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4175  SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4176 
4177  // If we need a second CSEL, emit it, using the output of the first as the
4178  // RHS. We're effectively OR'ing the two CC's together.
4179  if (CC2 != AArch64CC::AL) {
4180  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4181  return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4182  }
4183 
4184  // Otherwise, return the output of the first CSEL.
4185  return CS1;
4186 }
4187 
4188 SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
4189  SelectionDAG &DAG) const {
4190  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4191  SDValue LHS = Op.getOperand(0);
4192  SDValue RHS = Op.getOperand(1);
4193  SDValue TVal = Op.getOperand(2);
4194  SDValue FVal = Op.getOperand(3);
4195  SDLoc DL(Op);
4196  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4197 }
4198 
4199 SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
4200  SelectionDAG &DAG) const {
4201  SDValue CCVal = Op->getOperand(0);
4202  SDValue TVal = Op->getOperand(1);
4203  SDValue FVal = Op->getOperand(2);
4204  SDLoc DL(Op);
4205 
4206  unsigned Opc = CCVal.getOpcode();
4207  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
4208  // instruction.
4209  if (CCVal.getResNo() == 1 &&
4210  (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4211  Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
4212  // Only lower legal XALUO ops.
4213  if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
4214  return SDValue();
4215 
4216  AArch64CC::CondCode OFCC;
4217  SDValue Value, Overflow;
4218  std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
4219  SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
4220 
4221  return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
4222  CCVal, Overflow);
4223  }
4224 
4225  // Lower it the same way as we would lower a SELECT_CC node.
4226  ISD::CondCode CC;
4227  SDValue LHS, RHS;
4228  if (CCVal.getOpcode() == ISD::SETCC) {
4229  LHS = CCVal.getOperand(0);
4230  RHS = CCVal.getOperand(1);
4231  CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
4232  } else {
4233  LHS = CCVal;
4234  RHS = DAG.getConstant(0, DL, CCVal.getValueType());
4235  CC = ISD::SETNE;
4236  }
4237  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4238 }
4239 
4240 SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
4241  SelectionDAG &DAG) const {
4242  // Jump table entries are emitted as PC-relative offsets. No additional
4243  // tweaking is necessary here; just get the address of the jump table.
4244  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
4245  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4246  SDLoc DL(Op);
4247 
4248  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4249  !Subtarget->isTargetMachO()) {
4250  const unsigned char MO_NC = AArch64II::MO_NC;
4251  return DAG.getNode(
4252  AArch64ISD::WrapperLarge, DL, PtrVT,
4253  DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3),
4254  DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC),
4255  DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC),
4256  DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
4257  AArch64II::MO_G0 | MO_NC));
4258  }
4259 
4260  SDValue Hi =
4261  DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE);
4262  SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
4263  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4264  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
4265  return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
4266 }
4267 
4268 SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
4269  SelectionDAG &DAG) const {
4270  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
4271  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4272  SDLoc DL(Op);
4273 
4274  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4275  // Use the GOT for the large code model on iOS.
4276  if (Subtarget->isTargetMachO()) {
4277  SDValue GotAddr = DAG.getTargetConstantPool(
4278  CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
4279  AArch64II::MO_GOT);
4280  return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
4281  }
4282 
4283  const unsigned char MO_NC = AArch64II::MO_NC;
4284  return DAG.getNode(
4285  AArch64ISD::WrapperLarge, DL, PtrVT,
4286  DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
4287  CP->getOffset(), AArch64II::MO_G3),
4288  DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
4289  CP->getOffset(), AArch64II::MO_G2 | MO_NC),
4290  DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
4291  CP->getOffset(), AArch64II::MO_G1 | MO_NC),
4292  DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
4293  CP->getOffset(), AArch64II::MO_G0 | MO_NC));
4294  } else {
4295  // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on
4296  // ELF, the only valid one on Darwin.
4297  SDValue Hi =
4298  DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
4299  CP->getOffset(), AArch64II::MO_PAGE);
4300  SDValue Lo = DAG.getTargetConstantPool(
4301  CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
4302  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4303 
4304  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
4305  return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
4306  }
4307 }
4308 
4309 SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
4310  SelectionDAG &DAG) const {
4311  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
4312  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4313  SDLoc DL(Op);
4314  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4315  !Subtarget->isTargetMachO()) {
4316  const unsigned char MO_NC = AArch64II::MO_NC;
4317  return DAG.getNode(
4318  AArch64ISD::WrapperLarge, DL, PtrVT,
4319  DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3),
4320  DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC),
4321  DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC),
4322  DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC));
4323  } else {
4324  SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE);
4325  SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF |
4326  AArch64II::MO_NC);
4327  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
4328  return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
4329  }
4330 }
4331 
4332 SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
4333  SelectionDAG &DAG) const {
4334  AArch64FunctionInfo *FuncInfo =
4335  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4336 
4337  SDLoc DL(Op);
4338  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
4339  getPointerTy(DAG.getDataLayout()));
4340  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4341  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4342  MachinePointerInfo(SV));
4343 }
4344 
4345 SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
4346  SelectionDAG &DAG) const {
4347  // The layout of the va_list struct is specified in the AArch64 Procedure Call
4348  // Standard, section B.3.
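// As a sketch of that layout (derived from the offsets stored below, not a
// definition taken from this file):
//   struct __va_list {
//     void *__stack;   // offset 0:  next stacked argument
//     void *__gr_top;  // offset 8:  end of the GP register save area
//     void *__vr_top;  // offset 16: end of the FP/SIMD register save area
//     int   __gr_offs; // offset 24: negative offset to the next GP reg arg
//     int   __vr_offs; // offset 28: negative offset to the next FP/SIMD reg arg
//   };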
4349  MachineFunction &MF = DAG.getMachineFunction();
4350  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4351  auto PtrVT = getPointerTy(DAG.getDataLayout());
4352  SDLoc DL(Op);
4353 
4354  SDValue Chain = Op.getOperand(0);
4355  SDValue VAList = Op.getOperand(1);
4356  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4357  SmallVector<SDValue, 4> MemOps;
4358 
4359  // void *__stack at offset 0
4360  SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
4361  MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
4362  MachinePointerInfo(SV), /* Alignment = */ 8));
4363 
4364  // void *__gr_top at offset 8
4365  int GPRSize = FuncInfo->getVarArgsGPRSize();
4366  if (GPRSize > 0) {
4367  SDValue GRTop, GRTopAddr;
4368 
4369  GRTopAddr =
4370  DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
4371 
4372  GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
4373  GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
4374  DAG.getConstant(GPRSize, DL, PtrVT));
4375 
4376  MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
4377  MachinePointerInfo(SV, 8),
4378  /* Alignment = */ 8));
4379  }
4380 
4381  // void *__vr_top at offset 16
4382  int FPRSize = FuncInfo->getVarArgsFPRSize();
4383  if (FPRSize > 0) {
4384  SDValue VRTop, VRTopAddr;
4385  VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4386  DAG.getConstant(16, DL, PtrVT));
4387 
4388  VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
4389  VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
4390  DAG.getConstant(FPRSize, DL, PtrVT));
4391 
4392  MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
4393  MachinePointerInfo(SV, 16),
4394  /* Alignment = */ 8));
4395  }
4396 
4397  // int __gr_offs at offset 24
4398  SDValue GROffsAddr =
4399  DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
4400  MemOps.push_back(DAG.getStore(
4401  Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
4402  MachinePointerInfo(SV, 24), /* Alignment = */ 4));
4403 
4404  // int __vr_offs at offset 28
4405  SDValue VROffsAddr =
4406  DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
4407  MemOps.push_back(DAG.getStore(
4408  Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
4409  MachinePointerInfo(SV, 28), /* Alignment = */ 4));
4410 
4411  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4412 }
4413 
4414 SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
4415  SelectionDAG &DAG) const {
4416  return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG)
4417  : LowerAAPCS_VASTART(Op, DAG);
4418 }
4419 
4420 SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
4421  SelectionDAG &DAG) const {
4422  // AAPCS has three pointers and two ints (= 32 bytes), Darwin has a single
4423  // pointer.
4424  SDLoc DL(Op);
4425  unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32;
4426  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4427  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4428 
4429  return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
4430  Op.getOperand(2),
4431  DAG.getConstant(VaListSize, DL, MVT::i32),
4432  8, false, false, false, MachinePointerInfo(DestSV),
4433  MachinePointerInfo(SrcSV));
4434 }
4435 
4436 SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
4437  assert(Subtarget->isTargetDarwin() &&
4438  "automatic va_arg instruction only works on Darwin");
4439 
4440  const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4441  EVT VT = Op.getValueType();
4442  SDLoc DL(Op);
4443  SDValue Chain = Op.getOperand(0);
4444  SDValue Addr = Op.getOperand(1);
4445  unsigned Align = Op.getConstantOperandVal(3);
4446  auto PtrVT = getPointerTy(DAG.getDataLayout());
4447 
4448  SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V));
4449  Chain = VAList.getValue(1);
4450 
4451  if (Align > 8) {
4452  assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
4453  VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4454  DAG.getConstant(Align - 1, DL, PtrVT));
4455  VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
4456  DAG.getConstant(-(int64_t)Align, DL, PtrVT));
4457  }
4458 
4459  Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
4460  uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
4461 
4462  // Scalar integer and FP values smaller than 64 bits are implicitly extended
4463  // up to 64 bits. At the very least, we have to increase the striding of the
4464  // vaargs list to match this, and for FP values we need to introduce
4465  // FP_ROUND nodes as well.
4466  if (VT.isInteger() && !VT.isVector())
4467  ArgSize = 8;
4468  bool NeedFPTrunc = false;
4469  if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
4470  ArgSize = 8;
4471  NeedFPTrunc = true;
4472  }
4473 
4474  // Increment the pointer, VAList, to the next vaarg
4475  SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4476  DAG.getConstant(ArgSize, DL, PtrVT));
4477  // Store the incremented VAList to the legalized pointer
4478  SDValue APStore =
4479  DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
4480 
4481  // Load the actual argument out of the pointer VAList
4482  if (NeedFPTrunc) {
4483  // Load the value as an f64.
4484  SDValue WideFP =
4485  DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
4486  // Round the value down to an f32.
4487  SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
4488  DAG.getIntPtrConstant(1, DL));
4489  SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
4490  // Merge the rounded value with the chain output of the load.
4491  return DAG.getMergeValues(Ops, DL);
4492  }
4493 
4494  return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
4495 }
4496 
4497 SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
4498  SelectionDAG &DAG) const {
4499  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4500  MFI.setFrameAddressIsTaken(true);
4501 
4502  EVT VT = Op.getValueType();
4503  SDLoc DL(Op);
4504  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4505  SDValue FrameAddr =
4506  DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
4507  while (Depth--)
4508  FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
4509  MachinePointerInfo());
4510  return FrameAddr;
4511 }
4512 
4513 // FIXME? Maybe this could be a TableGen attribute on some registers and
4514 // this table could be generated automatically from RegInfo.
4515 unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
4516  SelectionDAG &DAG) const {
4517  unsigned Reg = StringSwitch<unsigned>(RegName)
4518  .Case("sp", AArch64::SP)
4519  .Default(0);
4520  if (Reg)
4521  return Reg;
4522  report_fatal_error(Twine("Invalid register name \""
4523  + StringRef(RegName) + "\"."));
4524 }
4525 
4526 SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
4527  SelectionDAG &DAG) const {
4528  MachineFunction &MF = DAG.getMachineFunction();
4529  MachineFrameInfo &MFI = MF.getFrameInfo();
4530  MFI.setReturnAddressIsTaken(true);
4531 
4532  EVT VT = Op.getValueType();
4533  SDLoc DL(Op);
4534  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4535  if (Depth) {
4536  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4537  SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
4538  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
4539  DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
4540  MachinePointerInfo());
4541  }
4542 
4543  // Return LR, which contains the return address. Mark it an implicit live-in.
4544  unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
4545  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
4546 }
4547 
4548 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
4549 /// i64 values and take a 2 x i64 value to shift plus a shift amount.
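/// As an illustrative sketch (not part of the original comment), for a shift
/// amount s applied to the pair {Lo, Hi}:
///   s < 64:  Lo' = (Lo >> s) | (Hi << (64 - s));  Hi' = Hi >> s
///   s >= 64: Lo' = Hi >> (s - 64);                Hi' = sign bits (SRA) or 0 (SRL)
/// The CSELs emitted below select between the two cases without branching.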
4550 SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
4551  SelectionDAG &DAG) const {
4552  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4553  EVT VT = Op.getValueType();
4554  unsigned VTBits = VT.getSizeInBits();
4555  SDLoc dl(Op);
4556  SDValue ShOpLo = Op.getOperand(0);
4557  SDValue ShOpHi = Op.getOperand(1);
4558  SDValue ShAmt = Op.getOperand(2);
4559  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4560 
4561  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4562 
4563  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
4564  DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
4565  SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4566 
4567  // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
4568  // is "undef". We wanted 0, so CSEL it directly.
4569  SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
4570  ISD::SETEQ, dl, DAG);
4571  SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
4572  HiBitsForLo =
4573  DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
4574  HiBitsForLo, CCVal, Cmp);
4575 
4576  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
4577  DAG.getConstant(VTBits, dl, MVT::i64));
4578 
4579  SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4580  SDValue LoForNormalShift =
4581  DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
4582 
4583  Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
4584  dl, DAG);
4585  CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
4586  SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4587  SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
4588  LoForNormalShift, CCVal, Cmp);
4589 
4590  // AArch64 shifts larger than the register width are wrapped rather than
4591  // clamped, so we can't just emit "hi >> x".
4592  SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4593  SDValue HiForBigShift =
4594  Opc == ISD::SRA
4595  ? DAG.getNode(Opc, dl, VT, ShOpHi,
4596  DAG.getConstant(VTBits - 1, dl, MVT::i64))
4597  : DAG.getConstant(0, dl, VT);
4598  SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
4599  HiForNormalShift, CCVal, Cmp);
4600 
4601  SDValue Ops[2] = { Lo, Hi };
4602  return DAG.getMergeValues(Ops, dl);
4603 }
4604 
4605 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4606 /// i64 values and take a 2 x i64 value to shift plus a shift amount.
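/// As an illustrative sketch (not part of the original comment), for a shift
/// amount s applied to the pair {Lo, Hi}:
///   s < 64:  Hi' = (Hi << s) | (Lo >> (64 - s));  Lo' = Lo << s
///   s >= 64: Hi' = Lo << (s - 64);                Lo' = 0
/// Again, CSELs pick between the two cases without branching.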
4607 SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
4608  SelectionDAG &DAG) const {
4609  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4610  EVT VT = Op.getValueType();
4611  unsigned VTBits = VT.getSizeInBits();
4612  SDLoc dl(Op);
4613  SDValue ShOpLo = Op.getOperand(0);
4614  SDValue ShOpHi = Op.getOperand(1);
4615  SDValue ShAmt = Op.getOperand(2);
4616 
4617  assert(Op.getOpcode() == ISD::SHL_PARTS);
4618  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
4619  DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
4620  SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4621 
4622  // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
4623  // is "undef". We wanted 0, so CSEL it directly.
4624  SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
4625  ISD::SETEQ, dl, DAG);
4626  SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
4627  LoBitsForHi =
4628  DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
4629  LoBitsForHi, CCVal, Cmp);
4630 
4631  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
4632  DAG.getConstant(VTBits, dl, MVT::i64));
4633  SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4634  SDValue HiForNormalShift =
4635  DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
4636 
4637  SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4638 
4639  Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
4640  dl, DAG);
4641  CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
4642  SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
4643  HiForNormalShift, CCVal, Cmp);
4644 
4645  // AArch64 shifts larger than the register width are wrapped rather than
4646  // clamped, so we can't just emit "lo << a" if a is too big.
4647  SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
4648  SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4649  SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
4650  LoForNormalShift, CCVal, Cmp);
4651 
4652  SDValue Ops[2] = { Lo, Hi };
4653  return DAG.getMergeValues(Ops, dl);
4654 }
4655 
4656  bool AArch64TargetLowering::isOffsetFoldingLegal(
4657  const GlobalAddressSDNode *GA) const {
4658  // The AArch64 target doesn't support folding offsets into global addresses.
4659  return false;
4660 }
4661 
4662  bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4663  // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
4664  // FIXME: We should be able to handle f128 as well with a clever lowering.
4665  if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32))
4666  return true;
4667 
4668  if (VT == MVT::f64)
4669  return AArch64_AM::getFP64Imm(Imm) != -1;
4670  else if (VT == MVT::f32)
4671  return AArch64_AM::getFP32Imm(Imm) != -1;
4672  return false;
4673 }
4674 
4675 //===----------------------------------------------------------------------===//
4676 // AArch64 Optimization Hooks
4677 //===----------------------------------------------------------------------===//
4678 
4679 static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
4680  SDValue Operand, SelectionDAG &DAG,
4681  int &ExtraSteps) {
4682  EVT VT = Operand.getValueType();
4683  if (ST->hasNEON() &&
4684  (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
4685  VT == MVT::f32 || VT == MVT::v1f32 ||
4686  VT == MVT::v2f32 || VT == MVT::v4f32)) {
4687  if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
4688  // For the reciprocal estimates, convergence is quadratic, so the number
4689  // of digits is doubled after each iteration. In ARMv8, the accuracy of
4690  // the initial estimate is 2^-8. Thus the number of extra steps to refine
4691  // the result for float (23 mantissa bits) is 2 and for double (52
4692  // mantissa bits) is 3.
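// Worked out (an illustration, consistent with the reasoning above): starting
// from ~8 correct bits, two steps give 8 -> 16 -> 32 bits, covering the 23
// mantissa bits of float, and three steps give 8 -> 16 -> 32 -> 64 bits,
// covering the 52 mantissa bits of double.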
4693  ExtraSteps = VT == MVT::f64 ? 3 : 2;
4694 
4695  return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
4696  }
4697 
4698  return SDValue();
4699 }
4700 
4701 SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
4702  SelectionDAG &DAG, int Enabled,
4703  int &ExtraSteps,
4704  bool &UseOneConst,
4705  bool Reciprocal) const {
4706  if (Enabled == ReciprocalEstimate::Enabled ||
4707  (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
4708  if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
4709  DAG, ExtraSteps)) {
4710  SDLoc DL(Operand);
4711  EVT VT = Operand.getValueType();
4712 
4713  SDNodeFlags Flags;
4714  Flags.setUnsafeAlgebra(true);
4715 
4716  // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
4717  // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
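// In the loop below, Step = FRSQRTS(Operand, Estimate * Estimate), i.e.
// 0.5 * (3 - Operand * Estimate^2), and multiplying Estimate by it refines
// the estimate as in the Newton formula above.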
4718  for (int i = ExtraSteps; i > 0; --i) {
4719  SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
4720  &Flags);
4721  Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, &Flags);
4722  Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags);
4723  }
4724 
4725  if (!Reciprocal) {
4726  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
4727  VT);
4728  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
4729  SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
4730 
4731  Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, &Flags);
4732  // Correct the result if the operand is 0.0.
4733  Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
4734  VT, Eq, Operand, Estimate);
4735  }
4736 
4737  ExtraSteps = 0;
4738  return Estimate;
4739  }
4740 
4741  return SDValue();
4742 }
4743 
4744 SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
4745  SelectionDAG &DAG, int Enabled,
4746  int &ExtraSteps) const {
4747  if (Enabled == ReciprocalEstimate::Enabled)
4748  if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
4749  DAG, ExtraSteps)) {
4750  SDLoc DL(Operand);
4751  EVT VT = Operand.getValueType();
4752 
4753  SDNodeFlags Flags;
4754  Flags.setUnsafeAlgebra(true);
4755 
4756  // Newton reciprocal iteration: E * (2 - X * E)
4757  // AArch64 reciprocal iteration instruction: (2 - M * N)
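// In the loop below, Step = FRECPS(Operand, Estimate), i.e.
// 2 - Operand * Estimate, and multiplying Estimate by it refines the
// estimate as in the Newton formula above.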
4758  for (int i = ExtraSteps; i > 0; --i) {
4759  SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
4760  Estimate, &Flags);
4761  Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags);
4762  }
4763 
4764  ExtraSteps = 0;
4765  return Estimate;
4766  }
4767 
4768  return SDValue();
4769 }
4770 
4771 //===----------------------------------------------------------------------===//
4772 // AArch64 Inline Assembly Support
4773 //===----------------------------------------------------------------------===//
4774 
4775 // Table of Constraints
4776  // TODO: This is the current set of constraints supported by ARM for the
4777  // compiler; not all of them may make sense, e.g. S may be difficult to support.
4778 //
4779 // r - A general register
4780 // w - An FP/SIMD register of some size in the range v0-v31
4781 // x - An FP/SIMD register of some size in the range v0-v15
4782 // I - Constant that can be used with an ADD instruction
4783 // J - Constant that can be used with a SUB instruction
4784 // K - Constant that can be used with a 32-bit logical instruction
4785 // L - Constant that can be used with a 64-bit logical instruction
4786 // M - Constant that can be used as a 32-bit MOV immediate
4787 // N - Constant that can be used as a 64-bit MOV immediate
4788 // Q - A memory reference with base register and no offset
4789 // S - A symbolic address
4790 // Y - Floating point constant zero
4791 // Z - Integer constant zero
4792 //
4793 // Note that general register operands will be output using their 64-bit x
4794 // register name, whatever the size of the variable, unless the asm operand
4795 // is prefixed by the %w modifier. Floating-point and SIMD register operands
4796 // will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
4797 // %q modifier.
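// For illustration only (a hypothetical user-level example, not code from this
// file), the constraints above are used from C like so:
//   int add4095(int a) {
//     int r;
//     asm("add %w0, %w1, %2" : "=r"(r) : "r"(a), "I"(4095));
//     return r;
//   }
// Here "r" binds a general register (printed with its 32-bit name because of
// the %w modifier) and "I" only accepts an ADD-style immediate, i.e. 0-4095
// optionally shifted left by 12.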
4798 const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4799  // At this point, we have to lower this constraint to something else, so we
4800  // lower it to an "r" or "w". However, by doing this we will force the result
4801  // to be in register, while the X constraint is much more permissive.
4802  //
4803  // Although we are correct (we are free to emit anything, without
4804  // constraints), we might break use cases that would expect us to be more
4805  // efficient and emit something else.
4806  if (!Subtarget->hasFPARMv8())
4807  return "r";
4808 
4809  if (ConstraintVT.isFloatingPoint())
4810  return "w";
4811 
4812  if (ConstraintVT.isVector() &&
4813  (ConstraintVT.getSizeInBits() == 64 ||
4814  ConstraintVT.getSizeInBits() == 128))
4815  return "w";
4816 
4817  return "r";
4818 }
4819 
4820 /// getConstraintType - Given a constraint letter, return the type of
4821 /// constraint it is for this target.
4822  AArch64TargetLowering::ConstraintType
4823  AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
4824  if (Constraint.size() == 1) {
4825  switch (Constraint[0]) {
4826  default:
4827  break;
4828  case 'z':
4829  return C_Other;
4830  case 'x':
4831  case 'w':
4832  return C_RegisterClass;
4833  // An address with a single base register. Due to the way we
4834  // currently handle addresses it is the same as 'r'.
4835  case 'Q':
4836  return C_Memory;
4837  }
4838  }
4839  return TargetLowering::getConstraintType(Constraint);
4840 }
4841 
4842 /// Examine constraint type and operand type and determine a weight value.
4843 /// This object must already have been set up with the operand type
4844 /// and the current alternative constraint selected.
4845  TargetLowering::ConstraintWeight
4846  AArch64TargetLowering::getSingleConstraintMatchWeight(
4847  AsmOperandInfo &info, const char *constraint) const {
4848  ConstraintWeight weight = CW_Invalid;
4849  Value *CallOperandVal = info.CallOperandVal;
4850  // If we don't have a value, we can't do a match,
4851  // but allow it at the lowest weight.
4852  if (!CallOperandVal)
4853  return CW_Default;
4854  Type *type = CallOperandVal->getType();
4855  // Look at the constraint type.
4856  switch (*constraint) {
4857  default:
4858  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
4859  break;
4860  case 'x':
4861  case 'w':
4862  if (type->isFloatingPointTy() || type->isVectorTy())
4863  weight = CW_Register;
4864  break;
4865  case 'z':
4866  weight = CW_Constant;
4867  break;
4868  }
4869  return weight;
4870 }
4871 
4872 std::pair<unsigned, const TargetRegisterClass *>
4873 AArch64TargetLowering::getRegForInlineAsmConstraint(
4874  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
4875  if (Constraint.size() == 1) {
4876  switch (Constraint[0]) {
4877  case 'r':
4878  if (VT.getSizeInBits() == 64)
4879  return std::make_pair(0U, &AArch64::GPR64commonRegClass);
4880  return std::make_pair(0U, &AArch64::GPR32commonRegClass);
4881  case 'w':
4882  if (VT.getSizeInBits() == 16)
4883  return std::make_pair(0U, &AArch64::FPR16RegClass);
4884  if (VT.getSizeInBits() == 32)
4885  return std::make_pair(0U, &AArch64::FPR32RegClass);
4886  if (VT.getSizeInBits() == 64)
4887  return std::make_pair(0U, &AArch64::FPR64RegClass);
4888  if (VT.getSizeInBits() == 128)
4889  return std::make_pair(0U, &AArch64::FPR128RegClass);
4890  break;
4891  // The instructions that this constraint is designed for can
4892  // only take 128-bit registers so just use that regclass.
4893  case 'x':
4894  if (VT.getSizeInBits() == 128)
4895  return std::make_pair(0U, &AArch64::FPR128_loRegClass);
4896  break;
4897  }
4898  }
4899  if (StringRef("{cc}").equals_lower(Constraint))
4900  return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
4901 
4902  // Use the default implementation in TargetLowering to convert the register
4903  // constraint into a member of a register class.
4904  std::pair<unsigned, const TargetRegisterClass *> Res;
4905  Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4906 
4907  // Not found as a standard register?
4908  if (!Res.second) {
4909  unsigned Size = Constraint.size();
4910  if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
4911  tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
4912  int RegNo;
4913  bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
4914  if (!Failed && RegNo >= 0 && RegNo <= 31) {
4915  // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
4916  // By default we'll emit v0-v31 for this unless there's a modifier where
4917  // we'll emit the correct register as well.
4918  if (VT != MVT::Other && VT.getSizeInBits() == 64) {
4919  Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
4920  Res.second = &AArch64::FPR64RegClass;
4921  } else {
4922  Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
4923  Res.second = &AArch64::FPR128RegClass;
4924  }
4925  }
4926  }
4927  }
4928 
4929  return Res;
4930 }
4931 
4932 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
4933 /// vector. If it is invalid, don't add anything to Ops.
4934 void AArch64TargetLowering::LowerAsmOperandForConstraint(
4935  SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
4936  SelectionDAG &DAG) const {
4937  SDValue Result;
4938 
4939  // Currently only support length 1 constraints.
4940  if (Constraint.length() != 1)
4941  return;
4942 
4943  char ConstraintLetter = Constraint[0];
4944  switch (ConstraintLetter) {
4945  default:
4946  break;
4947 
4948  // This set of constraints deals with valid constants for various instructions.
4949  // Validate and return a target constant for them if we can.
4950  case 'z': {
4951  // 'z' maps to xzr or wzr so it needs an input of 0.
4952  if (!isNullConstant(Op))
4953  return;
4954 
4955  if (Op.getValueType() == MVT::i64)
4956  Result = DAG.getRegister(AArch64::XZR, MVT::i64);
4957  else
4958  Result = DAG.getRegister(AArch64::WZR, MVT::i32);
4959  break;
4960  }
4961 
4962  case 'I':
4963  case 'J':
4964  case 'K':
4965  case 'L':
4966  case 'M':
4967  case 'N':
4968  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4969  if (!C)
4970  return;
4971 
4972  // Grab the value and do some validation.
4973  uint64_t CVal = C->getZExtValue();
4974  switch (ConstraintLetter) {
4975  // The I constraint applies only to simple ADD or SUB immediate operands:
4976  // i.e. 0 to 4095 with optional shift by 12
4977  // The J constraint applies only to ADD or SUB immediates that would be
4978  // valid when negated, i.e. if [an add pattern] were to be output as a SUB
4979  // instruction [or vice versa], in other words -1 to -4095 with optional
4980  // left shift by 12.
4981  case 'I':
4982  if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
4983  break;
4984  return;
4985  case 'J': {
4986  uint64_t NVal = -C->getSExtValue();
4987  if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
4988  CVal = C->getSExtValue();
4989  break;
4990  }
4991  return;
4992  }
4993  // The K and L constraints apply *only* to logical immediates, including
4994  // what used to be the MOVI alias for ORR (though the MOVI alias has now
4995  // been removed and MOV should be used). So these constraints have to
4996  // distinguish between bit patterns that are valid 32-bit or 64-bit
4997  // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
4998  // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
4999  // versa.
5000  case 'K':
5001  if (AArch64_AM::isLogicalImmediate(CVal, 32))
5002  break;
5003  return;
5004  case 'L':
5005  if (AArch64_AM::isLogicalImmediate(CVal, 64))
5006  break;
5007  return;
5008  // The M and N constraints are a superset of K and L respectively, for use
5009  // with the MOV (immediate) alias. As well as the logical immediates they
5010  // also match 32 or 64-bit immediates that can be loaded either using a
5011  // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
5012  // (M) or 64-bit 0x1234000000000000 (N) etc.
5013  // As a note some of this code is liberally stolen from the asm parser.
5014  case 'M': {
5015  if (!isUInt<32>(CVal))
5016  return;
5017  if (AArch64_AM::isLogicalImmediate(CVal, 32))
5018  break;
5019  if ((CVal & 0xFFFF) == CVal)
5020  break;
5021  if ((CVal & 0xFFFF0000ULL) == CVal)
5022  break;
5023  uint64_t NCVal = ~(uint32_t)CVal;
5024  if ((NCVal & 0xFFFFULL) == NCVal)
5025  break;
5026  if ((NCVal & 0xFFFF0000ULL) == NCVal)
5027  break;
5028  return;
5029  }
5030  case 'N': {
5031  if (AArch64_AM::isLogicalImmediate(CVal, 64))
5032  break;
5033  if ((CVal & 0xFFFFULL) == CVal)
5034  break;
5035  if ((CVal & 0xFFFF0000ULL) == CVal)
5036  break;
5037  if ((CVal & 0xFFFF00000000ULL) == CVal)
5038  break;
5039  if ((CVal & 0xFFFF000000000000ULL) == CVal)
5040  break;
5041  uint64_t NCVal = ~CVal;
5042  if ((NCVal & 0xFFFFULL) == NCVal)
5043  break;
5044  if ((NCVal & 0xFFFF0000ULL) == NCVal)
5045  break;
5046  if ((NCVal & 0xFFFF00000000ULL) == NCVal)
5047  break;
5048  if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
5049  break;
5050  return;
5051  }
5052  default:
5053  return;
5054  }
5055 
5056  // All assembler immediates are 64-bit integers.
5057  Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
5058  break;
5059  }
5060 
5061  if (Result.getNode()) {
5062  Ops.push_back(Result);
5063  return;
5064  }
5065 
5066  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5067 }
5068 
5069 //===----------------------------------------------------------------------===//
5070 // AArch64 Advanced SIMD Support
5071 //===----------------------------------------------------------------------===//
5072 
5073 /// WidenVector - Given a value in the V64 register class, produce the
5074 /// equivalent value in the V128 register class.
5075 static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
5076  EVT VT = V64Reg.getValueType();
5077  unsigned NarrowSize = VT.getVectorNumElements();
5078  MVT EltTy = VT.getVectorElementType().getSimpleVT();
5079  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
5080  SDLoc DL(V64Reg);
5081 
5082  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
5083  V64Reg, DAG.getConstant(0, DL, MVT::i32));
5084 }
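// For example, a v2i32 value held in a 64-bit D register becomes the low half
// of a v4i32 value in a 128-bit Q register, with the upper half undefined.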
5085 
5086 /// getExtFactor - Determine the adjustment factor for the position when
5087 /// generating an "extract from vector registers" instruction.
5088 static unsigned getExtFactor(SDValue &V) {
5089  EVT EltType = V.getValueType().getVectorElementType();
5090  return EltType.getSizeInBits() / 8;
5091 }
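// For example, the factor is 1 for i8 elements and 4 for i32/f32 elements, so
// an EXT immediate computed as "lane index * factor" is expressed in bytes.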
5092 
5093 /// NarrowVector - Given a value in the V128 register class, produce the
5094 /// equivalent value in the V64 register class.
5095 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
5096  EVT VT = V128Reg.getValueType();
5097  unsigned WideSize = VT.getVectorNumElements();
5098  MVT EltTy = VT.getVectorElementType().getSimpleVT();
5099  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
5100  SDLoc DL(V128Reg);
5101 
5102  return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
5103 }
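// The inverse of WidenVector: e.g. a v4i32 value in a Q register yields the
// v2i32 value held in its low 64-bit D subregister (dsub).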
5104 
5105 // Gather data to see if the operation can be modelled as a
5106 // shuffle in combination with VEXTs.
5107 SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
5108  SelectionDAG &DAG) const {
5109  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
5110  SDLoc dl(Op);
5111  EVT VT = Op.getValueType();
5112  unsigned NumElts = VT.getVectorNumElements();
5113 
5114  struct ShuffleSourceInfo {
5115  SDValue Vec;
5116  unsigned MinElt;
5117  unsigned MaxElt;
5118 
5119  // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
5120  // be compatible with the shuffle we intend to construct. As a result
5121  // ShuffleVec will be some sliding window into the original Vec.
5122  SDValue ShuffleVec;
5123 
5124  // Code should guarantee that element i in Vec starts at element
5125  // "WindowBase + i * WindowScale" in ShuffleVec.
5126  int WindowBase;
5127  int WindowScale;
5128 
5129  ShuffleSourceInfo(SDValue Vec)
5130  : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
5131  ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
5132 
5133  bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
5134  };
5135 
5136  // First gather all vectors used as an immediate source for this BUILD_VECTOR
5137  // node.
5138  SmallVector<ShuffleSourceInfo, 2> Sources;
5139  for (unsigned i = 0; i < NumElts; ++i) {
5140  SDValue V = Op.getOperand(i);
5141  if (V.isUndef())
5142  continue;
5143  else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5144  !isa<ConstantSDNode>(V.getOperand(1))) {
5145  // A shuffle can only come from building a vector from various
5146  // elements of other vectors, provided their indices are constant.
5147  return SDValue();
5148  }
5149 
5150  // Add this element source to the list if it's not already there.
5151  SDValue SourceVec = V.getOperand(0);
5152  auto Source = find(Sources, SourceVec);
5153  if (Source == Sources.end())
5154  Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
5155 
5156  // Update the minimum and maximum lane number seen.
5157  unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
5158  Source->MinElt = std::min(Source->MinElt, EltNo);
5159  Source->MaxElt = std::max(Source->MaxElt, EltNo);
5160  }
5161 
5162  // Currently only do something sane when at most two source vectors
5163  // are involved.
5164  if (Sources.size() > 2)
5165  return SDValue();
5166 
5167  // Find out the smallest element size among the result and the two sources,
5168  // and use it as the element size to build the shuffle_vector.
5169  EVT SmallestEltTy = VT.getVectorElementType();
5170  for (auto &Source : Sources) {
5171  EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
5172  if (SrcEltTy.bitsLT(SmallestEltTy)) {
5173  SmallestEltTy = SrcEltTy;
5174  }
5175  }
5176  unsigned ResMultiplier =
5177  VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
5178  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
5179  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
5180 
5181  // If the source vector is too wide or too narrow, we may nevertheless be able
5182  // to construct a compatible shuffle either by concatenating it with UNDEF or
5183  // extracting a suitable range of elements.
5184  for (auto &Src : Sources) {
5185  EVT SrcVT = Src.ShuffleVec.getValueType();
5186 
5187  if (SrcVT.getSizeInBits() == VT.getSizeInBits())
5188  continue;
5189 
5190  // This stage of the search produces a source with the same element type as
5191  // the original, but with a total width matching the BUILD_VECTOR output.
5192  EVT EltVT = SrcVT.getVectorElementType();
5193  unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
5194  EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
5195 
5196  if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
5197  assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
5198  // We can pad out the smaller vector for free, so if it's part of a
5199  // shuffle...
5200  Src.ShuffleVec =
5201  DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
5202  DAG.getUNDEF(Src.ShuffleVec.getValueType()));
5203  continue;
5204  }
5205 
5206  assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
5207 
5208  if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
5209  // Span too large for a VEXT to cope
5210  return SDValue();
5211  }
5212 
5213  if (Src.MinElt >= NumSrcElts) {
5214  // The extraction can just take the second half
5215  Src.ShuffleVec =
5216  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5217  DAG.getConstant(NumSrcElts, dl, MVT::i64));
5218  Src.WindowBase = -NumSrcElts;
5219  } else if (Src.MaxElt < NumSrcElts) {
5220  // The extraction can just take the first half
5221  Src.ShuffleVec =
5222  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5223  DAG.getConstant(0, dl, MVT::i64));
5224  } else {
5225  // An actual VEXT is needed
5226  SDValue VEXTSrc1 =
5227  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5228  DAG.getConstant(0, dl, MVT::i64));
5229  SDValue VEXTSrc2 =
5230  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5231  DAG.getConstant(NumSrcElts, dl, MVT::i64));
5232  unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
5233 
5234  Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
5235  VEXTSrc2,
5236  DAG.getConstant(Imm, dl, MVT::i32));
5237  Src.WindowBase = -Src.MinElt;
5238  }
5239  }
5240 
5241  // Another possible incompatibility occurs from the vector element types. We
5242  // can fix this by bitcasting the source vectors to the same type we intend
5243  // for the shuffle.
5244  for (auto &Src : Sources) {
5245  EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
5246  if (SrcEltTy == SmallestEltTy)
5247  continue;
5248  assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
5249  Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
5250  Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
5251  Src.WindowBase *= Src.WindowScale;
5252  }
5253 
5254  // Final sanity check before we try to actually produce a shuffle.
5255  DEBUG(
5256  for (auto Src : Sources)
5257  assert(Src.ShuffleVec.getValueType() == ShuffleVT);
5258  );
5259 
5260  // The stars all align, our next step is to produce the mask for the shuffle.
5261  SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
5262  int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
5263  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
5264  SDValue Entry = Op.getOperand(i);
5265  if (Entry.isUndef())
5266  continue;
5267 
5268  auto Src = find(Sources, Entry.getOperand(0));
5269  int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
5270 
5271  // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
5272  // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
5273  // segment.
5274  EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
5275  int BitsDefined =
5276  std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits());
5277  int LanesDefined = BitsDefined / BitsPerShuffleLane;
5278 
5279  // This source is expected to fill ResMultiplier lanes of the final shuffle,
5280  // starting at the appropriate offset.
5281  int *LaneMask = &Mask[i * ResMultiplier];
5282 
5283  int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
5284  ExtractBase += NumElts * (Src - Sources.begin());
5285  for (int j = 0; j < LanesDefined; ++j)
5286  LaneMask[j] = ExtractBase + j;
5287  }
5288 
5289  // Final check before we try to produce nonsense...
5290  if (!isShuffleMaskLegal(Mask, ShuffleVT))
5291  return SDValue();
5292 
5293  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
5294  for (unsigned i = 0; i < Sources.size(); ++i)
5295  ShuffleOps[i] = Sources[i].ShuffleVec;
5296 
5297  SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
5298  ShuffleOps[1], Mask);
5299  return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
5300 }
5301 
5302 // Check if an EXT instruction can handle the shuffle mask when the
5303 // vector sources of the shuffle are the same.
5304 static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5305  unsigned NumElts = VT.getVectorNumElements();
5306 
5307  // Assume that the first shuffle index is not UNDEF. Fail if it is.
5308  if (M[0] < 0)
5309  return false;
5310 
5311  Imm = M[0];
5312 
5313  // If this is a VEXT shuffle, the immediate value is the index of the first
5314  // element. The other shuffle indices must be the successive elements after
5315  // the first one.
5316  unsigned ExpectedElt = Imm;
5317  for (unsigned i = 1; i < NumElts; ++i) {
5318  // Increment the expected index. If it wraps around, just follow it
5319  // back to index zero and keep going.
5320  ++ExpectedElt;
5321  if (ExpectedElt == NumElts)
5322  ExpectedElt = 0;
5323 
5324  if (M[i] < 0)
5325  continue; // ignore UNDEF indices
5326  if (ExpectedElt != static_cast<unsigned>(M[i]))
5327  return false;
5328  }
5329 
5330  return true;
5331 }
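// For example, a single-source v8i8 shuffle with mask <3, 4, 5, 6, 7, 0, 1, 2>
// is accepted with Imm = 3; the caller then scales Imm by getExtFactor to get
// the EXT byte offset.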
5332 
5333 // Check if an EXT instruction can handle the shuffle mask when the
5334 // vector sources of the shuffle are different.
5335 static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
5336  unsigned &Imm) {
5337  // Look for the first non-undef element.
5338  const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
5339 
5340  // Benefit from APInt to handle overflow when calculating the expected element.
5341  unsigned NumElts = VT.getVectorNumElements();
5342  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
5343  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
5344  // The following shuffle indices must be the successive elements after the
5345  // first real element.
5346  const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
5347  [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
5348  if (FirstWrongElt != M.end())
5349  return false;
5350 
5351  // The index of an EXT is the first element if it is not UNDEF.
5352  // Watch out for the beginning UNDEFs. The EXT index should be the expected
5353  // value of the first element. E.g.
5354  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
5355  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
5356  // ExpectedElt is the last mask index plus 1.
5357  Imm = ExpectedElt.getZExtValue();
5358 
5359  // There are two different cases that require reversing the input vectors.
5360  // For example, for vector <4 x i32> we have the following cases,
5361  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
5362  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
5363  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
5364  // to reverse two input vectors.
5365  if (Imm < NumElts)
5366  ReverseEXT = true;
5367  else
5368  Imm -= NumElts;
5369 
5370  return true;
5371 }
5372 
5373 /// isREVMask - Check if a vector shuffle corresponds to a REV
5374 /// instruction with the specified blocksize. (The order of the elements
5375 /// within each block of the vector is reversed.)
5376 static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5377  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
5378  "Only possible block sizes for REV are: 16, 32, 64");
5379 
5380  unsigned EltSz = VT.getScalarSizeInBits();
5381  if (EltSz == 64)
5382  return false;
5383 
5384  unsigned NumElts = VT.getVectorNumElements();
5385  unsigned BlockElts = M[0] + 1;
5386  // If the first shuffle index is UNDEF, be optimistic.
5387  if (M[0] < 0)
5388  BlockElts = BlockSize / EltSz;
5389 
5390  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5391  return false;
5392 
5393  for (unsigned i = 0; i < NumElts; ++i) {
5394  if (M[i] < 0)
5395  continue; // ignore UNDEF indices
5396  if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
5397  return false;
5398  }
5399 
5400  return true;
5401 }
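// For example, on v8i16 a REV32 (BlockSize == 32) corresponds to the mask
// <1, 0, 3, 2, 5, 4, 7, 6>: each 32-bit block has its two 16-bit elements
// swapped.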
5402 
5403 static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5404  unsigned NumElts = VT.getVectorNumElements();
5405  WhichResult = (M[0] == 0 ? 0 : 1);
5406  unsigned Idx = WhichResult * NumElts / 2;
5407  for (unsigned i = 0; i != NumElts; i += 2) {
5408  if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
5409  (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
5410  return false;
5411  Idx += 1;
5412  }
5413 
5414  return true;
5415 }
5416 
5417 static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5418  unsigned NumElts = VT.getVectorNumElements();
5419  WhichResult = (M[0] == 0 ? 0 : 1);
5420  for (unsigned i = 0; i != NumElts; ++i) {
5421  if (M[i] < 0)
5422  continue; // ignore UNDEF indices
5423  if ((unsigned)M[i] != 2 * i + WhichResult)
5424  return false;
5425  }
5426 
5427  return true;
5428 }
5429 
5430 static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5431  unsigned NumElts = VT.getVectorNumElements();
5432  WhichResult = (M[0] == 0 ? 0 : 1);
5433  for (unsigned i = 0; i < NumElts; i += 2) {
5434  if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
5435  (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
5436  return false;
5437  }
5438  return true;
5439 }
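// For example, for v4i32 the two-source masks recognised above are:
//   ZIP1 <0, 4, 1, 5>   ZIP2 <2, 6, 3, 7>
//   UZP1 <0, 2, 4, 6>   UZP2 <1, 3, 5, 7>
//   TRN1 <0, 4, 2, 6>   TRN2 <1, 5, 3, 7>
// with WhichResult == 0 selecting the "1" form and WhichResult == 1 the "2" form.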
5440 
5441 /// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
5442 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5443 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5444 static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5445  unsigned NumElts = VT.getVectorNumElements();
5446  WhichResult = (M[0] == 0 ? 0 : 1);
5447  unsigned Idx = WhichResult * NumElts / 2;
5448  for (unsigned i = 0; i != NumElts; i += 2) {
5449  if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
5450  (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
5451  return false;
5452  Idx += 1;
5453  }
5454 
5455  return true;
5456 }
5457 
5458 /// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
5459 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5460 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5461 static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5462  unsigned Half = VT.getVectorNumElements() / 2;
5463  WhichResult = (M[0] == 0 ? 0 : 1);
5464  for (unsigned j = 0; j != 2; ++j) {
5465  unsigned Idx = WhichResult;
5466  for (unsigned i = 0; i != Half; ++i) {
5467  int MIdx = M[i + j * Half];
5468  if (MIdx >= 0 && (unsigned)MIdx != Idx)
5469  return false;
5470  Idx += 2;
5471  }
5472  }
5473 
5474  return true;
5475 }
5476 
5477 /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
5478 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5479 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5480 static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5481  unsigned NumElts = VT.getVectorNumElements();
5482  WhichResult = (M[0] == 0 ? 0 : 1);
5483  for (unsigned i = 0; i < NumElts; i += 2) {
5484  if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
5485  (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
5486  return false;
5487  }
5488  return true;
5489 }
5490 
5491 static bool isINSMask(ArrayRef<int> M, int NumInputElements,
5492  bool &DstIsLeft, int &Anomaly) {
5493  if (M.size() != static_cast<size_t>(NumInputElements))
5494  return false;
5495 
5496  int NumLHSMatch = 0, NumRHSMatch = 0;
5497  int LastLHSMismatch = -1, LastRHSMismatch = -1;
5498 
5499  for (int i = 0; i < NumInputElements; ++i) {
5500  if (M[i] == -1) {
5501  ++NumLHSMatch;
5502  ++NumRHSMatch;
5503  continue;
5504  }
5505 
5506  if (M[i] == i)
5507  ++NumLHSMatch;
5508  else
5509  LastLHSMismatch = i;
5510 
5511  if (M[i] == i + NumInputElements)
5512  ++NumRHSMatch;
5513  else
5514  LastRHSMismatch = i;
5515  }
5516 
5517  if (NumLHSMatch == NumInputElements - 1) {
5518  DstIsLeft = true;
5519  Anomaly = LastLHSMismatch;
5520  return true;
5521  } else if (NumRHSMatch == NumInputElements - 1) {
5522  DstIsLeft = false;
5523  Anomaly = LastRHSMismatch;
5524  return true;
5525  }
5526 
5527  return false;
5528 }
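// For example, with NumInputElements == 4 the mask <0, 1, 6, 3> matches the
// LHS identity in every lane except lane 2, so DstIsLeft is true and
// Anomaly == 2; the caller then inserts element 2 of the RHS (mask value
// 6 - 4) into lane 2 of the LHS.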
5529 
5530 static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
5531  if (VT.getSizeInBits() != 128)
5532  return false;
5533 
5534  unsigned NumElts = VT.getVectorNumElements();
5535 
5536  for (int I = 0, E = NumElts / 2; I != E; I++) {
5537  if (Mask[I] != I)
5538  return false;
5539  }
5540 
5541  int Offset = NumElts / 2;
5542  for (int I = NumElts / 2, E = NumElts; I != E; I++) {
5543  if (Mask[I] != I + SplitLHS * Offset)
5544  return false;
5545  }
5546 
5547  return true;
5548 }
5549 
5550 static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
5551  SDLoc DL(Op);
5552  EVT VT = Op.getValueType();
5553  SDValue V0 = Op.getOperand(0);
5554  SDValue V1 = Op.getOperand(1);
5555  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5556 
5557  if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
5558  VT.getVectorElementType() != V1.getValueType().getVectorElementType())
5559  return SDValue();
5560 
5561  bool SplitV0 = V0.getValueSizeInBits() == 128;
5562 
5563  if (!isConcatMask(Mask, VT, SplitV0))
5564  return SDValue();
5565 
5566  EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
5567  VT.getVectorNumElements() / 2);
5568  if (SplitV0) {
5569  V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
5570  DAG.getConstant(0, DL, MVT::i64));
5571  }
5572  if (V1.getValueSizeInBits() == 128) {
5573  V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
5574  DAG.getConstant(0, DL, MVT::i64));
5575  }
5576  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
5577 }
5578 
5579 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
5580 /// the specified operations to build the shuffle.
5581 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
5582  SDValue RHS, SelectionDAG &DAG,
5583  const SDLoc &dl) {
5584  unsigned OpNum = (PFEntry >> 26) & 0x0F;
5585  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
5586  unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
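// LHSID and RHSID each encode four lane indices in base 9: digits 0-7 select a
// lane of the concatenated <LHS, RHS> pair and 8 means "don't care". Hence the
// identity <0,1,2,3> encodes as (1*9+2)*9+3 and <4,5,6,7> as
// ((4*9+5)*9+6)*9+7, which is what the OP_COPY checks below test for.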
5587 
5588  enum {
5589  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
5590  OP_VREV,
5591  OP_VDUP0,
5592  OP_VDUP1,
5593  OP_VDUP2,
5594  OP_VDUP3,
5595  OP_VEXT1,
5596  OP_VEXT2,
5597  OP_VEXT3,
5598  OP_VUZPL, // VUZP, left result
5599  OP_VUZPR, // VUZP, right result
5600  OP_VZIPL, // VZIP, left result
5601  OP_VZIPR, // VZIP, right result
5602  OP_VTRNL, // VTRN, left result
5603  OP_VTRNR // VTRN, right result
5604  };
5605 
5606  if (OpNum == OP_COPY) {
5607  if (LHSID == (1 * 9 + 2) * 9 + 3)
5608  return LHS;
5609  assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
5610  return RHS;
5611  }
5612 
5613  SDValue OpLHS, OpRHS;
5614  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
5615  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
5616  EVT VT = OpLHS.getValueType();
5617 
5618  switch (OpNum) {
5619  default:
5620  llvm_unreachable("Unknown shuffle opcode!");
5621  case OP_VREV:
5622  // VREV divides the vector in half and swaps within the half.
5623  if (VT.getVectorElementType() == MVT::i32 ||
5624  VT.getVectorElementType() == MVT::f32)
5625  return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
5626  // vrev <4 x i16> -> REV32
5627  if (VT.getVectorElementType() == MVT::i16 ||
5628  VT.getVectorElementType() == MVT::f16)
5629  return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
5630  // vrev <4 x i8> -> REV16
5631  if (VT.getVectorElementType() == MVT::i8)
5632  return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
5633  case OP_VDUP0:
5634  case OP_VDUP1:
5635  case OP_VDUP2:
5636  case OP_VDUP3: {
5637  EVT EltTy = VT.getVectorElementType();
5638  unsigned Opcode;
5639  if (EltTy == MVT::i8)
5640  Opcode = AArch64ISD::DUPLANE8;
5641  else if (EltTy == MVT::i16 || EltTy == MVT::f16)
5642  Opcode = AArch64ISD::DUPLANE16;
5643  else if (EltTy == MVT::i32 || EltTy == MVT::f32)
5644  Opcode = AArch64ISD::DUPLANE32;
5645  else if (EltTy == MVT::i64 || EltTy == MVT::f64)
5646  Opcode = AArch64ISD::DUPLANE64;
5647  else
5648  llvm_unreachable("Invalid vector element type?");
5649 
5650  if (VT.getSizeInBits() == 64)
5651  OpLHS = WidenVector(OpLHS, DAG);
5652  SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
5653  return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
5654  }
5655  case OP_VEXT1:
5656  case OP_VEXT2:
5657  case OP_VEXT3: {
5658  unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
5659  return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
5660  DAG.getConstant(Imm, dl, MVT::i32));
5661  }
5662  case OP_VUZPL:
5663  return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
5664  OpRHS);
5665  case OP_VUZPR:
5666  return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
5667  OpRHS);
5668  case OP_VZIPL:
5669  return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
5670  OpRHS);
5671  case OP_VZIPR:
5672  return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
5673  OpRHS);
5674  case OP_VTRNL:
5675  return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
5676  OpRHS);
5677  case OP_VTRNR:
5678  return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
5679  OpRHS);
5680  }
5681 }
5682 
5683 static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
5684  SelectionDAG &DAG) {
5685  // Check to see if we can use the TBL instruction.
5686  SDValue V1 = Op.getOperand(0);
5687  SDValue V2 = Op.getOperand(1);
5688  SDLoc DL(Op);
5689 
5690  EVT EltVT = Op.getValueType().getVectorElementType();
5691  unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
5692 
5693  SmallVector<SDValue, 8> TBLMask;
5694  for (int Val : ShuffleMask) {
5695  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5696  unsigned Offset = Byte + Val * BytesPerElt;
5697  TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
5698  }
5699  }
5700 
5701  MVT IndexVT = MVT::v8i8;
5702  unsigned IndexLen = 8;
5703  if (Op.getValueSizeInBits() == 128) {
5704  IndexVT = MVT::v16i8;
5705  IndexLen = 16;
5706  }
5707 
5708  SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
5709  SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
5710 
5711  SDValue Shuffle;
5712  if (V2.getNode()->isUndef()) {
5713  if (IndexLen == 8)
5714  V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
5715  Shuffle = DAG.getNode(
5716  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
5717  DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
5718  DAG.getBuildVector(IndexVT, DL,
5719  makeArrayRef(TBLMask.data(), IndexLen)));
5720  } else {
5721  if (IndexLen == 8) {
5722  V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
5723  Shuffle = DAG.getNode(
5724  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
5725  DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
5726  DAG.getBuildVector(IndexVT, DL,
5727  makeArrayRef(TBLMask.data(), IndexLen)));
5728  } else {
5729  // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
5730  // cannot currently represent the register constraints on the input
5731  // table registers.
5732  // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
5733  // DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
5734  // IndexLen));
5735  Shuffle = DAG.getNode(
5736  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
5737  DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
5738  V2Cst, DAG.getBuildVector(IndexVT, DL,
5739  makeArrayRef(TBLMask.data(), IndexLen)));
5740  }
5741  }
5742  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
5743 }
5744 
5745 static unsigned getDUPLANEOp(EVT EltType) {
5746  if (EltType == MVT::i8)
5747  return AArch64ISD::DUPLANE8;
5748  if (EltType == MVT::i16 || EltType == MVT::f16)
5749  return AArch64ISD::DUPLANE16;
5750  if (EltType == MVT::i32 || EltType == MVT::f32)
5751  return AArch64ISD::DUPLANE32;
5752  if (EltType == MVT::i64 || EltType == MVT::f64)
5753  return AArch64ISD::DUPLANE64;
5754 
5755  llvm_unreachable("Invalid vector element type?");
5756 }
5757 
5758 SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
5759  SelectionDAG &DAG) const {
5760  SDLoc dl(Op);
5761  EVT VT = Op.getValueType();
5762 
5763  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5764 
5765  // Convert shuffles that are directly supported on NEON to target-specific
5766  // DAG nodes, instead of keeping them as shuffles and matching them again
5767  // during code selection. This is more efficient and avoids the possibility
5768  // of inconsistencies between legalization and selection.
5769  ArrayRef<int> ShuffleMask = SVN->getMask();
5770 
5771  SDValue V1 = Op.getOperand(0);
5772  SDValue V2 = Op.getOperand(1);
5773 
5774  if (SVN->isSplat()) {
5775  int Lane = SVN->getSplatIndex();
5776  // If this is an undef splat, generate it via "just" vdup, if possible.
5777  if (Lane == -1)
5778  Lane = 0;
5779 
5780  if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
5781  return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
5782  V1.getOperand(0));
5783  // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
5784  // constant. If so, we can just reference the lane's definition directly.
5785  if (V1.getOpcode() == ISD::BUILD_VECTOR &&
5786  !isa<ConstantSDNode>(V1.getOperand(Lane)))
5787  return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
5788 
5789  // Otherwise, duplicate from the lane of the input vector.
5790  unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
5791 
5792  // SelectionDAGBuilder may have "helpfully" already extracted or concatenated
5793  // to make a vector of the same size as this SHUFFLE. We can ignore the
5794  // extract entirely, and canonicalise the concat using WidenVector.
5795  if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
5796  Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
5797  V1 = V1.getOperand(0);
5798  } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {
5799  unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
5800  Lane -= Idx * VT.getVectorNumElements() / 2;
5801  V1 = WidenVector(V1.getOperand(Idx), DAG);
5802  } else if (VT.getSizeInBits() == 64)
5803  V1 = WidenVector(V1, DAG);
5804 
5805  return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64));
5806  }
5807 
5808  if (isREVMask(ShuffleMask, VT, 64))
5809  return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
5810  if (isREVMask(ShuffleMask, VT, 32))
5811  return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
5812  if (isREVMask(ShuffleMask, VT, 16))
5813  return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
5814 
5815  bool ReverseEXT = false;
5816  unsigned Imm;
5817  if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
5818  if (ReverseEXT)
5819  std::swap(V1, V2);
5820  Imm *= getExtFactor(V1);
5821  return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
5822  DAG.getConstant(Imm, dl, MVT::i32));
5823  } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
5824  Imm *= getExtFactor(V1);
5825  return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
5826  DAG.getConstant(Imm, dl, MVT::i32));
5827  }
5828 
5829  unsigned WhichResult;
5830  if (isZIPMask(ShuffleMask, VT, WhichResult)) {
5831  unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
5832  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
5833  }
5834  if (isUZPMask(ShuffleMask, VT, WhichResult)) {
5835  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
5836  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
5837  }
5838  if (isTRNMask(ShuffleMask, VT, WhichResult)) {
5839  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
5840  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
5841  }
5842 
5843  if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
5844  unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
5845  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
5846  }
5847  if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
5848  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
5849  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
5850  }
5851  if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
5852  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
5853  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
5854  }
5855 
5856  if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
5857  return Concat;
5858 
5859  bool DstIsLeft;
5860  int Anomaly;
5861  int NumInputElements = V1.getValueType().getVectorNumElements();
5862  if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
5863  SDValue DstVec = DstIsLeft ? V1 : V2;
5864  SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
5865 
5866  SDValue SrcVec = V1;
5867  int SrcLane = ShuffleMask[Anomaly];
5868  if (SrcLane >= NumInputElements) {
5869  SrcVec = V2;
5870  SrcLane -= VT.getVectorNumElements();
5871  }
5872  SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
5873 
5874  EVT ScalarVT = VT.getVectorElementType();
5875 
5876  if (ScalarVT.getSizeInBits() < 32 && ScalarVT.isInteger())
5877  ScalarVT = MVT::i32;
5878 
5879  return DAG.getNode(
5880  ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
5881  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
5882  DstLaneV);
5883  }
5884 
5885  // If the shuffle is not directly supported and it has 4 elements, use
5886  // the PerfectShuffle-generated table to synthesize it from other shuffles.
5887  unsigned NumElts = VT.getVectorNumElements();
5888  if (NumElts == 4) {
5889  unsigned PFIndexes[4];
5890  for (unsigned i = 0; i != 4; ++i) {
5891  if (ShuffleMask[i] < 0)
5892  PFIndexes[i] = 8;
5893  else
5894  PFIndexes[i] = ShuffleMask[i];
5895  }
5896 
5897  // Compute the index in the perfect shuffle table.
5898  unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
5899  PFIndexes[2] * 9 + PFIndexes[3];
5900  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
5901  unsigned Cost = (PFEntry >> 30);
5902 
5903  if (Cost <= 4)
5904  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
5905  }
5906 
5907  return GenerateTBL(Op, ShuffleMask, DAG);
5908 }
5909 
5910 static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
5911  APInt &UndefBits) {
5912  EVT VT = BVN->getValueType(0);
5913  APInt SplatBits, SplatUndef;
5914  unsigned SplatBitSize;
5915  bool HasAnyUndefs;
5916  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
5917  unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
5918 
5919  for (unsigned i = 0; i < NumSplats; ++i) {
5920  CnstBits <<= SplatBitSize;
5921  UndefBits <<= SplatBitSize;
5922  CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
5923  UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
5924  }
5925 
5926  return true;
5927  }
5928 
5929  return false;
5930 }
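// For example, a v4i32 BUILD_VECTOR splatting the constant 0x000000FF reports
// a 32-bit splat, so CnstBits becomes the repeating 128-bit pattern
// 0x000000FF000000FF000000FF000000FF; UndefBits holds the same pattern with any
// undefined bits toggled to one, which the callers use as a second immediate
// candidate.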
5931 
5932 SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
5933  SelectionDAG &DAG) const {
5934  BuildVectorSDNode *BVN =
5935  dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
5936  SDValue LHS = Op.getOperand(0);
5937  SDLoc dl(Op);
5938  EVT VT = Op.getValueType();
5939 
5940  if (!BVN)
5941  return Op;
5942 
5943  APInt CnstBits(VT.getSizeInBits(), 0);
5944  APInt UndefBits(VT.getSizeInBits(), 0);
5945  if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
5946  // We only have BIC vector immediate instruction, which is and-not.
5947  CnstBits = ~CnstBits;
5948 
5949  // We make use of a little bit of goto ickiness in order to avoid having to
5950  // duplicate the immediate matching logic for the undef toggled case.
5951  bool SecondTry = false;
5952  AttemptModImm:
5953 
5954  if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
5955  CnstBits = CnstBits.zextOrTrunc(64);
5956  uint64_t CnstVal = CnstBits.getZExtValue();
5957 
5958  if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
5959  CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
5960  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5961  SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
5962  DAG.getConstant(CnstVal, dl, MVT::i32),
5963  DAG.getConstant(0, dl, MVT::i32));
5964  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5965  }
5966 
5967  if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
5968  CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
5969  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5970  SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
5971  DAG.getConstant(CnstVal, dl, MVT::i32),
5972  DAG.getConstant(8, dl, MVT::i32));
5973  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5974  }
5975 
5976  if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
5977  CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
5978  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5979  SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
5980  DAG.getConstant(CnstVal, dl, MVT::i32),
5981  DAG.getConstant(16, dl, MVT::i32));
5982  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5983  }
5984 
5985  if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
5986  CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
5987  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
5988  SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
5989  DAG.getConstant(CnstVal, dl, MVT::i32),
5990  DAG.getConstant(24, dl, MVT::i32));
5991  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
5992  }
5993 
5994  if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
5995  CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
5996  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
5997  SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
5998  DAG.getConstant(CnstVal, dl, MVT::i32),
5999  DAG.getConstant(0, dl, MVT::i32));
6000  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6001  }
6002 
6003  if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
6004  CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
6005  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6006  SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6007  DAG.getConstant(CnstVal, dl, MVT::i32),
6008  DAG.getConstant(8, dl, MVT::i32));
6009  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6010  }
6011  }
6012 
6013  if (SecondTry)
6014  goto FailedModImm;
6015  SecondTry = true;
6016  CnstBits = ~UndefBits;
6017  goto AttemptModImm;
6018  }
6019 
6020 // We can always fall back to a non-immediate AND.
6021 FailedModImm:
6022  return Op;
6023 }
6024 
6025 // Specialized code to quickly find if PotentialBVec is a BuildVector that
6026 // consists of only the same constant int value, returned in reference arg
6027 // ConstVal
6028 static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
6029  uint64_t &ConstVal) {
6030  BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
6031  if (!Bvec)
6032  return false;
6033  ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
6034  if (!FirstElt)
6035  return false;
6036  EVT VT = Bvec->getValueType(0);
6037  unsigned NumElts = VT.getVectorNumElements();
6038  for (unsigned i = 1; i < NumElts; ++i)
6039  if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
6040  return false;
6041  ConstVal = FirstElt->getZExtValue();
6042  return true;
6043 }
6044 
6045 static unsigned getIntrinsicID(const SDNode *N) {
6046  unsigned Opcode = N->getOpcode();
6047  switch (Opcode) {
6048  default:
6049  return Intrinsic::not_intrinsic;
6050  case ISD::INTRINSIC_WO_CHAIN: {
6051  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
6052  if (IID < Intrinsic::num_intrinsics)
6053  return IID;
6054  return Intrinsic::not_intrinsic;
6055  }
6056  }
6057 }
6058 
6059 // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
6060 // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
6061 // BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2.
6062 // Also, logical shift right -> sri, with the same structure.
6063 static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
6064  EVT VT = N->getValueType(0);
6065 
6066  if (!VT.isVector())
6067  return SDValue();
6068 
6069  SDLoc DL(N);
6070 
6071  // Is the first op an AND?
6072  const SDValue And = N->getOperand(0);
6073  if (And.getOpcode() != ISD::AND)
6074  return SDValue();
6075 
6076  // Is the second op a shl or lshr?
6077  SDValue Shift = N->getOperand(1);
6078  // This will have been turned into: AArch64ISD::VSHL vector, #shift
6079  // or AArch64ISD::VLSHR vector, #shift
6080  unsigned ShiftOpc = Shift.getOpcode();
6081  if ((ShiftOpc != AArch64ISD::VSHL && ShiftOpc != AArch64ISD::VLSHR))
6082  return SDValue();
6083  bool IsShiftRight = ShiftOpc == AArch64ISD::VLSHR;
6084 
6085  // Is the shift amount constant?
6086  ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
6087  if (!C2node)
6088  return SDValue();
6089 
6090  // Is the and mask vector all constant?
6091  uint64_t C1;
6092  if (!isAllConstantBuildVector(And.getOperand(1), C1))
6093  return SDValue();
6094 
6095  // Is C1 == ~C2, taking into account how much one can shift elements of a
6096  // particular size?
6097  uint64_t C2 = C2node->getZExtValue();
6098  unsigned ElemSizeInBits = VT.getScalarSizeInBits();
6099  if (C2 > ElemSizeInBits)
6100  return SDValue();
6101  unsigned ElemMask = (1 << ElemSizeInBits) - 1;
6102  if ((C1 & ElemMask) != (~C2 & ElemMask))
6103  return SDValue();
6104 
6105  SDValue X = And.getOperand(0);
6106  SDValue Y = Shift.getOperand(0);
6107 
6108  unsigned Intrin =
6109  IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
6110  SDValue ResultSLI =
6111  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
6112  DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
6113  Shift.getOperand(1));
6114 
6115  DEBUG(dbgs() << "aarch64-lower: transformed: \n");
6116  DEBUG(N->dump(&DAG));
6117  DEBUG(dbgs() << "into: \n");
6118  DEBUG(ResultSLI->dump(&DAG));
6119 
6120  ++NumShiftInserts;
6121  return ResultSLI;
6122 }
6123 
6124 SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
6125  SelectionDAG &DAG) const {
6126  // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
6127  if (EnableAArch64SlrGeneration) {
6128  if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
6129  return Res;
6130  }
6131 
6132  BuildVectorSDNode *BVN =
6133  dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
6134  SDValue LHS = Op.getOperand(1);
6135  SDLoc dl(Op);
6136  EVT VT = Op.getValueType();
6137 
6138  // OR commutes, so try swapping the operands.
6139  if (!BVN) {
6140  LHS = Op.getOperand(0);
6141  BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
6142  }
6143  if (!BVN)
6144  return Op;
6145 
6146  APInt CnstBits(VT.getSizeInBits(), 0);
6147  APInt UndefBits(VT.getSizeInBits(), 0);
6148  if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
6149  // We make use of a little bit of goto ickiness in order to avoid having to
6150  // duplicate the immediate matching logic for the undef toggled case.
6151  bool SecondTry = false;
6152  AttemptModImm:
6153 
6154  if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
6155  CnstBits = CnstBits.zextOrTrunc(64);
6156  uint64_t CnstVal = CnstBits.getZExtValue();
6157 
6158  if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
6159  CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
6160  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6161  SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6162  DAG.getConstant(CnstVal, dl, MVT::i32),
6163  DAG.getConstant(0, dl, MVT::i32));
6164  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6165  }
6166 
6167  if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
6168  CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
6169  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6170  SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6171  DAG.getConstant(CnstVal, dl, MVT::i32),
6172  DAG.getConstant(8, dl, MVT::i32));
6173  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6174  }
6175 
6176  if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
6177  CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
6178  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6179  SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6180  DAG.getConstant(CnstVal, dl, MVT::i32),
6181  DAG.getConstant(16, dl, MVT::i32));
6182  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6183  }
6184 
6185  if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
6186  CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
6187  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6188  SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6189  DAG.getConstant(CnstVal, dl, MVT::i32),
6190  DAG.getConstant(24, dl, MVT::i32));
6191  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6192  }
6193 
6194  if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
6195  CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
6196  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6197  SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6198  DAG.getConstant(CnstVal, dl, MVT::i32),
6199  DAG.getConstant(0, dl, MVT::i32));
6200  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6201  }
6202 
6203  if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
6204  CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
6205  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6206  SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6207  DAG.getConstant(CnstVal, dl, MVT::i32),
6208  DAG.getConstant(8, dl, MVT::i32));
6209  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6210  }
6211  }
6212 
6213  if (SecondTry)
6214  goto FailedModImm;
6215  SecondTry = true;
6216  CnstBits = UndefBits;
6217  goto AttemptModImm;
6218  }
6219 
6220 // We can always fall back to a non-immediate OR.
6221 FailedModImm:
6222  return Op;
6223 }
6224 
6225 // Normalize the operands of BUILD_VECTOR. The value of constant operands will
6226 // be truncated to fit element width.
6227 static SDValue NormalizeBuildVector(SDValue Op,
6228  SelectionDAG &DAG) {
6229  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6230  SDLoc dl(Op);
6231  EVT VT = Op.getValueType();
6232  EVT EltTy = VT.getVectorElementType();
6233 
6234  if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
6235  return Op;
6236 
6237  SmallVector<SDValue, 16> Ops;
6238  for (SDValue Lane : Op->ops()) {
6239  if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
6240  APInt LowBits(EltTy.getSizeInBits(),
6241  CstLane->getZExtValue());
6242  Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
6243  }
6244  Ops.push_back(Lane);
6245  }
6246  return DAG.getBuildVector(VT, dl, Ops);
6247 }
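// For example, a v8i8 BUILD_VECTOR with a constant operand of 0x1FF has that
// lane rebuilt as the i32 constant 0xFF, since only the low 8 bits survive the
// implicit truncation to the element width.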
6248 
6249 SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
6250  SelectionDAG &DAG) const {
6251  SDLoc dl(Op);
6252  EVT VT = Op.getValueType();
6253  Op = NormalizeBuildVector(Op, DAG);
6254  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
6255 
6256  APInt CnstBits(VT.getSizeInBits(), 0);
6257  APInt UndefBits(VT.getSizeInBits(), 0);
6258  if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
6259  // We make use of a little bit of goto ickiness in order to avoid having to
6260  // duplicate the immediate matching logic for the undef toggled case.
6261  bool SecondTry = false;
6262  AttemptModImm:
6263 
6264  if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
6265  CnstBits = CnstBits.zextOrTrunc(64);
6266  uint64_t CnstVal = CnstBits.getZExtValue();
6267 
6268  // Certain magic vector constants (used to express things like NOT
6269  // and NEG) are passed through unmodified. This allows codegen patterns
6270  // for these operations to match. Special-purpose patterns will lower
6271  // these immediates to MOVIs if it proves necessary.
6272  if (VT.isInteger() && (CnstVal == 0 || CnstVal == ~0ULL))
6273  return Op;
6274 
6275  // The many faces of MOVI...
6276  if (AArch64_AM::isAdvSIMDModImmType10(CnstVal)) {
6277  CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal);
6278  if (VT.getSizeInBits() == 128) {
6279  SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
6280  DAG.getConstant(CnstVal, dl, MVT::i32));
6281  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6282  }
6283 
6284  // Support the V64 version via subregister insertion.
6285  SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
6286  DAG.getConstant(CnstVal, dl, MVT::i32));
6287  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6288  }
6289 
6290  if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
6291  CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
6292  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6293  SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6294  DAG.getConstant(CnstVal, dl, MVT::i32),
6295  DAG.getConstant(0, dl, MVT::i32));
6296  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6297  }
6298 
6299  if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
6300  CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
6301  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6302  SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6303  DAG.getConstant(CnstVal, dl, MVT::i32),
6304  DAG.getConstant(8, dl, MVT::i32));
6305  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6306  }
6307 
6308  if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
6309  CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
6310  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6311  SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6312  DAG.getConstant(CnstVal, dl, MVT::i32),
6313  DAG.getConstant(16, dl, MVT::i32));
6314  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6315  }
6316 
6317  if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
6318  CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
6319  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6320  SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6321  DAG.getConstant(CnstVal, dl, MVT::i32),
6322  DAG.getConstant(24, dl, MVT::i32));
6323  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6324  }
6325 
6326  if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
6327  CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
6328  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6329  SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6330  DAG.getConstant(CnstVal, dl, MVT::i32),
6331  DAG.getConstant(0, dl, MVT::i32));
6332  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6333  }
6334 
6335  if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
6336  CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
6337  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6338  SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6339  DAG.getConstant(CnstVal, dl, MVT::i32),
6340  DAG.getConstant(8, dl, MVT::i32));
6341  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6342  }
6343 
6344  if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
6345  CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
6346  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6347  SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
6348  DAG.getConstant(CnstVal, dl, MVT::i32),
6349  DAG.getConstant(264, dl, MVT::i32));
6350  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6351  }
6352 
6353  if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
6354  CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
6355  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6356  SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
6357  DAG.getConstant(CnstVal, dl, MVT::i32),
6358  DAG.getConstant(272, dl, MVT::i32));
6359  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6360  }
6361 
6362  if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) {
6363  CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal);
6364  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
6365  SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
6366  DAG.getConstant(CnstVal, dl, MVT::i32));
6367  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6368  }
6369 
6370  // The few faces of FMOV...
6371  if (AArch64_AM::isAdvSIMDModImmType11(CnstVal)) {
6372  CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal);
6373  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
6374  SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
6375  DAG.getConstant(CnstVal, dl, MVT::i32));
6376  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6377  }
6378 
6379  if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) &&
6380  VT.getSizeInBits() == 128) {
6381  CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
6382  SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
6383  DAG.getConstant(CnstVal, dl, MVT::i32));
6384  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6385  }
6386 
6387  // The many faces of MVNI...
6388  CnstVal = ~CnstVal;
6389  if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
6390  CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
6391  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6392  SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6393  DAG.getConstant(CnstVal, dl, MVT::i32),
6394  DAG.getConstant(0, dl, MVT::i32));
6395  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6396  }
6397 
6398  if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
6399  CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
6400  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6401  SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6402  DAG.getConstant(CnstVal, dl, MVT::i32),
6403  DAG.getConstant(8, dl, MVT::i32));
6404  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6405  }
6406 
6407  if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
6408  CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
6409  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6410  SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6411  DAG.getConstant(CnstVal, dl, MVT::i32),
6412  DAG.getConstant(16, dl, MVT::i32));
6413  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6414  }
6415 
6416  if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
6417  CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
6418  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6419  SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6420  DAG.getConstant(CnstVal, dl, MVT::i32),
6421  DAG.getConstant(24, dl, MVT::i32));
6422  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6423  }
6424 
6425  if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
6426  CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
6427  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6428  SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6429  DAG.getConstant(CnstVal, dl, MVT::i32),
6430  DAG.getConstant(0, dl, MVT::i32));
6431  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6432  }
6433 
6434  if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
6435  CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
6436  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6437  SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6438  DAG.getConstant(CnstVal, dl, MVT::i32),
6439  DAG.getConstant(8, dl, MVT::i32));
6440  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6441  }
6442 
6443  if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
6444  CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
6445  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6446  SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
6447  DAG.getConstant(CnstVal, dl, MVT::i32),
6448  DAG.getConstant(264, dl, MVT::i32));
6449  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6450  }
6451 
6452  if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
6453  CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
6454  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6455  SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
6456  DAG.getConstant(CnstVal, dl, MVT::i32),
6457  DAG.getConstant(272, dl, MVT::i32));
6458  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6459  }
6460  }
6461 
6462  if (SecondTry)
6463  goto FailedModImm;
6464  SecondTry = true;
6465  CnstBits = UndefBits;
6466  goto AttemptModImm;
6467  }
6468 FailedModImm:
6469 
6470  // Scan through the operands to find some interesting properties we can
6471  // exploit:
6472  // 1) If only one value is used, we can use a DUP, or
6473  // 2) if only the low element is not undef, we can just insert that, or
6474  // 3) if only one constant value is used (w/ some non-constant lanes),
6475  // we can splat the constant value into the whole vector then fill
6476  // in the non-constant lanes.
6477  // 4) FIXME: If different constant values are used, but we can intelligently
6478  // select the values we'll be overwriting for the non-constant
6479  // lanes such that we can directly materialize the vector
6480  // some other way (MOVI, e.g.), we can be sneaky.
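// Illustration of strategy (3) above (a sketch, not taken from this file): for a
// build_vector like <4 x i32> <i32 1, i32 1, i32 %a, i32 1>, the constant is
// splatted once and the single non-constant lane is patched afterwards, roughly
//   dup  v0.4s, w8        // w8 holds the constant 1
//   mov  v0.s[2], w0      // insert the non-constant lane %a
// instead of inserting all four lanes one at a time.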
6481  unsigned NumElts = VT.getVectorNumElements();
6482  bool isOnlyLowElement = true;
6483  bool usesOnlyOneValue = true;
6484  bool usesOnlyOneConstantValue = true;
6485  bool isConstant = true;
6486  unsigned NumConstantLanes = 0;
6487  SDValue Value;
6488  SDValue ConstantValue;
6489  for (unsigned i = 0; i < NumElts; ++i) {
6490  SDValue V = Op.getOperand(i);
6491  if (V.isUndef())
6492  continue;
6493  if (i > 0)
6494  isOnlyLowElement = false;
6495  if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6496  isConstant = false;
6497 
6498  if (isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V)) {
6499  ++NumConstantLanes;
6500  if (!ConstantValue.getNode())
6501  ConstantValue = V;
6502  else if (ConstantValue != V)
6503  usesOnlyOneConstantValue = false;
6504  }
6505 
6506  if (!Value.getNode())
6507  Value = V;
6508  else if (V != Value)
6509  usesOnlyOneValue = false;
6510  }
6511 
6512  if (!Value.getNode())
6513  return DAG.getUNDEF(VT);
6514 
6515  if (isOnlyLowElement)
6516  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6517 
6518  // Use DUP for non-constant splats. For f32 constant splats, reduce to
6519  // i32 and try again.
6520  if (usesOnlyOneValue) {
6521  if (!isConstant) {
6522  if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6523  Value.getValueType() != VT)
6524  return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
6525 
6526  // This is actually a DUPLANExx operation, which keeps everything vectory.
6527 
6528  // DUPLANE works on 128-bit vectors, widen it if necessary.
6529  SDValue Lane = Value.getOperand(1);
6530  Value = Value.getOperand(0);
6531  if (Value.getValueSizeInBits() == 64)
6532  Value = WidenVector(Value, DAG);
6533 
6534  unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
6535  return DAG.getNode(Opcode, dl, VT, Value, Lane);
6536  }
6537 
6538  if (VT.getVectorElementType().isFloatingPoint()) {
6539  SmallVector<SDValue, 8> Ops;
6540  EVT EltTy = VT.getVectorElementType();
6541  assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
6542  "Unsupported floating-point vector type");
6543  MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
6544  for (unsigned i = 0; i < NumElts; ++i)
6545  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
6546  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
6547  SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6548  Val = LowerBUILD_VECTOR(Val, DAG);
6549  if (Val.getNode())
6550  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6551  }
6552  }
6553 
6554  // If only one constant value was used (across one or more lanes), start by
6555  // splatting that value, then replace the non-constant lanes. This
6556  // is better than the default, which will perform a separate initialization
6557  // for each lane.
6558  if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
6559  SDValue Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
6560  // Now insert the non-constant lanes.
6561  for (unsigned i = 0; i < NumElts; ++i) {
6562  SDValue V = Op.getOperand(i);
6563  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
6564  if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
6565  // Note that type legalization likely mucked about with the VT of the
6566  // source operand, so we may have to convert it here before inserting.
6567  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
6568  }
6569  }
6570  return Val;
6571  }
6572 
6573  // If all elements are constants and the case above didn't get hit, fall back
6574  // to the default expansion, which will generate a load from the constant
6575  // pool.
6576  if (isConstant)
6577  return SDValue();
6578 
6579  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6580  if (NumElts >= 4) {
6581  if (SDValue shuffle = ReconstructShuffle(Op, DAG))
6582  return shuffle;
6583  }
6584 
6585  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6586  // know the default expansion would otherwise fall back on something even
6587  // worse. For a vector with one or two non-undef values, that's
6588  // scalar_to_vector for the elements followed by a shuffle (provided the
6589  // shuffle is valid for the target) and materialization element by element
6590  // on the stack followed by a load for everything else.
6591  if (!isConstant && !usesOnlyOneValue) {
6592  SDValue Vec = DAG.getUNDEF(VT);
6593  SDValue Op0 = Op.getOperand(0);
6594  unsigned ElemSize = VT.getScalarSizeInBits();
6595  unsigned i = 0;
6596  // For 32 and 64 bit types, use INSERT_SUBREG for lane zero to
6597  // a) Avoid a RMW dependency on the full vector register, and
6598  // b) Allow the register coalescer to fold away the copy if the
6599  // value is already in an S or D register.
6600  // Do not do this for UNDEF/LOAD nodes because we have better patterns
6601  // for those avoiding the SCALAR_TO_VECTOR/BUILD_VECTOR.
6602  if (!Op0.isUndef() && Op0.getOpcode() != ISD::LOAD &&
6603  (ElemSize == 32 || ElemSize == 64)) {
6604  unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
6605  MachineSDNode *N =
6606  DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
6607  DAG.getTargetConstant(SubIdx, dl, MVT::i32));
6608  Vec = SDValue(N, 0);
6609  ++i;
6610  }
6611  for (; i < NumElts; ++i) {
6612  SDValue V = Op.getOperand(i);
6613  if (V.isUndef())
6614  continue;
6615  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
6616  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6617  }
6618  return Vec;
6619  }
6620 
6621  // Just use the default expansion. We failed to find a better alternative.
6622  return SDValue();
6623 }
6624 
6625 SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
6626  SelectionDAG &DAG) const {
6627  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
6628 
6629  // Check for non-constant or out of range lane.
6630  EVT VT = Op.getOperand(0).getValueType();
6631  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
6632  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
6633  return SDValue();
6634 
6635 
6636  // Insertion/extraction are legal for V128 types.
6637  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
6638  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
6639  VT == MVT::v8f16)
6640  return Op;
6641 
6642  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
6643  VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
6644  return SDValue();
6645 
6646  // For V64 types, we perform insertion by expanding the value
6647  // to a V128 type and perform the insertion on that.
6648  SDLoc DL(Op);
6649  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
6650  EVT WideTy = WideVec.getValueType();
6651 
6652  SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
6653  Op.getOperand(1), Op.getOperand(2));
6654  // Re-narrow the resultant vector.
6655  return NarrowVector(Node, DAG);
6656 }
6657 
6658 SDValue
6659 AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
6660  SelectionDAG &DAG) const {
6661  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
6662 
6663  // Check for non-constant or out of range lane.
6664  EVT VT = Op.getOperand(0).getValueType();
6665  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6666  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
6667  return SDValue();
6668 
6669 
6670  // Insertion/extraction are legal for V128 types.
6671  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
6672  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
6673  VT == MVT::v8f16)
6674  return Op;
6675 
6676  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
6677  VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
6678  return SDValue();
6679 
6680  // For V64 types, we perform extraction by expanding the value
6681  // to a V128 type and perform the extraction on that.
6682  SDLoc DL(Op);
6683  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
6684  EVT WideTy = WideVec.getValueType();
6685 
6686  EVT ExtrTy = WideTy.getVectorElementType();
6687  if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
6688  ExtrTy = MVT::i32;
6689 
6690  // For extractions, we just return the result directly.
6691  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
6692  Op.getOperand(1));
6693 }
6694 
6695 SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
6696  SelectionDAG &DAG) const {
6697  EVT VT = Op.getOperand(0).getValueType();
6698  SDLoc dl(Op);
6699  // Just in case...
6700  if (!VT.isVector())
6701  return SDValue();
6702 
6703  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6704  if (!Cst)
6705  return SDValue();
6706  unsigned Val = Cst->getZExtValue();
6707 
6708  unsigned Size = Op.getValueSizeInBits();
6709 
6710  // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
6711  if (Val == 0)
6712  return Op;
6713 
6714  // If this is extracting the upper 64-bits of a 128-bit vector, we match
6715  // that directly.
6716  if (Size == 64 && Val * VT.getScalarSizeInBits() == 64)
6717  return Op;
6718 
6719  return SDValue();
6720 }
6721 
6722 bool AArch64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
6723  EVT VT) const {
6724  if (VT.getVectorNumElements() == 4 &&
6725  (VT.is128BitVector() || VT.is64BitVector())) {
6726  unsigned PFIndexes[4];
6727  for (unsigned i = 0; i != 4; ++i) {
6728  if (M[i] < 0)
6729  PFIndexes[i] = 8;
6730  else
6731  PFIndexes[i] = M[i];
6732  }
6733 
6734  // Compute the index in the perfect shuffle table.
6735  unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
6736  PFIndexes[2] * 9 + PFIndexes[3];
6737  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6738  unsigned Cost = (PFEntry >> 30);
6739 
6740  if (Cost <= 4)
6741  return true;
6742  }
6743 
6744  bool DummyBool;
6745  int DummyInt;
6746  unsigned DummyUnsigned;
6747 
6748  return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
6749  isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
6750  isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
6751  // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
6752  isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
6753  isZIPMask(M, VT, DummyUnsigned) ||
6754  isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
6755  isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
6756  isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
6757  isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
6758  isConcatMask(M, VT, VT.getSizeInBits() == 128));
6759 }
6760 
6761 /// getVShiftImm - Check if this is a valid build_vector for the immediate
6762 /// operand of a vector shift operation, where all the elements of the
6763 /// build_vector must have the same constant integer value.
6764 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6765  // Ignore bit_converts.
6766  while (Op.getOpcode() == ISD::BITCAST)
6767  Op = Op.getOperand(0);
6768  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6769  APInt SplatBits, SplatUndef;
6770  unsigned SplatBitSize;
6771  bool HasAnyUndefs;
6772  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
6773  HasAnyUndefs, ElementBits) ||
6774  SplatBitSize > ElementBits)
6775  return false;
6776  Cnt = SplatBits.getSExtValue();
6777  return true;
6778 }
6779 
6780 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
6781 /// operand of a vector shift left operation. That value must be in the range:
6782 /// 0 <= Value < ElementBits for a left shift; or
6783 /// 0 <= Value <= ElementBits for a long left shift.
6784 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6785  assert(VT.isVector() && "vector shift count is not a vector type");
6786  int64_t ElementBits = VT.getScalarSizeInBits();
6787  if (!getVShiftImm(Op, ElementBits, Cnt))
6788  return false;
6789  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6790 }
6791 
6792 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
6793 /// operand of a vector shift right operation. The value must be in the range:
6794 /// 1 <= Value <= ElementBits for a right shift; or 1 <= Value <= ElementBits/2 for a narrowing right shift.
6795 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
6796  assert(VT.isVector() && "vector shift count is not a vector type");
6797  int64_t ElementBits = VT.getScalarSizeInBits();
6798  if (!getVShiftImm(Op, ElementBits, Cnt))
6799  return false;
6800  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6801 }
6802 
6803 SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
6804  SelectionDAG &DAG) const {
6805  EVT VT = Op.getValueType();
6806  SDLoc DL(Op);
6807  int64_t Cnt;
6808 
6809  if (!Op.getOperand(1).getValueType().isVector())
6810  return Op;
6811  unsigned EltSize = VT.getScalarSizeInBits();
6812 
6813  switch (Op.getOpcode()) {
6814  default:
6815  llvm_unreachable("unexpected shift opcode");
6816 
6817  case ISD::SHL:
6818  if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
6819  return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
6820  DAG.getConstant(Cnt, DL, MVT::i32));
6821  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
6822  DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
6823  MVT::i32),
6824  Op.getOperand(0), Op.getOperand(1));
6825  case ISD::SRA:
6826  case ISD::SRL:
6827  // Right shift immediate
6828  if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
6829  unsigned Opc =
6830  (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
6831  return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
6832  DAG.getConstant(Cnt, DL, MVT::i32));
6833  }
6834 
6835  // Right shift register. Note, there is not a shift right register
6836  // instruction, but the shift left register instruction takes a signed
6837  // value, where negative numbers specify a right shift.
6838  unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
6839  : Intrinsic::aarch64_neon_ushl;
6840  // negate the shift amount
6841  SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
6842  SDValue NegShiftLeft =
6843  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
6844  DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
6845  NegShift);
6846  return NegShiftLeft;
6847  }
6848 
6849  return SDValue();
6850 }
6851 
6852 static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
6853  AArch64CC::CondCode CC, bool NoNans, EVT VT,
6854  const SDLoc &dl, SelectionDAG &DAG) {
6855  EVT SrcVT = LHS.getValueType();
6856  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
6857  "function only supposed to emit natural comparisons");
6858 
6859  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
6860  APInt CnstBits(VT.getSizeInBits(), 0);
6861  APInt UndefBits(VT.getSizeInBits(), 0);
6862  bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
6863  bool IsZero = IsCnst && (CnstBits == 0);
6864 
6865  if (SrcVT.getVectorElementType().isFloatingPoint()) {
6866  switch (CC) {
6867  default:
6868  return SDValue();
6869  case AArch64CC::NE: {
6870  SDValue Fcmeq;
6871  if (IsZero)
6872  Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
6873  else
6874  Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
6875  return DAG.getNode(AArch64ISD::NOT, dl, VT, Fcmeq);
6876  }
6877  case AArch64CC::EQ:
6878  if (IsZero)
6879  return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
6880  return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
6881  case AArch64CC::GE:
6882  if (IsZero)
6883  return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
6884  return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
6885  case AArch64CC::GT:
6886  if (IsZero)
6887  return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
6888  return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
6889  case AArch64CC::LS:
6890  if (IsZero)
6891  return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
6892  return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
6893  case AArch64CC::LT:
6894  if (!NoNans)
6895  return SDValue();
6896  // If we ignore NaNs then we can use the MI implementation.
6897  LLVM_FALLTHROUGH;
6898  case AArch64CC::MI:
6899  if (IsZero)
6900  return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
6901  return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
6902  }
6903  }
6904 
6905  switch (CC) {
6906  default:
6907  return SDValue();
6908  case AArch64CC::NE: {
6909  SDValue Cmeq;
6910  if (IsZero)
6911  Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
6912  else
6913  Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
6914  return DAG.getNode(AArch64ISD::NOT, dl, VT, Cmeq);
6915  }
6916  case AArch64CC::EQ:
6917  if (IsZero)
6918  return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
6919  return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
6920  case AArch64CC::GE:
6921  if (IsZero)
6922  return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
6923  return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
6924  case AArch64CC::GT:
6925  if (IsZero)
6926  return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
6927  return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
6928  case AArch64CC::LE:
6929  if (IsZero)
6930  return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
6931  return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
6932  case AArch64CC::LS:
6933  return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
6934  case AArch64CC::LO:
6935  return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
6936  case AArch64CC::LT:
6937  if (IsZero)
6938  return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
6939  return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
6940  case AArch64CC::HI:
6941  return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
6942  case AArch64CC::HS:
6943  return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
6944  }
6945 }
6946 
6947 SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
6948  SelectionDAG &DAG) const {
6949  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6950  SDValue LHS = Op.getOperand(0);
6951  SDValue RHS = Op.getOperand(1);
6952  EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
6953  SDLoc dl(Op);
6954 
6955  if (LHS.getValueType().getVectorElementType().isInteger()) {
6956  assert(LHS.getValueType() == RHS.getValueType());
6957  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
6958  SDValue Cmp =
6959  EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
6960  return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
6961  }
6962 
6963  if (LHS.getValueType().getVectorElementType() == MVT::f16)
6964  return SDValue();
6965 
6966  assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
6967  LHS.getValueType().getVectorElementType() == MVT::f64);
6968 
6969  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
6970  // clean. Some of them require two branches to implement.
6971  AArch64CC::CondCode CC1, CC2;
6972  bool ShouldInvert;
6973  changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
6974 
6975  bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
6976  SDValue Cmp =
6977  EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
6978  if (!Cmp.getNode())
6979  return SDValue();
6980 
6981  if (CC2 != AArch64CC::AL) {
6982  SDValue Cmp2 =
6983  EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
6984  if (!Cmp2.getNode())
6985  return SDValue();
6986 
6987  Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
6988  }
6989 
6990  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
6991 
6992  if (ShouldInvert)
6993  return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
6994 
6995  return Cmp;
6996 }
6997 
6998 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
6999 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
7000 /// specified in the intrinsic calls.
7001 bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
7002  const CallInst &I,
7003  unsigned Intrinsic) const {
7004  auto &DL = I.getModule()->getDataLayout();
7005  switch (Intrinsic) {
7006  case Intrinsic::aarch64_neon_ld2:
7007  case Intrinsic::aarch64_neon_ld3:
7008  case Intrinsic::aarch64_neon_ld4:
7009  case Intrinsic::aarch64_neon_ld1x2:
7010  case Intrinsic::aarch64_neon_ld1x3:
7011  case Intrinsic::aarch64_neon_ld1x4:
7012  case Intrinsic::aarch64_neon_ld2lane:
7013  case Intrinsic::aarch64_neon_ld3lane:
7014  case Intrinsic::aarch64_neon_ld4lane:
7015  case Intrinsic::aarch64_neon_ld2r:
7016  case Intrinsic::aarch64_neon_ld3r:
7017  case Intrinsic::aarch64_neon_ld4r: {
7018  Info.opc = ISD::INTRINSIC_W_CHAIN;
7019  // Conservatively set memVT to the entire set of vectors loaded.
7020  uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
7021  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
7022  Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
7023  Info.offset = 0;
7024  Info.align = 0;
7025  Info.vol = false; // volatile loads with NEON intrinsics not supported
7026  Info.readMem = true;
7027  Info.writeMem = false;
7028  return true;
7029  }
7030  case Intrinsic::aarch64_neon_st2:
7031  case Intrinsic::aarch64_neon_st3:
7032  case Intrinsic::aarch64_neon_st4:
7033  case Intrinsic::aarch64_neon_st1x2:
7034  case Intrinsic::aarch64_neon_st1x3:
7035  case Intrinsic::aarch64_neon_st1x4:
7036  case Intrinsic::aarch64_neon_st2lane:
7037  case Intrinsic::aarch64_neon_st3lane:
7038  case Intrinsic::aarch64_neon_st4lane: {
7039  Info.opc = ISD::INTRINSIC_VOID;
7040  // Conservatively set memVT to the entire set of vectors stored.
7041  unsigned NumElts = 0;
7042  for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
7043  Type *ArgTy = I.getArgOperand(ArgI)->getType();
7044  if (!ArgTy->isVectorTy())
7045  break;
7046  NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
7047  }
7048  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
7049  Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
7050  Info.offset = 0;
7051  Info.align = 0;
7052  Info.vol = false; // volatile stores with NEON intrinsics not supported
7053  Info.readMem = false;
7054  Info.writeMem = true;
7055  return true;
7056  }
7057  case Intrinsic::aarch64_ldaxr:
7058  case Intrinsic::aarch64_ldxr: {
7059  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
7060  Info.opc = ISD::INTRINSIC_W_CHAIN;
7061  Info.memVT = MVT::getVT(PtrTy->getElementType());
7062  Info.ptrVal = I.getArgOperand(0);
7063  Info.offset = 0;
7064  Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
7065  Info.vol = true;
7066  Info.readMem = true;
7067  Info.writeMem = false;
7068  return true;
7069  }
7070  case Intrinsic::aarch64_stlxr:
7071  case Intrinsic::aarch64_stxr: {
7072  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
7073  Info.opc = ISD::INTRINSIC_W_CHAIN;
7074  Info.memVT = MVT::getVT(PtrTy->getElementType());
7075  Info.ptrVal = I.getArgOperand(1);
7076  Info.offset = 0;
7077  Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
7078  Info.vol = true;
7079  Info.readMem = false;
7080  Info.writeMem = true;
7081  return true;
7082  }
7083  case Intrinsic::aarch64_ldaxp:
7084  case Intrinsic::aarch64_ldxp:
7085  Info.opc = ISD::INTRINSIC_W_CHAIN;
7086  Info.memVT = MVT::i128;
7087  Info.ptrVal = I.getArgOperand(0);
7088  Info.offset = 0;
7089  Info.align = 16;
7090  Info.vol = true;
7091  Info.readMem = true;
7092  Info.writeMem = false;
7093  return true;
7094  case Intrinsic::aarch64_stlxp:
7095  case Intrinsic::aarch64_stxp:
7096  Info.opc = ISD::INTRINSIC_W_CHAIN;
7097  Info.memVT = MVT::i128;
7098  Info.ptrVal = I.getArgOperand(2);
7099  Info.offset = 0;
7100  Info.align = 16;
7101  Info.vol = true;
7102  Info.readMem = false;
7103  Info.writeMem = true;
7104  return true;
7105  default:
7106  break;
7107  }
7108 
7109  return false;
7110 }
7111 
7112 // Truncations from 64-bit GPR to 32-bit GPR is free.
7113 bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
7114  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
7115  return false;
7116  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
7117  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
7118  return NumBits1 > NumBits2;
7119 }
7120 bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
7121  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
7122  return false;
7123  unsigned NumBits1 = VT1.getSizeInBits();
7124  unsigned NumBits2 = VT2.getSizeInBits();
7125  return NumBits1 > NumBits2;
7126 }
7127 
7128 /// Check if it is profitable to hoist instruction in then/else to if.
7129 /// Not profitable if I and it's user can form a FMA instruction
7130 /// because we prefer FMSUB/FMADD.
7131 bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
7132  if (I->getOpcode() != Instruction::FMul)
7133  return true;
7134 
7135  if (I->getNumUses() != 1)
7136  return true;
7137 
7138  Instruction *User = I->user_back();
7139 
7140  if (User &&
7141  !(User->getOpcode() == Instruction::FSub ||
7142  User->getOpcode() == Instruction::FAdd))
7143  return true;
7144 
7145  const TargetOptions &Options = getTargetMachine().Options;
7146  const DataLayout &DL = I->getModule()->getDataLayout();
7147  EVT VT = getValueType(DL, User->getOperand(0)->getType());
7148 
7149  return !(isFMAFasterThanFMulAndFAdd(VT) &&
7150  isOperationLegalOrCustom(ISD::FMA, VT) &&
7151  (Options.AllowFPOpFusion == FPOpFusion::Fast ||
7152  Options.UnsafeFPMath));
7153 }
7154 
7155 // All 32-bit GPR operations implicitly zero the high-half of the corresponding
7156 // 64-bit GPR.
7157 bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
7158  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
7159  return false;
7160  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
7161  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
7162  return NumBits1 == 32 && NumBits2 == 64;
7163 }
7164 bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
7165  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
7166  return false;
7167  unsigned NumBits1 = VT1.getSizeInBits();
7168  unsigned NumBits2 = VT2.getSizeInBits();
7169  return NumBits1 == 32 && NumBits2 == 64;
7170 }
7171 
7172 bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
7173  EVT VT1 = Val.getValueType();
7174  if (isZExtFree(VT1, VT2)) {
7175  return true;
7176  }
7177 
7178  if (Val.getOpcode() != ISD::LOAD)
7179  return false;
7180 
7181  // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
7182  return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
7183  VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
7184  VT1.getSizeInBits() <= 32);
7185 }
7186 
7187 bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
7188  if (isa<FPExtInst>(Ext))
7189  return false;
7190 
7191  // Vector types are next free.
7192  if (Ext->getType()->isVectorTy())
7193  return false;
7194 
7195  for (const Use &U : Ext->uses()) {
7196  // The extension is free if we can fold it with a left shift in an
7197  // addressing mode or an arithmetic operation: add, sub, and cmp.
7198 
7199  // Is there a shift?
7200  const Instruction *Instr = cast<Instruction>(U.getUser());
7201 
7202  // Is this a constant shift?
7203  switch (Instr->getOpcode()) {
7204  case Instruction::Shl:
7205  if (!isa<ConstantInt>(Instr->getOperand(1)))
7206  return false;
7207  break;
7208  case Instruction::GetElementPtr: {
7209  gep_type_iterator GTI = gep_type_begin(Instr);
7210  auto &DL = Ext->getModule()->getDataLayout();
7211  std::advance(GTI, U.getOperandNo()-1);
7212  Type *IdxTy = GTI.getIndexedType();
7213  // This extension will end up with a shift because of the scaling factor.
7214  // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
7215  // Get the shift amount based on the scaling factor:
7216  // log2(sizeof(IdxTy)) - log2(8).
7217  uint64_t ShiftAmt =
7218  countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3;
7219  // Is the constant foldable in the shift of the addressing mode?
7220  // I.e., shift amount is between 1 and 4 inclusive.
7221  if (ShiftAmt == 0 || ShiftAmt > 4)
7222  return false;
7223  break;
7224  }
7225  case Instruction::Trunc:
7226  // Check if this is a noop.
7227  // trunc(sext ty1 to ty2) to ty1.
7228  if (Instr->getType() == Ext->getOperand(0)->getType())
7229  continue;
7230  LLVM_FALLTHROUGH;
7231  default:
7232  return false;
7233  }
7234 
7235  // At this point we can use the bfm family, so this extension is free
7236  // for that use.
7237  }
7238  return true;
7239 }
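// Worked example of the GEP scaling rule above (a sketch): an index into an array
// of i32 has a 32-bit store size, so ShiftAmt = countTrailingZeros(32) - 3 = 2,
// which is within the 1..4 range and lets the zero-extend fold into an addressing
// mode such as "ldr w0, [x1, w2, uxtw #2]". An i8 array gives ShiftAmt = 0 and a
// 32-byte element type gives ShiftAmt = 5, so neither extension is free here.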
7240 
7241 bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
7242  unsigned &RequiredAligment) const {
7243  if (!LoadedType.isSimple() ||
7244  (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
7245  return false;
7246  // Cyclone supports unaligned accesses.
7247  RequiredAligment = 0;
7248  unsigned NumBits = LoadedType.getSizeInBits();
7249  return NumBits == 32 || NumBits == 64;
7250 }
7251 
7252 /// \brief Lower an interleaved load into a ldN intrinsic.
7253 ///
7254 /// E.g. Lower an interleaved load (Factor = 2):
7255 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
7256 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
7257 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
7258 ///
7259 /// Into:
7260 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
7261 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
7262 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
7263 bool AArch64TargetLowering::lowerInterleavedLoad(
7264  LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
7265  ArrayRef<unsigned> Indices, unsigned Factor) const {
7266  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
7267  "Invalid interleave factor");
7268  assert(!Shuffles.empty() && "Empty shufflevector input");
7269  assert(Shuffles.size() == Indices.size() &&
7270  "Unmatched number of shufflevectors and indices");
7271 
7272  const DataLayout &DL = LI->getModule()->getDataLayout();
7273 
7274  VectorType *VecTy = Shuffles[0]->getType();
7275  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
7276 
7277  // Skip if we do not have NEON and skip illegal vector types.
7278  if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128))
7279  return false;
7280 
7281  // A pointer vector can not be the return type of the ldN intrinsics. Need to
7282  // load integer vectors first and then convert to pointer vectors.
7283  Type *EltTy = VecTy->getVectorElementType();
7284  if (EltTy->isPointerTy())
7285  VecTy =
7286  VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
7287 
7288  Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
7289  Type *Tys[2] = {VecTy, PtrTy};
7290  static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
7291  Intrinsic::aarch64_neon_ld3,
7292  Intrinsic::aarch64_neon_ld4};
7293  Function *LdNFunc =
7294  Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
7295 
7296  IRBuilder<> Builder(LI);
7297  Value *Ptr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy);
7298 
7299  CallInst *LdN = Builder.CreateCall(LdNFunc, Ptr, "ldN");
7300 
7301  // Replace uses of each shufflevector with the corresponding vector loaded
7302  // by ldN.
7303  for (unsigned i = 0; i < Shuffles.size(); i++) {
7304  ShuffleVectorInst *SVI = Shuffles[i];
7305  unsigned Index = Indices[i];
7306 
7307  Value *SubVec = Builder.CreateExtractValue(LdN, Index);
7308 
7309  // Convert the integer vector to pointer vector if the element is pointer.
7310  if (EltTy->isPointerTy())
7311  SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
7312 
7313  SVI->replaceAllUsesWith(SubVec);
7314  }
7315 
7316  return true;
7317 }
7318 
7319 /// \brief Get a mask consisting of sequential integers starting from \p Start.
7320 ///
7321 /// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
7322 static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
7323  unsigned NumElts) {
7324  SmallVector<Constant *, 16> Mask;
7325  for (unsigned i = 0; i < NumElts; i++)
7326  Mask.push_back(Builder.getInt32(Start + i));
7327 
7328  return ConstantVector::get(Mask);
7329 }
7330 
7331 /// \brief Lower an interleaved store into a stN intrinsic.
7332 ///
7333 /// E.g. Lower an interleaved store (Factor = 3):
7334 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
7335 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
7336 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
7337 ///
7338 /// Into:
7339 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
7340 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
7341 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
7342 /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
7343 ///
7344 /// Note that the new shufflevectors will be removed and we'll only generate one
7345 /// st3 instruction in CodeGen.
7346 ///
7347 /// Example for a more general valid mask (Factor 3). Lower:
7348 /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
7349 /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
7350 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
7351 ///
7352 /// Into:
7353 /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
7354 /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
7355 /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
7356 /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
7357 bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
7358  ShuffleVectorInst *SVI,
7359  unsigned Factor) const {
7360  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
7361  "Invalid interleave factor");
7362 
7363  VectorType *VecTy = SVI->getType();
7364  assert(VecTy->getVectorNumElements() % Factor == 0 &&
7365  "Invalid interleaved store");
7366 
7367  unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
7368  Type *EltTy = VecTy->getVectorElementType();
7369  VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
7370 
7371  const DataLayout &DL = SI->getModule()->getDataLayout();
7372  unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
7373 
7374  // Skip if we do not have NEON and skip illegal vector types.
7375  if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128))
7376  return false;
7377 
7378  Value *Op0 = SVI->getOperand(0);
7379  Value *Op1 = SVI->getOperand(1);
7380  IRBuilder<> Builder(SI);
7381 
7382  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
7383  // vectors to integer vectors.
7384  if (EltTy->isPointerTy()) {
7385  Type *IntTy = DL.getIntPtrType(EltTy);
7386  unsigned NumOpElts =
7387  dyn_cast<VectorType>(Op0->getType())->getVectorNumElements();
7388 
7389  // Convert to the corresponding integer vector.
7390  Type *IntVecTy = VectorType::get(IntTy, NumOpElts);
7391  Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
7392  Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
7393 
7394  SubVecTy = VectorType::get(IntTy, LaneLen);
7395  }
7396 
7397  Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
7398  Type *Tys[2] = {SubVecTy, PtrTy};
7399  static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
7400  Intrinsic::aarch64_neon_st3,
7401  Intrinsic::aarch64_neon_st4};
7402  Function *StNFunc =
7403  Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
7404 
7405  SmallVector<Value *, 2> Ops;
7406 
7407  // Split the shufflevector operands into sub vectors for the new stN call.
7408  auto Mask = SVI->getShuffleMask();
7409  for (unsigned i = 0; i < Factor; i++) {
7410  if (Mask[i] >= 0) {
7411  Ops.push_back(Builder.CreateShuffleVector(
7412  Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
7413  } else {
7414  unsigned StartMask = 0;
7415  for (unsigned j = 1; j < LaneLen; j++) {
7416  if (Mask[j*Factor + i] >= 0) {
7417  StartMask = Mask[j*Factor + i] - j;
7418  break;
7419  }
7420  }
7421  // Note: If all elements in a chunk are undefs, StartMask=0!
7422  // Note: Filling undef gaps with random elements is ok, since
7423  // those elements were being written anyway (with undefs).
7424  // In the case of all undefs we're defaulting to using elems from 0
7425  // Note: StartMask cannot be negative, it's checked in isReInterleaveMask
7426  Ops.push_back(Builder.CreateShuffleVector(
7427  Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
7428  }
7429  }
7430 
7431  Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy));
7432  Builder.CreateCall(StNFunc, Ops);
7433  return true;
7434 }
7435 
7436 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
7437  unsigned AlignCheck) {
7438  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
7439  (DstAlign == 0 || DstAlign % AlignCheck == 0));
7440 }
7441 
7442 EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
7443  unsigned SrcAlign, bool IsMemset,
7444  bool ZeroMemset,
7445  bool MemcpyStrSrc,
7446  MachineFunction &MF) const {
7447  // Don't use AdvSIMD to implement 16-byte memset. It would have taken one
7448  // instruction to materialize the v2i64 zero and one store (with restrictive
7449  // addressing mode). Just do two i64 store of zero-registers.
7450  bool Fast;
7451  const Function *F = MF.getFunction();
7452  if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
7453  !F->hasFnAttribute(Attribute::NoImplicitFloat) &&
7454  (memOpAlign(SrcAlign, DstAlign, 16) ||
7455  (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
7456  return MVT::f128;
7457 
7458  if (Size >= 8 &&
7459  (memOpAlign(SrcAlign, DstAlign, 8) ||
7460  (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
7461  return MVT::i64;
7462 
7463  if (Size >= 4 &&
7464  (memOpAlign(SrcAlign, DstAlign, 4) ||
7465  (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
7466  return MVT::i32;
7467 
7468  return MVT::Other;
7469 }
7470 
7471 // 12-bit optionally shifted immediates are legal for adds.
7472 bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
7473  // Avoid UB for INT64_MIN.
7474  if (Immed == std::numeric_limits<int64_t>::min())
7475  return false;
7476  // Same encoding for add/sub, just flip the sign.
7477  Immed = std::abs(Immed);
7478  return ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
7479 }
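// A minimal standalone sketch (not part of this file) of the add-immediate rule
// above: an immediate is encodable if it fits in 12 bits, optionally shifted left
// by 12.
//
//   #include <cstdint>
//   #include <cstdlib>
//   static bool isLegalAddImmSketch(int64_t Immed) {
//     if (Immed == INT64_MIN)
//       return false;                                      // std::abs would overflow
//     Immed = std::abs(Immed);                             // add and sub share the encoding
//     return (Immed >> 12) == 0 ||                         // plain uimm12
//            ((Immed & 0xfff) == 0 && (Immed >> 24) == 0); // uimm12 shifted by 12
//   }
//
// For example, 4095 and 0xabc000 are legal, while 4097 needs two instructions.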
7480 
7481 // Integer comparisons are implemented with ADDS/SUBS, so the range of valid
7482 // immediates is the same as for an add or a sub.
7483 bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
7484  return isLegalAddImmediate(Immed);
7485 }
7486 
7487 /// isLegalAddressingMode - Return true if the addressing mode represented
7488 /// by AM is legal for this target, for a load/store of the specified type.
7489 bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
7490  const AddrMode &AM, Type *Ty,
7491  unsigned AS) const {
7492  // AArch64 has five basic addressing modes:
7493  // reg
7494  // reg + 9-bit signed offset
7495  // reg + SIZE_IN_BYTES * 12-bit unsigned offset
7496  // reg1 + reg2
7497  // reg + SIZE_IN_BYTES * reg
7498 
7499  // No global is ever allowed as a base.
7500  if (AM.BaseGV)
7501  return false;
7502 
7503  // No reg+reg+imm addressing.
7504  if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
7505  return false;
7506 
7507  // check reg + imm case:
7508  // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
7509  uint64_t NumBytes = 0;
7510  if (Ty->isSized()) {
7511  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
7512  NumBytes = NumBits / 8;
7513  if (!isPowerOf2_64(NumBits))
7514  NumBytes = 0;
7515  }
7516 
7517  if (!AM.Scale) {
7518  int64_t Offset = AM.BaseOffs;
7519 
7520  // 9-bit signed offset
7521  if (isInt<9>(Offset))
7522  return true;
7523 
7524  // 12-bit unsigned offset
7525  unsigned shift = Log2_64(NumBytes);
7526  if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
7527  // Must be a multiple of NumBytes (NumBytes is a power of 2)
7528  (Offset >> shift) << shift == Offset)
7529  return true;
7530  return false;
7531  }
7532 
7533  // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
7534 
7535  return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
7536 }
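// Example of the reg + imm rule above (a sketch): for an i64 access NumBytes is 8,
// so "[x0, #16]" is legal both as a 9-bit signed offset and as 2 * 8 under the
// scaled 12-bit form, "[x0, #264]" is legal only through the scaled form (33 * 8),
// and "[x0, #257]" is rejected because it neither fits in 9 signed bits nor is a
// multiple of 8.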
7537 
7538 int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL,
7539  const AddrMode &AM, Type *Ty,
7540  unsigned AS) const {
7541  // Scaling factors are not free at all.
7542  // Operands | Rt Latency
7543  // -------------------------------------------
7544  // Rt, [Xn, Xm] | 4
7545  // -------------------------------------------
7546  // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
7547  // Rt, [Xn, Wm, <extend> #imm] |
7548  if (isLegalAddressingMode(DL, AM, Ty, AS))
7549  // Scale represents reg2 * scale, thus account for 1 if
7550  // it is not equal to 0 or 1.
7551  return AM.Scale != 0 && AM.Scale != 1;
7552  return -1;
7553 }
7554 
7555 bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
7556  VT = VT.getScalarType();
7557 
7558  if (!VT.isSimple())
7559  return false;
7560 
7561  switch (VT.getSimpleVT().SimpleTy) {
7562  case MVT::f32:
7563  case MVT::f64:
7564  return true;
7565  default:
7566  break;
7567  }
7568 
7569  return false;
7570 }
7571 
7572 const MCPhysReg *
7573 AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
7574  // LR is a callee-save register, but we must treat it as clobbered by any call
7575  // site. Hence we include LR in the scratch registers, which are in turn added
7576  // as implicit-defs for stackmaps and patchpoints.
7577  static const MCPhysReg ScratchRegs[] = {
7578  AArch64::X16, AArch64::X17, AArch64::LR, 0
7579  };
7580  return ScratchRegs;
7581 }
7582 
7583 bool
7584 AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const {
7585  EVT VT = N->getValueType(0);
7586  // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
7587  // it with shift to let it be lowered to UBFX.
7588  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
7589  isa<ConstantSDNode>(N->getOperand(1))) {
7590  uint64_t TruncMask = N->getConstantOperandVal(1);
7591  if (isMask_64(TruncMask) &&
7592  N->getOperand(0).getOpcode() == ISD::SRL &&
7593  isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
7594  return false;
7595  }
7596  return true;
7597 }
7598 
7599 bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
7600  Type *Ty) const {
7601  assert(Ty->isIntegerTy());
7602 
7603  unsigned BitSize = Ty->getPrimitiveSizeInBits();
7604  if (BitSize == 0)
7605  return false;
7606 
7607  int64_t Val = Imm.getSExtValue();
7608  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
7609  return true;
7610 
7611  if ((int64_t)Val < 0)
7612  Val = ~Val;
7613  if (BitSize == 32)
7614  Val &= (1LL << 32) - 1;
7615 
7616  unsigned LZ = countLeadingZeros((uint64_t)Val);
7617  unsigned Shift = (63 - LZ) / 16;
7618  // MOVZ is free so return true for one or fewer MOVK.
7619  return Shift < 3;
7620 }
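// Worked example of the MOVZ/MOVK heuristic above (a sketch): 0x1234 sits in the
// low 16 bits (Shift == 0) and needs a single MOVZ; 0x123456789A has its top set
// bit in the third 16-bit chunk (Shift == 2), so MOVZ plus two MOVKs is still
// preferred over a load; 0x1234567890ABCDEF would need MOVZ plus three MOVKs
// (Shift == 3) and is left as a constant-pool load.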
7621 
7622 /// Turn vector tests of the signbit in the form of:
7623 /// xor (sra X, elt_size(X)-1), -1
7624 /// into:
7625 /// cmge X, X, #0
7626 static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
7627  const AArch64Subtarget *Subtarget) {
7628  EVT VT = N->getValueType(0);
7629  if (!Subtarget->hasNEON() || !VT.isVector())
7630  return SDValue();
7631 
7632  // There must be a shift right algebraic before the xor, and the xor must be a
7633  // 'not' operation.
7634  SDValue Shift = N->getOperand(0);
7635  SDValue Ones = N->getOperand(1);
7636  if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
7637  !ISD::isBuildVectorAllOnes(Ones.getNode()))
7638  return SDValue();
7639 
7640  // The shift should be smearing the sign bit across each vector element.
7641  auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
7642  EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
7643  if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
7644  return SDValue();
7645 
7646  return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
7647 }
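// Example of the signbit fold above (a sketch): for %x : <4 x i32>,
//   xor (sra %x, <i32 31, i32 31, i32 31, i32 31>), <i32 -1, ...>
// asks whether each lane is non-negative, and is rewritten to a single
//   cmge v0.4s, v0.4s, #0
// rather than a vector shift followed by a NOT.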
7648 
7649 // Generate SUBS and CSEL for integer abs.
7650 static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
7651  EVT VT = N->getValueType(0);
7652 
7653  SDValue N0 = N->getOperand(0);
7654  SDValue N1 = N->getOperand(1);
7655  SDLoc DL(N);
7656 
7657  // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
7658  // and change it to SUB and CSEL.
7659  if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
7660  N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
7661  N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
7662  if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
7663  if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
7664  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7665  N0.getOperand(0));
7666  // Generate SUBS & CSEL.
7667  SDValue Cmp =
7668  DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
7669  N0.getOperand(0), DAG.getConstant(0, DL, VT));
7670  return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
7671  DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
7672  SDValue(Cmp.getNode(), 1));
7673  }
7674  return SDValue();
7675 }
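// The abs idiom recognized above, written out in plain C (a sketch, ignoring
// INT_MIN overflow) for a 32-bit value X:
//   int Y   = X >> 31;       // SRA(X, size(X)-1): 0 or -1
//   int Abs = (X + Y) ^ Y;   // XOR(ADD(X, Y), Y) == |X|
// The combine re-expresses this as a compare (SUBS X, #0) plus a CSEL that keeps
// X when the result is non-negative and selects 0 - X otherwise.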
7676 
7677 static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
7678  TargetLowering::DAGCombinerInfo &DCI,
7679  const AArch64Subtarget *Subtarget) {
7680  if (DCI.isBeforeLegalizeOps())
7681  return SDValue();
7682 
7683  if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
7684  return Cmp;
7685 
7686  return performIntegerAbsCombine(N, DAG);
7687 }
7688 
7689 SDValue
7690 AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
7691  SelectionDAG &DAG,
7692  std::vector<SDNode *> *Created) const {
7693  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
7694  if (isIntDivCheap(N->getValueType(0), Attr))
7695  return SDValue(N,0); // Lower SDIV as SDIV
7696 
7697  // fold (sdiv X, pow2)
7698  EVT VT = N->getValueType(0);
7699  if ((VT != MVT::i32 && VT != MVT::i64) ||
7700  !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
7701  return SDValue();
7702 
7703  SDLoc DL(N);
7704  SDValue N0 = N->getOperand(0);
7705  unsigned Lg2 = Divisor.countTrailingZeros();
7706  SDValue Zero = DAG.getConstant(0, DL, VT);
7707  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
7708 
7709  // Add (N0 < 0) ? Pow2 - 1 : 0;
7710  SDValue CCVal;
7711  SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
7712  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
7713  SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
7714 
7715  if (Created) {
7716  Created->push_back(Cmp.getNode());
7717  Created->push_back(Add.getNode());
7718  Created->push_back(CSel.getNode());
7719  }
7720 
7721  // Divide by pow2.
7722  SDValue SRA =
7723  DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
7724 
7725  // If we're dividing by a positive value, we're done. Otherwise, we must
7726  // negate the result.
7727  if (Divisor.isNonNegative())
7728  return SRA;
7729 
7730  if (Created)
7731  Created->push_back(SRA.getNode());
7732  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
7733 }
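// Worked example of the expansion above (a sketch): for "sdiv i32 %x, 8",
// Lg2 == 3 and the selected code is roughly
//   add  w8, w0, #7          // %x + (8 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt      // use the biased value only when %x < 0
//   asr  w0, w8, #3          // shift right to divide
// with an extra negation appended when the divisor itself is negative.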
7734 
7735 static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
7736  TargetLowering::DAGCombinerInfo &DCI,
7737  const AArch64Subtarget *Subtarget) {
7738  if (DCI.isBeforeLegalizeOps())
7739  return SDValue();
7740 
7741  // The below optimizations require a constant RHS.
7742  if (!isa<ConstantSDNode>(N->getOperand(1)))
7743  return SDValue();
7744 
7745  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
7746  const APInt &ConstValue = C->getAPIntValue();
7747 
7748  // Multiplication of a power of two plus/minus one can be done more
7749  // cheaply as a shift+add/sub. For now, this is true unilaterally. If
7750  // future CPUs have a cheaper MADD instruction, this may need to be
7751  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
7752  // 64-bit is 5 cycles, so this is always a win.
7753  // More aggressively, some multiplications N0 * C can be lowered to
7754  // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
7755  // e.g. 6=3*2=(2+1)*2.
7756  // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
7757  // which equals to (1+2)*16-(1+2).
7758  SDValue N0 = N->getOperand(0);
7759  // TrailingZeroes is used to test if the mul can be lowered to
7760  // shift+add+shift.
7761  unsigned TrailingZeroes = ConstValue.countTrailingZeros();
7762  if (TrailingZeroes) {
7763  // Conservatively do not lower to shift+add+shift if the mul might be
7764  // folded into smul or umul.
7765  if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
7766  isZeroExtended(N0.getNode(), DAG)))
7767  return SDValue();
7768  // Conservatively do not lower to shift+add+shift if the mul might be
7769  // folded into madd or msub.
7770  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
7771  N->use_begin()->getOpcode() == ISD::SUB))
7772  return SDValue();
7773  }
7774  // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
7775  // and shift+add+shift.
7776  APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
7777 
7778  unsigned ShiftAmt, AddSubOpc;
7779  // Is the shifted value the LHS operand of the add/sub?
7780  bool ShiftValUseIsN0 = true;
7781  // Do we need to negate the result?
7782  bool NegateResult = false;
7783 
7784  if (ConstValue.isNonNegative()) {
7785  // (mul x, 2^N + 1) => (add (shl x, N), x)
7786  // (mul x, 2^N - 1) => (sub (shl x, N), x)
7787  // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
7788  APInt SCVMinus1 = ShiftedConstValue - 1;
7789  APInt CVPlus1 = ConstValue + 1;
7790  if (SCVMinus1.isPowerOf2()) {
7791  ShiftAmt = SCVMinus1.logBase2();
7792  AddSubOpc = ISD::ADD;
7793  } else if (CVPlus1.isPowerOf2()) {
7794  ShiftAmt = CVPlus1.logBase2();
7795  AddSubOpc = ISD::SUB;
7796  } else
7797  return SDValue();
7798  } else {
7799  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
7800  // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
7801  APInt CVNegPlus1 = -ConstValue + 1;
7802  APInt CVNegMinus1 = -ConstValue - 1;
7803  if (CVNegPlus1.isPowerOf2()) {
7804  ShiftAmt = CVNegPlus1.logBase2();
7805  AddSubOpc = ISD::SUB;
7806  ShiftValUseIsN0 = false;
7807  } else if (CVNegMinus1.isPowerOf2()) {
7808  ShiftAmt = CVNegMinus1.logBase2();
7809  AddSubOpc = ISD::ADD;
7810  NegateResult = true;
7811  } else
7812  return SDValue();
7813  }
7814 
7815  SDLoc DL(N);
7816  EVT VT = N->getValueType(0);
7817  SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
7818  DAG.getConstant(ShiftAmt, DL, MVT::i64));
7819 
7820  SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
7821  SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
7822  SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
7823  assert(!(NegateResult && TrailingZeroes) &&
7824  "NegateResult and TrailingZeroes cannot both be true for now.");
7825  // Negate the result.
7826  if (NegateResult)
7827  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
7828  // Shift the result.
7829  if (TrailingZeroes)
7830  return DAG.getNode(ISD::SHL, DL, VT, Res,
7831  DAG.getConstant(TrailingZeroes, DL, MVT::i64));
7832  return Res;
7833 }
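// Worked examples of the constant-multiply rewrites above (a sketch):
//   mul x, 9    ->  add (shl x, 3), x              ; 2^3 + 1
//   mul x, 7    ->  sub (shl x, 3), x              ; 2^3 - 1
//   mul x, 6    ->  shl (add (shl x, 1), x), 1     ; (2^1 + 1) * 2^1
//   mul x, -7   ->  sub x, (shl x, 3)              ; -(2^3 - 1)
//   mul x, -9   ->  sub 0, (add (shl x, 3), x)     ; -(2^3 + 1), negated result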
7834 
7835 static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
7836  SelectionDAG &DAG) {
7837  // Take advantage of vector comparisons producing 0 or -1 in each lane to
7838  // optimize away operation when it's from a constant.
7839  //
7840  // The general transformation is:
7841  // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
7842  // AND(VECTOR_CMP(x,y), constant2)
7843  // constant2 = UNARYOP(constant)
7844 
7845  // Early exit if this isn't a vector operation, the operand of the
7846  // unary operation isn't a bitwise AND, or if the sizes of the operations
7847  // aren't the same.
7848  EVT VT = N->getValueType(0);
7849  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
7850  N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
7851  VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
7852  return SDValue();
7853 
7854  // Now check that the other operand of the AND is a constant. We could
7855  // make the transformation for non-constant splats as well, but it's unclear
7856  // that would be a benefit as it would not eliminate any operations, just
7857  // perform one more step in scalar code before moving to the vector unit.
7858  if (BuildVectorSDNode *BV =
7859  dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
7860  // Bail out if the vector isn't a constant.
7861  if (!BV->isConstant())
7862  return SDValue();
7863 
7864  // Everything checks out. Build up the new and improved node.
7865  SDLoc DL(N);
7866  EVT IntVT = BV->getValueType(0);
7867  // Create a new constant of the appropriate type for the transformed
7868  // DAG.
7869  SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
7870  // The AND node needs bitcasts to/from an integer vector type around it.
7871  SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
7872  SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
7873  N->getOperand(0)->getOperand(0), MaskConst);
7874  SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
7875  return Res;
7876  }
7877 
7878  return SDValue();
7879 }
7880 
7881 static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
7882  const AArch64Subtarget *Subtarget) {
7883  // First try to optimize away the conversion when it's conditionally from
7884  // a constant. Vectors only.
7885  if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
7886  return Res;
7887 
7888  EVT VT = N->getValueType(0);
7889  if (VT != MVT::f32 && VT != MVT::f64)
7890  return SDValue();
7891 
7892  // Only optimize when the source and destination types have the same width.
7893  if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
7894  return SDValue();
7895 
7896  // If the result of an integer load is only used by an integer-to-float
7897  // conversion, use an FP load and an AdvSIMD scalar {S|U}CVTF instead.
7898  // This eliminates an "integer-to-vector-move" UOP and improves throughput.
7899  SDValue N0 = N->getOperand(0);
7900  if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
7901  // Do not change the width of a volatile load.
7902  !cast<LoadSDNode>(N0)->isVolatile()) {
7903  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7904  SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
7905  LN0->getPointerInfo(), LN0->getAlignment(),
7906  LN0->getMemOperand()->getFlags());
7907 
7908  // Make sure successors of the original load stay after it by updating them
7909  // to use the new Chain.
7910  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
7911 
7912  unsigned Opcode =
7913  (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
7914  return DAG.getNode(Opcode, SDLoc(N), VT, Load);
7915  }
7916 
7917  return SDValue();
7918 }
7919 
7920 /// Fold a floating-point multiply by power of two into floating-point to
7921 /// fixed-point conversion.
7922 static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
7923  TargetLowering::DAGCombinerInfo &DCI,
7924  const AArch64Subtarget *Subtarget) {
7925  if (!Subtarget->hasNEON())
7926  return SDValue();
7927 
7928  SDValue Op = N->getOperand(0);
7929  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
7930  Op.getOpcode() != ISD::FMUL)
7931  return SDValue();
7932 
7933  SDValue ConstVec = Op->getOperand(1);
7934  if (!isa<BuildVectorSDNode>(ConstVec))
7935  return SDValue();
7936 
7937  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
7938  uint32_t FloatBits = FloatTy.getSizeInBits();
7939  if (FloatBits != 32 && FloatBits != 64)
7940  return SDValue();
7941 
7942  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
7943  uint32_t IntBits = IntTy.getSizeInBits();
7944  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
7945  return SDValue();
7946 
7947  // Avoid conversions where iN is larger than the float (e.g., float -> i64).
7948  if (IntBits > FloatBits)
7949  return SDValue();
7950 
7951  BitVector UndefElements;
7952  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
7953  int32_t Bits = IntBits == 64 ? 64 : 32;
7954  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
7955  if (C == -1 || C == 0 || C > Bits)
7956  return SDValue();
7957 
7958  MVT ResTy;
7959  unsigned NumLanes = Op.getValueType().getVectorNumElements();
7960  switch (NumLanes) {
7961  default:
7962  return SDValue();
7963  case 2:
7964  ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
7965  break;
7966  case 4:
7967  ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
7968  break;
7969  }
7970 
7971  if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
7972  return SDValue();
7973 
7974  assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
7975  "Illegal vector type after legalization");
7976 
7977  SDLoc DL(N);
7978  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
7979  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
7980  : Intrinsic::aarch64_neon_vcvtfp2fxu;
7981  SDValue FixConv =
7982  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
7983  DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
7984  Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
7985  // We can handle smaller integers by generating an extra trunc.
7986  if (IntBits < FloatBits)
7987  FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
7988 
7989  return FixConv;
7990 }
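// Illustrative sketch, not part of the original file: at the source level the
// fold above means a multiply by a power of two feeding a float->int convert
// can be written directly as a fixed-point convert. Hypothetical example,
// assuming <arm_neon.h> and NEON:
//
//   int32x4_t to_fixed(float32x4_t v) {
//     // (int)(v * 256.0f) per lane collapses to one fcvtzs with 8 fractional bits.
//     return vcvtq_n_s32_f32(v, 8);
//   }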
7991 
7992 /// Fold a floating-point divide by power of two into fixed-point to
7993 /// floating-point conversion.
7994 static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
7995  TargetLowering::DAGCombinerInfo &DCI,
7996  const AArch64Subtarget *Subtarget) {
7997  if (!Subtarget->hasNEON())
7998  return SDValue();
7999 
8000  SDValue Op = N->getOperand(0);
8001  unsigned Opc = Op->getOpcode();
8002  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
8003  !Op.getOperand(0).getValueType().isSimple() ||
8004  (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
8005  return SDValue();
8006 
8007  SDValue ConstVec = N->getOperand(1);
8008  if (!isa<BuildVectorSDNode>(ConstVec))
8009  return SDValue();
8010 
8011  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
8012  int32_t IntBits = IntTy.getSizeInBits();
8013  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
8014  return SDValue();
8015 
8016  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
8017  int32_t FloatBits = FloatTy.getSizeInBits();
8018  if (FloatBits != 32 && FloatBits != 64)
8019  return SDValue();
8020 
8021  // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
8022  if (IntBits > FloatBits)
8023  return SDValue();
8024 
8025  BitVector UndefElements;
8026  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
8027  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
8028  if (C == -1 || C == 0 || C > FloatBits)
8029  return SDValue();
8030 
8031  MVT ResTy;
8032  unsigned NumLanes = Op.getValueType().getVectorNumElements();
8033  switch (NumLanes) {
8034  default:
8035  return SDValue();
8036  case 2:
8037  ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
8038  break;
8039  case 4:
8040  ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
8041  break;
8042  }
8043 
8044  if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
8045  return SDValue();
8046 
8047  SDLoc DL(N);
8048  SDValue ConvInput = Op.getOperand(0);
8049  bool IsSigned = Opc == ISD::SINT_TO_FP;
8050  if (IntBits < FloatBits)
8051  ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
8052  ResTy, ConvInput);
8053 
8054  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
8055  : Intrinsic::aarch64_neon_vcvtfxu2fp;
8056  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
8057  DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
8058  DAG.getConstant(C, DL, MVT::i32));
8059 }
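// Illustrative sketch, not part of the original file: the mirror case, an
// int->float convert divided by a power of two, maps to the fixed-point to
// floating-point converts. Hypothetical example, assuming <arm_neon.h>:
//
//   float32x4_t from_fixed(int32x4_t v) {
//     // ((float)v) / 64.0f per lane collapses to one scvtf with 6 fractional bits.
//     return vcvtq_n_f32_s32(v, 6);
//   }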
8060 
8061 /// An EXTR instruction is made up of two shifts, ORed together. This helper
8062 /// searches for and classifies those shifts.
8063 static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
8064  bool &FromHi) {
8065  if (N.getOpcode() == ISD::SHL)
8066  FromHi = false;
8067  else if (N.getOpcode() == ISD::SRL)
8068  FromHi = true;
8069  else
8070  return false;
8071 
8072  if (!isa<ConstantSDNode>(N.getOperand(1)))
8073  return false;
8074 
8075  ShiftAmount = N->getConstantOperandVal(1);
8076  Src = N->getOperand(0);
8077  return true;
8078 }
8079 
8080 /// EXTR instruction extracts a contiguous chunk of bits from two existing
8081 /// registers viewed as a high/low pair. This function looks for the pattern:
8082 /// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
8083 /// EXTR. Can't quite be done in TableGen because the two immediates aren't
8084 /// independent.
8085 static SDValue tryCombineToEXTR(SDNode *N,
8086  TargetLowering::DAGCombinerInfo &DCI) {
8087  SelectionDAG &DAG = DCI.DAG;
8088  SDLoc DL(N);
8089  EVT VT = N->getValueType(0);
8090 
8091  assert(N->getOpcode() == ISD::OR && "Unexpected root");
8092 
8093  if (VT != MVT::i32 && VT != MVT::i64)
8094  return SDValue();
8095 
8096  SDValue LHS;
8097  uint32_t ShiftLHS = 0;
8098  bool LHSFromHi = false;
8099  if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
8100  return SDValue();
8101 
8102  SDValue RHS;
8103  uint32_t ShiftRHS = 0;
8104  bool RHSFromHi = false;
8105  if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
8106  return SDValue();
8107 
8108  // If they're both trying to come from the high part of the register, they're
8109  // not really an EXTR.
8110  if (LHSFromHi == RHSFromHi)
8111  return SDValue();
8112 
8113  if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
8114  return SDValue();
8115 
8116  if (LHSFromHi) {
8117  std::swap(LHS, RHS);
8118  std::swap(ShiftLHS, ShiftRHS);
8119  }
8120 
8121  return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
8122  DAG.getConstant(ShiftRHS, DL, MVT::i64));
8123 }
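// Illustrative sketch, not part of the original file: the OR-of-two-shifts
// shape this combine matches is how an "extract a 64-bit window from a
// register pair" expression is usually written. Hypothetical example:
//
//   uint64_t extr_pattern(uint64_t hi, uint64_t lo) {
//     // (or (shl hi, #24), (srl lo, #40)); 24 + 40 == 64, so one EXTR suffices.
//     return (hi << 24) | (lo >> 40);
//   }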
8124 
8125 static SDValue tryCombineToBSL(SDNode *N,
8126  TargetLowering::DAGCombinerInfo &DCI) {
8127  EVT VT = N->getValueType(0);
8128  SelectionDAG &DAG = DCI.DAG;
8129  SDLoc DL(N);
8130 
8131  if (!VT.isVector())
8132  return SDValue();
8133 
8134  SDValue N0 = N->getOperand(0);
8135  if (N0.getOpcode() != ISD::AND)
8136  return SDValue();
8137 
8138  SDValue N1 = N->getOperand(1);
8139  if (N1.getOpcode() != ISD::AND)
8140  return SDValue();
8141 
8142  // We only have to look for constant vectors here since the general, variable
8143  // case can be handled in TableGen.
8144  unsigned Bits = VT.getScalarSizeInBits();
8145  uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
8146  for (int i = 1; i >= 0; --i)
8147  for (int j = 1; j >= 0; --j) {
8148  BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
8149  BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
8150  if (!BVN0 || !BVN1)
8151  continue;
8152 
8153  bool FoundMatch = true;
8154  for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
8155  ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
8156  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
8157  if (!CN0 || !CN1 ||
8158  CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
8159  FoundMatch = false;
8160  break;
8161  }
8162  }
8163 
8164  if (FoundMatch)
8165  return DAG.getNode(AArch64ISD::BSL, DL, VT, SDValue(BVN0, 0),
8166  N0->getOperand(1 - i), N1->getOperand(1 - j));
8167  }
8168 
8169  return SDValue();
8170 }
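// Illustrative sketch, not part of the original file: two ANDs with
// complementary constant masks are a bitwise select, which is exactly what
// BSL computes. Hypothetical example, assuming <arm_neon.h>:
//
//   uint32x4_t bsl_pattern(uint32x4_t a, uint32x4_t b) {
//     uint32x4_t m = vdupq_n_u32(0x00ff00ffu);
//     // (a & m) | (b & ~m): take bits from a where m is set, from b elsewhere.
//     return vorrq_u32(vandq_u32(a, m), vandq_u32(b, vmvnq_u32(m)));
//   }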
8171 
8172 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
8173  const AArch64Subtarget *Subtarget) {
8174  // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
8175  SelectionDAG &DAG = DCI.DAG;
8176  EVT VT = N->getValueType(0);
8177 
8178  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
8179  return SDValue();
8180 
8181  if (SDValue Res = tryCombineToEXTR(N, DCI))
8182  return Res;
8183 
8184  if (SDValue Res = tryCombineToBSL(N, DCI))
8185  return Res;
8186 
8187  return SDValue();
8188 }
8189 
8190 static SDValue performSRLCombine(SDNode *N,
8191  TargetLowering::DAGCombinerInfo &DCI) {
8192  SelectionDAG &DAG = DCI.DAG;
8193  EVT VT = N->getValueType(0);
8194  if (VT != MVT::i32 && VT != MVT::i64)
8195  return SDValue();
8196 
8197  // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
8198  // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
8199  // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
8200  SDValue N0 = N->getOperand(0);
8201  if (N0.getOpcode() == ISD::BSWAP) {
8202  SDLoc DL(N);
8203  SDValue N1 = N->getOperand(1);
8204  SDValue N00 = N0.getOperand(0);
8205  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
8206  uint64_t ShiftAmt = C->getZExtValue();
8207  if (VT == MVT::i32 && ShiftAmt == 16 &&
8208  DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
8209  return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
8210  if (VT == MVT::i64 && ShiftAmt == 32 &&
8211  DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
8212  return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
8213  }
8214  }
8215  return SDValue();
8216 }
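// Illustrative sketch, not part of the original file: when the high 16 bits
// of x are already zero, the bits shifted out of the byte swap are zero too,
// so the shift equals a rotate and the rotate form matches REV16-style
// selection. Hypothetical example:
//
//   uint32_t swap_low_halfword(uint16_t x) {
//     uint32_t b = __builtin_bswap32(x); // high 16 bits of x are known zero
//     return b >> 16;                    // same value as rotr(b, 16) here
//   }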
8217 
8218 static SDValue performBitcastCombine(SDNode *N,
8219  TargetLowering::DAGCombinerInfo &DCI,
8220  SelectionDAG &DAG) {
8221  // Wait 'til after everything is legalized to try this. That way we have
8222  // legal vector types and such.
8223  if (DCI.isBeforeLegalizeOps())
8224  return SDValue();
8225 
8226  // Remove extraneous bitcasts around an extract_subvector.
8227  // For example,
8228  // (v4i16 (bitconvert
8229  // (extract_subvector (v2i64 (bitconvert (v8i16 ...)), (i64 1)))))
8230  // becomes
8231  // (extract_subvector ((v8i16 ...), (i64 4)))
8232 
8233  // Only interested in 64-bit vectors as the ultimate result.
8234  EVT VT = N->getValueType(0);
8235  if (!VT.isVector())
8236  return SDValue();
8237  if (VT.getSimpleVT().getSizeInBits() != 64)
8238  return SDValue();
8239  // Is the operand an extract_subvector starting at the beginning or halfway
8240  // point of the vector? A low half may also come through as an
8241  // EXTRACT_SUBREG, so look for that, too.
8242  SDValue Op0 = N->getOperand(0);
8243  if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR &&
8244  !(Op0->isMachineOpcode() &&
8245  Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG))
8246  return SDValue();
8247  uint64_t idx = cast<ConstantSDNode>(Op0->getOperand(1))->getZExtValue();
8248  if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
8249  if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0)
8250  return SDValue();
8251  } else if (Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG) {
8252  if (idx != AArch64::dsub)
8253  return SDValue();
8254  // The dsub reference is equivalent to a lane zero subvector reference.
8255  idx = 0;
8256  }
8257  // Look through the bitcast of the input to the extract.
8258  if (Op0->getOperand(0)->getOpcode() != ISD::BITCAST)
8259  return SDValue();
8260  SDValue Source = Op0->getOperand(0)->getOperand(0);
8261  // If the source type has twice the number of elements as our destination
8262  // type, we know this is an extract of the high or low half of the vector.
8263  EVT SVT = Source->getValueType(0);
8264  if (SVT.getVectorNumElements() != VT.getVectorNumElements() * 2)
8265  return SDValue();
8266 
8267  DEBUG(dbgs() << "aarch64-lower: bitcast extract_subvector simplification\n");
8268 
8269  // Create the simplified form to just extract the low or high half of the
8270  // vector directly rather than bothering with the bitcasts.
8271  SDLoc dl(N);
8272  unsigned NumElements = VT.getVectorNumElements();
8273  if (idx) {
8274  SDValue HalfIdx = DAG.getConstant(NumElements, dl, MVT::i64);
8275  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx);
8276  } else {
8277  SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, dl, MVT::i32);
8278  return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
8279  Source, SubReg),
8280  0);
8281  }
8282 }
8283 
8284 static SDValue performConcatVectorsCombine(SDNode *N,
8285  TargetLowering::DAGCombinerInfo &DCI,
8286  SelectionDAG &DAG) {
8287  SDLoc dl(N);
8288  EVT VT = N->getValueType(0);
8289  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
8290 
8291  // Optimize concat_vectors of truncated vectors, where the intermediate
8292  // type is illegal, to avoid said illegality, e.g.,
8293  // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
8294  // (v2i16 (truncate (v2i64)))))
8295  // ->
8296  // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
8297  // (v4i32 (bitcast (v2i64))),
8298  // <0, 2, 4, 6>)))
8299  // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
8300  // on both input and result type, so we might generate worse code.
8301  // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
8302  if (N->getNumOperands() == 2 &&
8303  N0->getOpcode() == ISD::TRUNCATE &&
8304  N1->getOpcode() == ISD::TRUNCATE) {
8305  SDValue N00 = N0->getOperand(0);
8306  SDValue N10 = N1->getOperand(0);
8307  EVT N00VT = N00.getValueType();
8308 
8309  if (N00VT == N10.getValueType() &&
8310  (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
8311  N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
8312  MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
8313  SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
8314  for (size_t i = 0; i < Mask.size(); ++i)
8315  Mask[i] = i * 2;
8316  return DAG.getNode(ISD::TRUNCATE, dl, VT,
8317  DAG.getVectorShuffle(
8318  MidVT, dl,
8319  DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
8320  DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
8321  }
8322  }
8323 
8324  // Wait 'til after everything is legalized to try this. That way we have
8325  // legal vector types and such.
8326  if (DCI.isBeforeLegalizeOps())
8327  return SDValue();
8328 
8329  // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
8330  // splat. The indexed instructions are going to be expecting a DUPLANE64, so
8331  // canonicalise to that.
8332  if (N0 == N1 && VT.getVectorNumElements() == 2) {
8333  assert(VT.getScalarSizeInBits() == 64);
8334  return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
8335  DAG.getConstant(0, dl, MVT::i64));
8336  }
8337 
8338  // Canonicalise concat_vectors so that the right-hand vector has as few
8339  // bit-casts as possible before its real operation. The primary matching
8340  // destination for these operations will be the narrowing "2" instructions,
8341  // which depend on the operation being performed on this right-hand vector.
8342  // For example,
8343  // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
8344  // becomes
8345  // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
8346 
8347  if (N1->getOpcode() != ISD::BITCAST)
8348  return SDValue();
8349  SDValue RHS = N1->getOperand(0);
8350  MVT RHSTy = RHS.getValueType().getSimpleVT();
8351  // If the RHS is not a vector, this is not the pattern we're looking for.
8352  if (!RHSTy.isVector())
8353  return SDValue();
8354 
8355  DEBUG(dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
8356 
8357  MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
8358  RHSTy.getVectorNumElements() * 2);
8359  return DAG.getNode(ISD::BITCAST, dl, VT,
8360  DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
8361  DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
8362  RHS));
8363 }
8364 
8365 static SDValue tryCombineFixedPointConvert(SDNode *N,
8366  TargetLowering::DAGCombinerInfo &DCI,
8367  SelectionDAG &DAG) {
8368  // Wait 'til after everything is legalized to try this. That way we have
8369  // legal vector types and such.
8370  if (DCI.isBeforeLegalizeOps())
8371  return SDValue();
8372  // Transform a scalar conversion of a value from a lane extract into a
8373  // lane extract of a vector conversion. E.g., from foo1 to foo2:
8374  // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
8375  // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
8376  //
8377  // The second form interacts better with instruction selection and the
8378  // register allocator to avoid cross-class register copies that aren't
8379  // coalescable due to a lane reference.
8380 
8381  // Check the operand and see if it originates from a lane extract.
8382  SDValue Op1 = N->getOperand(1);
8383  if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
8384  // Yep, no additional predication needed. Perform the transform.
8385  SDValue IID = N->getOperand(0);
8386  SDValue Shift = N->getOperand(2);
8387  SDValue Vec = Op1.getOperand(0);
8388  SDValue Lane = Op1.getOperand(1);
8389  EVT ResTy = N->getValueType(0);
8390  EVT VecResTy;
8391  SDLoc DL(N);
8392 
8393  // The vector width should be 128 bits by the time we get here, even
8394  // if it started as 64 bits (the extract_vector handling will have
8395  // done so).
8396  assert(Vec.getValueSizeInBits() == 128 &&
8397  "unexpected vector size on extract_vector_elt!");
8398  if (Vec.getValueType() == MVT::v4i32)
8399  VecResTy = MVT::v4f32;
8400  else if (Vec.getValueType() == MVT::v2i64)
8401  VecResTy = MVT::v2f64;
8402  else
8403  llvm_unreachable("unexpected vector type!");
8404 
8405  SDValue Convert =
8406  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
8407  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
8408  }
8409  return SDValue();
8410 }
8411 
8412 // AArch64 high-vector "long" operations are formed by performing the non-high
8413 // version on an extract_subvector of each operand which gets the high half:
8414 //
8415 // (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
8416 //
8417 // However, there are cases which don't have an extract_high explicitly, but
8418 // have another operation that can be made compatible with one for free. For
8419 // example:
8420 //
8421 // (dupv64 scalar) --> (extract_high (dup128 scalar))
8422 //
8423 // This routine does the actual conversion of such DUPs, once outer routines
8424 // have determined that everything else is in order.
8425 // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
8426 // similarly here.
8427 static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
8428  switch (N.getOpcode()) {
8429  case AArch64ISD::DUP:
8430  case AArch64ISD::DUPLANE8:
8431  case AArch64ISD::DUPLANE16:
8432  case AArch64ISD::DUPLANE32:
8433  case AArch64ISD::DUPLANE64:
8434  case AArch64ISD::MOVI:
8435  case AArch64ISD::MOVIshift:
8436  case AArch64ISD::MOVIedit:
8437  case AArch64ISD::MOVImsl:
8438  case AArch64ISD::MVNIshift:
8439  case AArch64ISD::MVNImsl:
8440  break;
8441  default:
8442  // FMOV could be supported, but isn't very useful, as it would only occur
8443  // if you passed a bitcast'd floating point immediate to an eligible long
8444  // integer op (addl, smull, ...).
8445  return SDValue();
8446  }
8447 
8448  MVT NarrowTy = N.getSimpleValueType();
8449  if (!NarrowTy.is64BitVector())
8450  return SDValue();
8451 
8452  MVT ElementTy = NarrowTy.getVectorElementType();
8453  unsigned NumElems = NarrowTy.getVectorNumElements();
8454  MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
8455 
8456  SDLoc dl(N);
8457  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
8458  DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
8459  DAG.getConstant(NumElems, dl, MVT::i64));
8460 }
8461 
8462 static bool isEssentiallyExtractSubvector(SDValue N) {
8463  if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
8464  return true;
8465 
8466  return N.getOpcode() == ISD::BITCAST &&
8467  N.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR;
8468 }
8469 
8470 /// \brief Helper structure to keep track of ISD::SET_CC operands.
8471 struct GenericSetCCInfo {
8472  const SDValue *Opnd0;
8473  const SDValue *Opnd1;
8474  ISD::CondCode CC;
8475 };
8476 
8477 /// \brief Helper structure to keep track of a SET_CC lowered into AArch64 code.
8478 struct AArch64SetCCInfo {
8479  const SDValue *Cmp;
8480  AArch64CC::CondCode CC;
8481 };
8482 
8483 /// \brief Helper structure to keep track of SetCC information.
8484 union SetCCInfo {
8485  GenericSetCCInfo Generic;
8486  AArch64SetCCInfo AArch64;
8487 };
8488 
8489 /// \brief Helper structure to be able to read SetCC information. If the
8490 /// IsAArch64 field is set to true, Info is an AArch64SetCCInfo; otherwise
8491 /// Info is a GenericSetCCInfo.
8492 struct SetCCInfoAndKind {
8493  SetCCInfo Info;
8494  bool IsAArch64;
8495 };
8496 
8497 /// \brief Check whether or not \p Op is a SET_CC operation, either a generic or
8498 /// an
8499 /// AArch64 lowered one.
8500 /// \p SetCCInfo is filled accordingly.
8501 /// \post SetCCInfo is meaningful only when this function returns true.
8502 /// \return True when Op is a kind of SET_CC operation.
8503 static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
8504  // If this is a setcc, this is straightforward.
8505  if (Op.getOpcode() == ISD::SETCC) {
8506  SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
8507  SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
8508  SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8509  SetCCInfo.IsAArch64 = false;
8510  return true;
8511  }
8512  // Otherwise, check if this is a matching csel instruction.
8513  // In other words:
8514  // - csel 1, 0, cc
8515  // - csel 0, 1, !cc
8516  if (Op.getOpcode() != AArch64ISD::CSEL)
8517  return false;
8518  // Set the information about the operands.
8519  // TODO: we want the operands of the Cmp not the csel
8520  SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
8521  SetCCInfo.IsAArch64 = true;
8522  SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
8523  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
8524 
8525  // Check that the operands match the constraints:
8526  // (1) Both operands must be constants.
8527  // (2) One must be 1 and the other must be 0.
8528  ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
8529  ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
8530 
8531  // Check (1).
8532  if (!TValue || !FValue)
8533  return false;
8534 
8535  // Check (2).
8536  if (!TValue->isOne()) {
8537  // Update the comparison when we are interested in !cc.
8538  std::swap(TValue, FValue);
8539  SetCCInfo.Info.AArch64.CC =
8540  AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
8541  }
8542  return TValue->isOne() && FValue->isNullValue();
8543 }
8544 
8545 // Returns true if Op is setcc or zext of setcc.
8546 static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
8547  if (isSetCC(Op, Info))
8548  return true;
8549  return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
8550  isSetCC(Op->getOperand(0), Info));
8551 }
8552 
8553 // The folding we want to perform is:
8554 // (add x, [zext] (setcc cc ...) )
8555 // -->
8556 // (csel x, (add x, 1), !cc ...)
8557 //
8558 // The latter will get matched to a CSINC instruction.
8559 static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
8560  assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
8561  SDValue LHS = Op->getOperand(0);
8562  SDValue RHS = Op->getOperand(1);
8563  SetCCInfoAndKind InfoAndKind;
8564 
8565  // If neither operand is a SET_CC, give up.
8566  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
8567  std::swap(LHS, RHS);
8568  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
8569  return SDValue();
8570  }
8571 
8572  // FIXME: This could be generalized to work for FP comparisons.
8573  EVT CmpVT = InfoAndKind.IsAArch64
8574  ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
8575  : InfoAndKind.Info.Generic.Opnd0->getValueType();
8576  if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
8577  return SDValue();
8578 
8579  SDValue CCVal;
8580  SDValue Cmp;
8581  SDLoc dl(Op);
8582  if (InfoAndKind.IsAArch64) {
8583  CCVal = DAG.getConstant(
8584  AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
8585  MVT::i32);
8586  Cmp = *InfoAndKind.Info.AArch64.Cmp;
8587  } else
8588  Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0,
8589  *InfoAndKind.Info.Generic.Opnd1,
8590  ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true),
8591  CCVal, DAG, dl);
8592 
8593  EVT VT = Op->getValueType(0);
8594  LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
8595  return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
8596 }
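// Illustrative sketch, not part of the original file: at the source level the
// folding above turns "add the result of a comparison" into a conditional
// increment, which selects as CSINC/CINC. Hypothetical example:
//
//   unsigned count_if_equal(unsigned acc, int a, int b) {
//     return acc + (a == b); // roughly: cmp w1, w2 ; cinc w0, w0, eq
//   }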
8597 
8598 // The basic add/sub long vector instructions have variants with "2" on the end
8599 // which act on the high-half of their inputs. They are normally matched by
8600 // patterns like:
8601 //
8602 // (add (zeroext (extract_high LHS)),
8603 // (zeroext (extract_high RHS)))
8604 // -> uaddl2 vD, vN, vM
8605 //
8606 // However, if one of the extracts is something like a duplicate, this
8607 // instruction can still be used profitably. This function puts the DAG into a
8608 // more appropriate form for those patterns to trigger.
8609 static SDValue performAddSubLongCombine(SDNode *N,
8610  TargetLowering::DAGCombinerInfo &DCI,
8611  SelectionDAG &DAG) {
8612  if (DCI.isBeforeLegalizeOps())
8613  return SDValue();
8614 
8615  MVT VT = N->getSimpleValueType(0);
8616  if (!VT.is128BitVector()) {
8617  if (N->getOpcode() == ISD::ADD)
8618  return performSetccAddFolding(N, DAG);
8619  return SDValue();
8620  }
8621 
8622  // Make sure both branches are extended in the same way.
8623  SDValue LHS = N->getOperand(0);
8624  SDValue RHS = N->getOperand(1);
8625  if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
8626  LHS.getOpcode() != ISD::SIGN_EXTEND) ||
8627  LHS.getOpcode() != RHS.getOpcode())
8628  return SDValue();
8629 
8630  unsigned ExtType = LHS.getOpcode();
8631 
8632  // It's not worth doing if at least one of the inputs isn't already an
8633  // extract, but we don't know which it'll be so we have to try both.
8634  if (isEssentiallyExtractSubvector(LHS.getOperand(0))) {
8635  RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
8636  if (!RHS.getNode())
8637  return SDValue();
8638 
8639  RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
8640  } else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) {
8641  LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
8642  if (!LHS.getNode())
8643  return SDValue();
8644 
8645  LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
8646  }
8647 
8648  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
8649 }
8650 
8651 // Massage DAGs which we can use the high-half "long" operations on into
8652 // something isel will recognize better. E.g.
8653 //
8654 // (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
8655 // (aarch64_neon_umull (extract_high (v2i64 vec)))
8656 // (extract_high (v2i64 (dup128 scalar)))))
8657 //
8658 static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
8659  TargetLowering::DAGCombinerInfo &DCI,
8660  SelectionDAG &DAG) {
8661  if (DCI.isBeforeLegalizeOps())
8662  return SDValue();
8663 
8664  SDValue LHS = N->getOperand(1);
8665  SDValue RHS = N->getOperand(2);
8666  assert(LHS.getValueType().is64BitVector() &&
8667  RHS.getValueType().is64BitVector() &&
8668  "unexpected shape for long operation");
8669 
8670  // Either node could be a DUP, but it's not worth doing both of them (you'd
8671  // just as well use the non-high version) so look for a corresponding extract
8672  // operation on the other "wing".
8673  if (isEssentiallyExtractSubvector(LHS)) {
8674  RHS = tryExtendDUPToExtractHigh(RHS, DAG);
8675  if (!RHS.getNode())
8676  return SDValue();
8677  } else if (isEssentiallyExtractSubvector(RHS)) {
8678  LHS = tryExtendDUPToExtractHigh(LHS, DAG);
8679  if (!LHS.getNode())
8680  return SDValue();
8681  }
8682 
8683  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
8684  N->getOperand(0), LHS, RHS);
8685 }
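// Illustrative sketch, not part of the original file: a long operation whose
// second operand is a broadcast scalar is the kind of input this massaging
// targets; the DUP can be widened so the high-half "2" form applies.
// Hypothetical example, assuming <arm_neon.h>:
//
//   int32x4_t mull_high_by_scalar(int16x8_t a, int16_t s) {
//     return vmull_high_n_s16(a, s); // smull2 against a 128-bit dup of s
//   }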
8686 
8687 static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
8688  MVT ElemTy = N->getSimpleValueType(0).getScalarType();
8689  unsigned ElemBits = ElemTy.getSizeInBits();
8690 
8691  int64_t ShiftAmount;
8692  if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
8693  APInt SplatValue, SplatUndef;
8694  unsigned SplatBitSize;
8695  bool HasAnyUndefs;
8696  if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
8697  HasAnyUndefs, ElemBits) ||
8698  SplatBitSize != ElemBits)
8699  return SDValue();
8700 
8701  ShiftAmount = SplatValue.getSExtValue();
8702  } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
8703  ShiftAmount = CVN->getSExtValue();
8704  } else
8705  return SDValue();
8706 
8707  unsigned Opcode;
8708  bool IsRightShift;
8709  switch (IID) {
8710  default:
8711  llvm_unreachable("Unknown shift intrinsic");
8712  case Intrinsic::aarch64_neon_sqshl:
8713  Opcode = AArch64ISD::SQSHL_I;
8714  IsRightShift = false;
8715  break;
8716  case Intrinsic::aarch64_neon_uqshl:
8717  Opcode = AArch64ISD::UQSHL_I;
8718  IsRightShift = false;
8719  break;
8720  case Intrinsic::aarch64_neon_srshl:
8721  Opcode = AArch64ISD::SRSHR_I;
8722  IsRightShift = true;
8723  break;
8724  case Intrinsic::aarch64_neon_urshl:
8725  Opcode = AArch64ISD::URSHR_I;
8726  IsRightShift = true;
8727  break;
8728  case Intrinsic::aarch64_neon_sqshlu:
8729  Opcode = AArch64ISD::SQSHLU_I;
8730  IsRightShift = false;
8731  break;
8732  }
8733 
8734  if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
8735  SDLoc dl(N);
8736  return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
8737  DAG.getConstant(-ShiftAmount, dl, MVT::i32));
8738  } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
8739  SDLoc dl(N);
8740  return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
8741  DAG.getConstant(ShiftAmount, dl, MVT::i32));
8742  }
8743 
8744  return SDValue();
8745 }
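// Illustrative sketch, not part of the original file: for the rounding shifts
// a constant negative amount really means a right shift, which is why a
// splat of -C is rewritten to the immediate right-shift node. Hypothetical
// example, assuming <arm_neon.h>:
//
//   int32x4_t round_shift_right_2(int32x4_t v) {
//     // Behaves the same as vrshrq_n_s32(v, 2).
//     return vrshlq_s32(v, vdupq_n_s32(-2));
//   }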
8746 
8747 // The CRC32[BH] instructions ignore the high bits of their data operand. Since
8748 // the intrinsics must be legal and take an i32, this means there's almost
8749 // certainly going to be a zext in the DAG which we can eliminate.
8750 static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
8751  SDValue AndN = N->getOperand(2);
8752  if (AndN.getOpcode() != ISD::AND)
8753  return SDValue();
8754 
8755  ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
8756  if (!CMask || CMask->getZExtValue() != Mask)
8757  return SDValue();
8758 
8759  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
8760  N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
8761 }
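// Illustrative sketch, not part of the original file: CRC32B only consumes
// the low byte of its data operand, so an explicit mask (or the zext a front
// end emits) is redundant and gets folded away. Hypothetical example,
// assuming <arm_acle.h> and a target with the CRC extension:
//
//   uint32_t crc_byte(uint32_t acc, uint32_t data) {
//     return __crc32b(acc, data & 0xff); // the "& 0xff" adds no information
//   }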
8762 
8763 static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
8764  SelectionDAG &DAG) {
8765  SDLoc dl(N);
8766  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
8767  DAG.getNode(Opc, dl,
8768  N->getOperand(1).getSimpleValueType(),
8769  N->getOperand(1)),
8770  DAG.getConstant(0, dl, MVT::i64));
8771 }
8772 
8773 static SDValue performIntrinsicCombine(SDNode *N,
8774  TargetLowering::DAGCombinerInfo &DCI,
8775  const AArch64Subtarget *Subtarget) {
8776  SelectionDAG &DAG = DCI.DAG;
8777  unsigned IID = getIntrinsicID(N);
8778  switch (IID) {
8779  default:
8780  break;
8781  case Intrinsic::aarch64_neon_vcvtfxs2fp:
8782  case Intrinsic::aarch64_neon_vcvtfxu2fp:
8783  return tryCombineFixedPointConvert(N, DCI, DAG);
8784  case Intrinsic::aarch64_neon_saddv:
8785  return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
8786  case Intrinsic::aarch64_neon_uaddv:
8787  return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
8788  case Intrinsic::aarch64_neon_sminv:
8789  return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
8790  case Intrinsic::aarch64_neon_uminv:
8791  return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
8792  case Intrinsic::aarch64_neon_smaxv:
8793  return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
8794  case Intrinsic::aarch64_neon_umaxv:
8795  return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
8796  case Intrinsic::aarch64_neon_fmax:
8797  return DAG.getNode(ISD::FMAXNAN, SDLoc(N), N->getValueType(0),
8798  N->getOperand(1), N->getOperand(2));
8799  case Intrinsic::aarch64_neon_fmin:
8800  return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0),
8801  N->getOperand(1), N->getOperand(2));
8802  case Intrinsic::aarch64_neon_fmaxnm:
8803  return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
8804  N->getOperand(1), N->getOperand(2));
8805  case Intrinsic::aarch64_neon_fminnm:
8806  return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
8807  N->getOperand(1), N->getOperand(2));
8808  case Intrinsic::aarch64_neon_smull:
8809  case Intrinsic::aarch64_neon_umull:
8810  case Intrinsic::aarch64_neon_pmull:
8811  case Intrinsic::aarch64_neon_sqdmull:
8812  return tryCombineLongOpWithDup(IID, N, DCI, DAG);
8813  case Intrinsic::aarch64_neon_sqshl:
8814  case Intrinsic::aarch64_neon_uqshl:
8815  case Intrinsic::aarch64_neon_sqshlu:
8816  case Intrinsic::aarch64_neon_srshl:
8817  case Intrinsic::aarch64_neon_urshl:
8818  return tryCombineShiftImm(IID, N, DAG);
8819  case Intrinsic::aarch64_crc32b:
8820  case Intrinsic::aarch64_crc32cb:
8821  return tryCombineCRC32(0xff, N, DAG);
8822  case Intrinsic::aarch64_crc32h:
8823  case Intrinsic::aarch64_crc32ch:
8824  return tryCombineCRC32(0xffff, N, DAG);
8825  }
8826  return SDValue();
8827 }
8828 
8829 static SDValue performExtendCombine(SDNode *N,
8830  TargetLowering::DAGCombinerInfo &DCI,
8831  SelectionDAG &DAG) {
8832  // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
8833  // we can convert that DUP into another extract_high (of a bigger DUP), which
8834  // helps the backend to decide that an sabdl2 would be useful, saving a real
8835  // extract_high operation.
8836  if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
8837  N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
8838  SDNode *ABDNode = N->getOperand(0).getNode();
8839  unsigned IID = getIntrinsicID(ABDNode);
8840  if (IID == Intrinsic::aarch64_neon_sabd ||
8841  IID == Intrinsic::aarch64_neon_uabd) {
8842  SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
8843  if (!NewABD.getNode())
8844  return SDValue();
8845 
8846  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
8847  NewABD);
8848  }
8849  }
8850 
8851  // This is effectively a custom type legalization for AArch64.
8852  //
8853  // Type legalization will split an extend of a small, legal, type to a larger
8854  // illegal type by first splitting the destination type, often creating
8855  // illegal source types, which then get legalized in isel-confusing ways,
8856  // leading to really terrible codegen. E.g.,
8857  // %result = v8i32 sext v8i8 %value
8858  // becomes
8859  // %losrc = extract_subreg %value, ...
8860  // %hisrc = extract_subreg %value, ...
8861  // %lo = v4i32 sext v4i8 %losrc
8862  // %hi = v4i32 sext v4i8 %hisrc
8863  // Things go rapidly downhill from there.
8864  //
8865  // For AArch64, the [sz]ext vector instructions can only go up one element
8866  // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
8867  // take two instructions.
8868  //
8869  // This implies that the most efficient way to do the extend from v8i8
8870  // to two v4i32 values is to first extend the v8i8 to v8i16, then do
8871  // the normal splitting to happen for the v8i16->v8i32.
8872 
8873  // This is pre-legalization to catch some cases where the default
8874  // type legalization will create ill-tempered code.
8875  if (!DCI.isBeforeLegalizeOps())
8876  return SDValue();
8877 
8878  // We're only interested in cleaning things up for non-legal vector types
8879  // here. If both the source and destination are legal, things will just
8880  // work naturally without any fiddling.
8881  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8882  EVT ResVT = N->getValueType(0);
8883  if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
8884  return SDValue();
8885  // If the vector type isn't a simple VT, it's beyond the scope of what
8886  // we're worried about here. Let legalization do its thing and hope for
8887  // the best.
8888  SDValue Src = N->getOperand(0);
8889  EVT SrcVT = Src->getValueType(0);
8890  if (!ResVT.isSimple() || !SrcVT.isSimple())
8891  return SDValue();
8892 
8893  // If the source VT is a 64-bit vector, we can play games and get the
8894  // better results we want.
8895  if (SrcVT.getSizeInBits() != 64)
8896  return SDValue();
8897 
8898  unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
8899  unsigned ElementCount = SrcVT.getVectorNumElements();
8900  SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), ElementCount);
8901  SDLoc DL(N);
8902  Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
8903 
8904  // Now split the rest of the operation into two halves, each with a 64
8905  // bit source.
8906  EVT LoVT, HiVT;
8907  SDValue Lo, Hi;
8908  unsigned NumElements = ResVT.getVectorNumElements();
8909  assert(!(NumElements & 1) && "Splitting vector, but not in half!");
8910  LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
8911  ResVT.getVectorElementType(), NumElements / 2);
8912 
8913  EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
8914  LoVT.getVectorNumElements());
8915  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
8916  DAG.getConstant(0, DL, MVT::i64));
8917  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
8918  DAG.getConstant(InNVT.getVectorNumElements(), DL, MVT::i64));
8919  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
8920  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
8921 
8922  // Now combine the parts back together so we still have a single result
8923  // like the combiner expects.
8924  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
8925 }
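// Illustrative sketch, not part of the original file: the splitting strategy
// above mirrors how a v8i8 -> v8i32 extension is written by hand with NEON:
// widen one element size, then split. Hypothetical example, assuming
// <arm_neon.h>:
//
//   void widen_v8i8(int8x8_t v, int32x4_t *lo, int32x4_t *hi) {
//     int16x8_t w = vmovl_s8(v);         // v8i8 -> v8i16
//     *lo = vmovl_s16(vget_low_s16(w));  // low  v4i16 -> v4i32
//     *hi = vmovl_s16(vget_high_s16(w)); // high v4i16 -> v4i32
//   }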
8926 
8927 static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
8928  SDValue SplatVal, unsigned NumVecElts) {
8929  unsigned OrigAlignment = St.getAlignment();
8930  unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
8931 
8932  // Create scalar stores. This is at least as good as the code sequence for a
8933  // split unaligned store which is a dup.s, ext.b, and two stores.
8934  // Most of the time the three stores should be replaced by store pair
8935  // instructions (stp).
8936  SDLoc DL(&St);
8937  SDValue BasePtr = St.getBasePtr();
8938  const MachinePointerInfo &PtrInfo = St.getPointerInfo();
8939  SDValue NewST1 =
8940  DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
8941  OrigAlignment, St.getMemOperand()->getFlags());
8942 
8943  unsigned Offset = EltOffset;
8944  while (--NumVecElts) {
8945  unsigned Alignment = MinAlign(OrigAlignment, Offset);
8946  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
8947  DAG.getConstant(Offset, DL, MVT::i64));
8948  NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
8949  PtrInfo.getWithOffset(Offset), Alignment,
8950  St.getMemOperand()->getFlags());
8951  Offset += EltOffset;
8952  }
8953  return NewST1;
8954 }
8955 
8956 /// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
8957 /// load store optimizer pass will merge them to store pair stores. This should
8958 /// be better than a movi to create the vector zero followed by a vector store
8959 /// if the zero constant is not re-used, since one instruction and one register
8960 /// live range will be removed.
8961 ///
8962 /// For example, the final generated code should be:
8963 ///
8964 /// stp xzr, xzr, [x0]
8965 ///
8966 /// instead of:
8967 ///
8968 /// movi v0.2d, #0
8969 /// str q0, [x0]
8970 ///
8971 static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
8972  SDValue StVal = St.getValue();
8973  EVT VT = StVal.getValueType();
8974 
8975  // It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
8976  // 2, 3 or 4 i32 elements.
8977  int NumVecElts = VT.getVectorNumElements();
8978  if (!(((NumVecElts == 2 || NumVecElts == 3) &&
8979  VT.getVectorElementType().getSizeInBits() == 64) ||
8980  ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
8981  VT.getVectorElementType().getSizeInBits() == 32)))
8982  return SDValue();
8983 
8984  if (StVal.getOpcode() != ISD::BUILD_VECTOR)
8985  return SDValue();
8986 
8987  // If the zero constant has more than one use then the vector store could be
8988  // better since the constant mov will be amortized and stp q instructions
8989  // should be able to be formed.
8990  if (!StVal.hasOneUse())
8991  return SDValue();
8992 
8993  // If the immediate offset of the address operand is too large for the stp
8994  // instruction, then bail out.
8995  if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
8996  int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
8997  if (Offset < -512 || Offset > 504)
8998  return SDValue();
8999  }
9000 
9001  for (int I = 0; I < NumVecElts; ++I) {
9002  SDValue EltVal = StVal.getOperand(I);
9003  if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
9004  return SDValue();
9005  }
9006 
9007  // Use WZR/XZR here to prevent DAGCombiner::MergeConsecutiveStores from
9008  // undoing this transformation.
9009  SDValue SplatVal = VT.getVectorElementType().getSizeInBits() == 32
9010  ? DAG.getRegister(AArch64::WZR, MVT::i32)
9011  : DAG.getRegister(AArch64::XZR, MVT::i64);
9012  return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
9013 }
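// Illustrative sketch, not part of the original file: the zero-splat store
// case at the source level. Hypothetical example, assuming <arm_neon.h>:
//
//   void clear_two(uint64_t *p) {
//     vst1q_u64(p, vdupq_n_u64(0)); // expected to become: stp xzr, xzr, [x0]
//   }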
9014 
9015 /// Replace a splat of a scalar to a vector store by scalar stores of the scalar
9016 /// value. The load store optimizer pass will merge them to store pair stores.
9017 /// This has better performance than a splat of the scalar followed by a split
9018 /// vector store. Even if the stores are not merged it is four stores vs a dup,
9019 /// followed by an ext.b and two stores.
9020 static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
9021  SDValue StVal = St.getValue();
9022  EVT VT = StVal.getValueType();
9023 
9024  // Don't replace floating point stores, they possibly won't be transformed to
9025  // stp because of the store pair suppress pass.
9026  if (VT.isFloatingPoint())
9027  return SDValue();
9028 
9029  // We can express a splat as store pair(s) for 2 or 4 elements.
9030  unsigned NumVecElts = VT.getVectorNumElements();
9031  if (NumVecElts != 4 && NumVecElts != 2)
9032  return SDValue();
9033 
9034  // Check that this is a splat.
9035  // Make sure that each of the relevant vector element locations are inserted
9036  // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
9037  std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
9038  SDValue SplatVal;
9039  for (unsigned I = 0; I < NumVecElts; ++I) {
9040  // Check for insert vector elements.
9041  if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
9042  return SDValue();
9043 
9044  // Check that same value is inserted at each vector element.
9045  if (I == 0)
9046  SplatVal = StVal.getOperand(1);
9047  else if (StVal.getOperand(1) != SplatVal)
9048  return SDValue();
9049 
9050  // Check insert element index.
9051  ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
9052  if (!CIndex)
9053  return SDValue();
9054  uint64_t IndexVal = CIndex->getZExtValue();
9055  if (IndexVal >= NumVecElts)
9056  return SDValue();
9057  IndexNotInserted.reset(IndexVal);
9058 
9059  StVal = StVal.getOperand(0);
9060  }
9061  // Check that all vector element locations were inserted to.
9062  if (IndexNotInserted.any())
9063  return SDValue();
9064 
9065  return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
9066 }
9067 
9068 static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
9069  SelectionDAG &DAG,
9070  const AArch64Subtarget *Subtarget) {
9071  if (!DCI.isBeforeLegalize())
9072  return SDValue();
9073 
9074  StoreSDNode *S = cast<StoreSDNode>(N);
9075  if (S->isVolatile())
9076  return SDValue();
9077 
9078  SDValue StVal = S->getValue();
9079  EVT VT = StVal.getValueType();
9080  if (!VT.isVector())
9081  return SDValue();
9082 
9083  // If we get a splat of zeros, convert this vector store to a store of
9084  // scalars. They will be merged into store pairs of xzr thereby removing one
9085  // instruction and one register.
9086  if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
9087  return ReplacedZeroSplat;
9088 
9089  // FIXME: The logic for deciding if an unaligned store should be split should
9090  // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
9091  // a call to that function here.
9092 
9093  if (!Subtarget->isMisaligned128StoreSlow())
9094  return SDValue();
9095 
9096  // Don't split at -Oz.
9097  if (DAG.getMachineFunction().getFunction()->optForMinSize())
9098  return SDValue();
9099 
9100  // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
9101  // those up regresses performance on micro-benchmarks and olden/bh.
9102  if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
9103  return SDValue();
9104 
9105  // Split unaligned 16B stores. They are terrible for performance.
9106  // Don't split stores with alignment of 1 or 2. Code that uses clang vector
9107  // extensions can use this to mark that it does not want splitting to happen
9108  // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
9109  // eliminating alignment hazards is only 1 in 8 for alignment of 2.
9110  if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
9111  S->getAlignment() <= 2)
9112  return SDValue();
9113 
9114  // If we get a splat of a scalar convert this vector store to a store of
9115  // scalars. They will be merged into store pairs thereby removing two
9116  // instructions.
9117  if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
9118  return ReplacedSplat;
9119 
9120  SDLoc DL(S);
9121  unsigned NumElts = VT.getVectorNumElements() / 2;
9122  // Split VT into two.
9123  EVT HalfVT =
9124  EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
9125  SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
9126  DAG.getConstant(0, DL, MVT::i64));
9127  SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
9128  DAG.getConstant(NumElts, DL, MVT::i64));
9129  SDValue BasePtr = S->getBasePtr();
9130  SDValue NewST1 =
9131  DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
9132  S->getAlignment(), S->getMemOperand()->getFlags());
9133  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
9134  DAG.getConstant(8, DL, MVT::i64));
9135  return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
9136  S->getPointerInfo(), S->getAlignment(),
9137  S->getMemOperand()->getFlags());
9138 }
9139 
9140 /// Target-specific DAG combine function for post-increment LD1 (lane) and
9141 /// post-increment LD1R.
9142 static SDValue performPostLD1Combine(SDNode *N,
9143  TargetLowering::DAGCombinerInfo &DCI,
9144  bool IsLaneOp) {
9145  if (DCI.isBeforeLegalizeOps())
9146  return SDValue();
9147 
9148  SelectionDAG &DAG = DCI.DAG;
9149  EVT VT = N->getValueType(0);
9150 
9151  unsigned LoadIdx = IsLaneOp ? 1 : 0;
9152  SDNode *LD = N->getOperand(LoadIdx).getNode();
9153  // If it is not a LOAD, we cannot do this combine.
9154  if (LD->getOpcode() != ISD::LOAD)
9155  return SDValue();
9156 
9157  LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
9158  EVT MemVT = LoadSDN->getMemoryVT();
9159  // Check if memory operand is the same type as the vector element.
9160  if (MemVT != VT.getVectorElementType())
9161  return SDValue();
9162 
9163  // Check if there are other uses. If so, do not combine as it will introduce
9164  // an extra load.
9165  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
9166  ++UI) {
9167  if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
9168  continue;
9169  if (*UI != N)
9170  return SDValue();
9171  }
9172 
9173  SDValue Addr = LD->getOperand(1);
9174  SDValue Vector = N->getOperand(0);
9175  // Search for a use of the address operand that is an increment.
9176  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
9177  Addr.getNode()->use_end(); UI != UE; ++UI) {
9178  SDNode *User = *UI;
9179  if (User->getOpcode() != ISD::ADD
9180  || UI.getUse().getResNo() != Addr.getResNo())
9181  continue;
9182 
9183  // Check that the add is independent of the load. Otherwise, folding it
9184  // would create a cycle.
9185  if (User->isPredecessorOf(LD) || LD->isPredecessorOf(User))
9186  continue;
9187  // Also check that add is not used in the vector operand. This would also
9188  // create a cycle.
9189  if (User->isPredecessorOf(Vector.getNode()))
9190  continue;
9191 
9192  // If the increment is a constant, it must match the memory ref size.
9193  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
9194  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
9195  uint32_t IncVal = CInc->getZExtValue();
9196  unsigned NumBytes = VT.getScalarSizeInBits() / 8;
9197  if (IncVal != NumBytes)
9198  continue;
9199  Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
9200  }
9201 
9202  // Finally, check that the vector doesn't depend on the load.
9203  // Again, this would create a cycle.
9204  // The load depending on the vector is fine, as that's the case for the
9205  // LD1*post we'll eventually generate anyway.
9206  if (LoadSDN->isPredecessorOf(Vector.getNode()))
9207  continue;
9208 
9210  Ops.push_back(LD->getOperand(0)); // Chain
9211  if (IsLaneOp) {
9212  Ops.push_back(Vector); // The vector to be inserted
9213  Ops.push_back(N->getOperand(2)); // The lane to be inserted in the vector
9214  }
9215  Ops.push_back(Addr);
9216  Ops.push_back(Inc);
9217 
9218  EVT Tys[3] = { VT, MVT::i64, MVT::Other };
9219  SDVTList SDTys = DAG.getVTList(Tys);
9220  unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
9221  SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
9222  MemVT,
9223  LoadSDN->getMemOperand());
9224 
9225  // Update the uses.
9226  SDValue NewResults[] = {
9227  SDValue(LD, 0), // The result of load
9228  SDValue(UpdN.getNode(), 2) // Chain
9229  };
9230  DCI.CombineTo(LD, NewResults);
9231  DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
9232  DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
9233 
9234  break;
9235  }
9236  return SDValue();
9237 }
9238 
9239 /// Simplify \p Addr given that the top byte of it is ignored by HW during
9240 /// address translation.
9241 static bool performTBISimplification(SDValue Addr,
9242  TargetLowering::DAGCombinerInfo &DCI,
9243  SelectionDAG &DAG) {
9244  APInt DemandedMask = APInt::getLowBitsSet(64, 56);
9245  APInt KnownZero, KnownOne;
9246  TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
9247  DCI.isBeforeLegalizeOps());
9248  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9249  if (TLI.SimplifyDemandedBits(Addr, DemandedMask, KnownZero, KnownOne, TLO)) {
9250  DCI.CommitTargetLoweringOpt(TLO);
9251  return true;
9252  }
9253  return false;
9254 }
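// Illustrative sketch, not part of the original file: under top-byte-ignore
// the hardware disregards address bits 63:56, so tag manipulation feeding a
// memory access is exactly what SimplifyDemandedBits can strip here.
// Hypothetical example, assuming <cstdint>:
//
//   int load_tagged(int *p) {
//     uintptr_t a = (uintptr_t)p & 0x00ffffffffffffffULL; // drop the tag byte
//     return *(int *)a; // with TBI the masking is unnecessary for the access
//   }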
9255 
9256 static SDValue performSTORECombine(SDNode *N,
9257  TargetLowering::DAGCombinerInfo &DCI,
9258  SelectionDAG &DAG,
9259  const AArch64Subtarget *Subtarget) {
9260  if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
9261  return Split;
9262 
9263  if (Subtarget->supportsAddressTopByteIgnored() &&
9264  performTBISimplification(N->getOperand(2), DCI, DAG))
9265  return SDValue(N, 0);
9266 
9267  return SDValue();
9268 }
9269 
9270 /// This function handles the log2-shuffle pattern produced by the
9271 /// LoopVectorizer for the across vector reduction. It consists of
9272 /// log2(NumVectorElements) steps and, in each step, 2^(s) elements
9273 /// are reduced, where s is an induction variable from 0 to
9274 /// log2(NumVectorElements).
9275 static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
9276  unsigned Op,
9277  SelectionDAG &DAG) {
9278  EVT VTy = OpV->getOperand(0).getValueType();
9279  if (!VTy.isVector())
9280  return SDValue();
9281 
9282  int NumVecElts = VTy.getVectorNumElements();
9283  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
9284  if (NumVecElts != 4)
9285  return SDValue();
9286  } else {
9287  if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
9288  return SDValue();
9289  }
9290 
9291  int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
9292  SDValue PreOp = OpV;
9293  // Iterate over each step of the across vector reduction.
9294  for (int CurStep = 0; CurStep != NumExpectedSteps; ++CurStep) {
9295  SDValue CurOp = PreOp.getOperand(0);
9296  SDValue Shuffle = PreOp.getOperand(1);
9297  if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) {
9298  // Try to swap the 1st and 2nd operand as add and min/max instructions
9299  // are commutative.
9300  CurOp = PreOp.getOperand(1);
9301  Shuffle = PreOp.getOperand(0);
9302  if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
9303  return SDValue();
9304  }
9305 
9306  // Check if the input vector is fed by the operator we want to handle,
9307  // except the last step; the very first input vector is not necessarily
9308  // the same operator we are handling.
9309  if (CurOp.getOpcode() != Op && (CurStep != (NumExpectedSteps - 1)))
9310  return SDValue();
9311 
9312  // Check if it forms one step of the across vector reduction.
9313  // E.g.,
9314  // %cur = add %1, %0
9315  // %shuffle = vector_shuffle %cur, <2, 3, u, u>
9316  // %pre = add %cur, %shuffle
9317  if (Shuffle.getOperand(0) != CurOp)
9318  return SDValue();
9319 
9320  int NumMaskElts = 1 << CurStep;
9321  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Shuffle)->getMask();
9322  // Check mask values in each step.
9323  // We expect the shuffle mask in each step follows a specific pattern
9324  // denoted here by the <M, U> form, where M is a sequence of integers
9325  // starting from NumMaskElts, increasing by 1, and the number of integers
9326  // in M should be NumMaskElts. U is a sequence of UNDEFs and the number
9327  // of undefs in U should be NumVecElts - NumMaskElts.
9328  // E.g., for <8 x i16>, mask values in each step should be :
9329  // step 0 : <1,u,u,u,u,u,u,u>
9330  // step 1 : <2,3,u,u,u,u,u,u>
9331  // step 2 : <4,5,6,7,u,u,u,u>
9332  for (int i = 0; i < NumVecElts; ++i)
9333  if ((i < NumMaskElts && Mask[i] != (NumMaskElts + i)) ||
9334  (i >= NumMaskElts && !(Mask[i] < 0)))
9335  return SDValue();
9336 
9337  PreOp = CurOp;
9338  }
9339  unsigned Opcode;
9340  bool IsIntrinsic = false;
9341 
9342  switch (Op) {
9343  default:
9344  llvm_unreachable("Unexpected operator for across vector reduction");
9345  case ISD::ADD:
9346  Opcode = AArch64ISD::UADDV;
9347  break;
9348  case ISD::SMAX:
9349  Opcode = AArch64ISD::SMAXV;
9350  break;
9351  case ISD::UMAX:
9352  Opcode = AArch64ISD::UMAXV;
9353  break;
9354  case ISD::SMIN:
9355  Opcode = AArch64ISD::SMINV;
9356  break;
9357  case ISD::UMIN:
9358  Opcode = AArch64ISD::UMINV;
9359  break;
9360  case ISD::FMAXNUM:
9361  Opcode = Intrinsic::aarch64_neon_fmaxnmv;
9362  IsIntrinsic = true;
9363  break;
9364  case ISD::FMINNUM:
9365  Opcode = Intrinsic::aarch64_neon_fminnmv;
9366  IsIntrinsic = true;
9367  break;
9368  }
9369  SDLoc DL(N);
9370 
9371  return IsIntrinsic
9372  ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
9373  DAG.getConstant(Opcode, DL, MVT::i32), PreOp)
9374  : DAG.getNode(
9375  ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
9376  DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
9377  DAG.getConstant(0, DL, MVT::i64));
9378 }
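// Illustrative sketch, not part of the original file: the log2-shuffle ladder
// recognized above computes the same value as this scalar halving loop; each
// iteration corresponds to one shuffle+op step. Hypothetical example for an
// 8-element add reduction:
//
//   int reduce_add8(const int (&v)[8]) {
//     int tmp[8];
//     for (int i = 0; i < 8; ++i) tmp[i] = v[i];
//     for (int step = 4; step >= 1; step /= 2) // 8 -> 4 -> 2 -> 1 live lanes
//       for (int i = 0; i < step; ++i)
//         tmp[i] += tmp[i + step];
//     return tmp[0];                           // the value ADDV/UADDV yields
//   }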
9379 
9380 /// Target-specific DAG combine for the across vector min/max reductions.
9381 /// This function specifically handles the final clean-up step of the vector
9382 /// min/max reductions produced by the LoopVectorizer. It is the log2-shuffle
9383 /// pattern, which narrows down and finds the final min/max value from all
9384 /// elements of the vector.
9385 /// For example, for a <16 x i8> vector :
9386 /// %svn0 = vector_shuffle %0, undef<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u>
9387 /// %smax0 = smax %arr, %svn0
9388 /// %svn1 = vector_shuffle %smax0, undef<4,5,6,7,u,u,u,u,u,u,u,u,u,u,u,u>
9389 /// %smax1 = smax %smax0, %svn1
9390 /// %svn2 = vector_shuffle %smax1, undef<2,3,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
9391 /// %smax2 = smax %smax1, %svn2
9392 /// %svn3 = vector_shuffle %smax2, undef<1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
9393 /// %sc = setcc %smax2, %svn3, gt
9394 /// %n0 = extract_vector_elt %sc, #0
9395 /// %n1 = extract_vector_elt %smax2, #0
9396 /// %n2 = extract_vector_elt %smax2, #1
9397 /// %result = select %n0, %n1, %n2
9398 /// becomes :
9399 /// %1 = smaxv %0
9400 /// %result = extract_vector_elt %1, 0
9401 static SDValue
9402 performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
9403  const AArch64Subtarget *Subtarget) {
9404  if (!Subtarget->hasNEON())
9405  return SDValue();
9406 
9407  SDValue N0 = N->getOperand(0);
9408  SDValue IfTrue = N->getOperand(1);
9409  SDValue IfFalse = N->getOperand(2);
9410 
9411  // Check if the SELECT merges up the final result of the min/max
9412  // from a vector.
9413  if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
9414  IfTrue.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
9415  IfFalse.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
9416  return SDValue();
9417 
9418  // Expect N0 is fed by SETCC.
9419  SDValue SetCC = N0.getOperand(0);
9420  EVT SetCCVT = SetCC.getValueType();
9421  if (SetCC.getOpcode() != ISD::SETCC || !SetCCVT.isVector() ||
9422  SetCCVT.getVectorElementType() != MVT::i1)
9423  return SDValue();
9424 
9425  SDValue VectorOp = SetCC.getOperand(0);
9426  unsigned Op = VectorOp->getOpcode();
9427  // Check if the input vector is fed by the operator we want to handle.
9428  if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
9429  Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
9430  return SDValue();
9431 
9432  EVT VTy = VectorOp.getValueType();
9433  if (!VTy.isVector())
9434  return SDValue();
9435 
9436  if (VTy.getSizeInBits() < 64)
9437  return SDValue();
9438 
9439  EVT EltTy = VTy.getVectorElementType();
9440  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
9441  if (EltTy != MVT::f32)
9442  return SDValue();
9443  } else {
9444  if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
9445  return SDValue();
9446  }
9447 
9448  // Check if extracting from the same vector.
9449  // For example,
9450  // %sc = setcc %vector, %svn1, gt
9451  // %n0 = extract_vector_elt %sc, #0
9452  // %n1 = extract_vector_elt %vector, #0
9453  // %n2 = extract_vector_elt %vector, #1
9454  if (!(VectorOp == IfTrue->getOperand(0) &&
9455  VectorOp == IfFalse->getOperand(0)))
9456  return SDValue();
9457 
9458  // Check if the condition code is matched with the operator type.
9459  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
9460  if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
9461  (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
9462  (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
9463  (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
9464  (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
9465  CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
9466  CC != ISD::SETGE) ||
9467  (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
9468  CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
9469  CC != ISD::SETLE))
9470  return SDValue();
9471 
9472  // Expect to check only lane 0 from the vector SETCC.
9473  if (!isNullConstant(N0.getOperand(1)))
9474  return SDValue();
9475 
9476  // Expect to extract the true value from lane 0.
9477  if (!isNullConstant(IfTrue.getOperand(1)))
9478  return SDValue();
9479 
9480  // Expect to extract the false value from lane 1.
9481  if (!isOneConstant(IfFalse.getOperand(1)))
9482  return SDValue();
9483 
9484  return tryMatchAcrossLaneShuffleForReduction(N, SetCC, Op, DAG);
9485 }
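// Illustrative sketch, not part of the original file: once the clean-up above
// matches, the whole ladder collapses to a single across-lanes reduction,
// which is also reachable directly via an intrinsic. Hypothetical example,
// assuming <arm_neon.h>:
//
//   int8_t max_of_16(int8x16_t v) {
//     return vmaxvq_s8(v); // a single SMAXV
//   }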
9486 
9487 /// Target-specific DAG combine for the across vector add reduction.
9488 /// This function specifically handles the final clean-up step of the vector
9489 /// add reduction produced by the LoopVectorizer. It is the log2-shuffle
9490 /// pattern, which adds all elements of a vector together.
9491 /// For example, for a <4 x i32> vector :
9492 /// %1 = vector_shuffle %0, <2,3,u,u>
9493 /// %2 = add %0, %1
9494 /// %3 = vector_shuffle %2, <1,u,u,u>
9495 /// %4 = add %2, %3
9496 /// %result = extract_vector_elt %4, 0
9497 /// becomes :
9498 /// %0 = uaddv %0
9499 /// %result = extract_vector_elt %0, 0
9500 static SDValue
9502  const AArch64Subtarget *Subtarget) {
9503  if (!Subtarget->hasNEON())
9504  return SDValue();
9505  SDValue N0 = N->getOperand(0);
9506  SDValue N1 = N->getOperand(1);
9507 
9508  // Check if the input vector is fed by the ADD.
9509  if (N0->getOpcode() != ISD::ADD)
9510  return SDValue();
9511 
9512  // The vector extract idx must constant zero because we only expect the final
9513  // result of the reduction is placed in lane 0.
9514  if (!isNullConstant(N1))
9515  return SDValue();
9516 
9517  EVT VTy = N0.getValueType();
9518  if (!VTy.isVector())
9519  return SDValue();
9520 
9521  EVT EltTy = VTy.getVectorElementType();
9522  if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
9523  return SDValue();
9524 
9525  if (VTy.getSizeInBits() < 64)
9526  return SDValue();
9527 
9528  return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG);
9529 }
9530 
9531 /// Target-specific DAG combine function for NEON load/store intrinsics
9532 /// to merge base address updates.
9535  SelectionDAG &DAG) {
9536  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
9537  return SDValue();
9538 
9539  unsigned AddrOpIdx = N->getNumOperands() - 1;
9540  SDValue Addr = N->getOperand(AddrOpIdx);
9541 
9542  // Search for a use of the address operand that is an increment.
9543  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
9544  UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
9545  SDNode *User = *UI;
9546  if (User->getOpcode() != ISD::ADD ||
9547  UI.getUse().getResNo() != Addr.getResNo())
9548  continue;
9549 
9550  // Check that the add is independent of the load/store. Otherwise, folding
9551  // it would create a cycle.
9552  if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
9553  continue;
9554 
9555  // Find the new opcode for the updating load/store.
9556  bool IsStore = false;
9557  bool IsLaneOp = false;
9558  bool IsDupOp = false;
9559  unsigned NewOpc = 0;
9560  unsigned NumVecs = 0;
9561  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
9562  switch (IntNo) {
9563  default: llvm_unreachable("unexpected intrinsic for Neon base update");
9564  case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
9565  NumVecs = 2; break;
9566  case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
9567  NumVecs = 3; break;
9568  case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
9569  NumVecs = 4; break;
9570  case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
9571  NumVecs = 2; IsStore = true; break;
9572  case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
9573  NumVecs = 3; IsStore = true; break;
9574  case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
9575  NumVecs = 4; IsStore = true; break;
9576  case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
9577  NumVecs = 2; break;
9578  case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
9579  NumVecs = 3; break;
9580  case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
9581  NumVecs = 4; break;
9582  case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
9583  NumVecs = 2; IsStore = true; break;
9584  case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
9585  NumVecs = 3; IsStore = true; break;
9586  case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
9587  NumVecs = 4; IsStore = true; break;
9588  case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
9589  NumVecs = 2; IsDupOp = true; break;
9590  case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
9591  NumVecs = 3; IsDupOp = true; break;
9592  case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
9593  NumVecs = 4; IsDupOp = true; break;
9594  case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
9595  NumVecs = 2; IsLaneOp = true; break;
9596  case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
9597  NumVecs = 3; IsLaneOp = true; break;
9598  case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
9599  NumVecs = 4; IsLaneOp = true; break;
9600  case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
9601  NumVecs = 2; IsStore = true; IsLaneOp = true; break;
9602  case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
9603  NumVecs = 3; IsStore = true; IsLaneOp = true; break;
9604  case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
9605  NumVecs = 4; IsStore = true; IsLaneOp = true; break;
9606  }
9607 
9608  EVT VecTy;
9609  if (IsStore)
9610  VecTy = N->getOperand(2).getValueType();
9611  else
9612  VecTy = N->getValueType(0);
9613 
9614  // If the increment is a constant, it must match the memory ref size.
9615  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
9616  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
9617  uint32_t IncVal = CInc->getZExtValue();
9618  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
9619  if (IsLaneOp || IsDupOp)
9620  NumBytes /= VecTy.getVectorNumElements();
9621  if (IncVal != NumBytes)
9622  continue;
9623  Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
9624  }
9626  Ops.push_back(N->getOperand(0)); // Incoming chain
9627  // Load lane and store have vector list as input.
9628  if (IsLaneOp || IsStore)
9629  for (unsigned i = 2; i < AddrOpIdx; ++i)
9630  Ops.push_back(N->getOperand(i));
9631  Ops.push_back(Addr); // Base register
9632  Ops.push_back(Inc);
9633 
9634  // Return Types.
9635  EVT Tys[6];
9636  unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
9637  unsigned n;
9638  for (n = 0; n < NumResultVecs; ++n)
9639  Tys[n] = VecTy;
9640  Tys[n++] = MVT::i64; // Type of write back register
9641  Tys[n] = MVT::Other; // Type of the chain
9642  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
9643 
9644  MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
9645  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
9646  MemInt->getMemoryVT(),
9647  MemInt->getMemOperand());
9648 
9649  // Update the uses.
9650  std::vector<SDValue> NewResults;
9651  for (unsigned i = 0; i < NumResultVecs; ++i) {
9652  NewResults.push_back(SDValue(UpdN.getNode(), i));
9653  }
9654  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
9655  DCI.CombineTo(N, NewResults);
9656  DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
9657 
9658  break;
9659  }
9660  return SDValue();
9661 }
9662 
9663 // Checks to see if the value is the prescribed width and returns information
9664 // about its extension mode.
9665 static
9666 bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
9667  ExtType = ISD::NON_EXTLOAD;
9668  switch(V.getNode()->getOpcode()) {
9669  default:
9670  return false;
9671  case ISD::LOAD: {
9672  LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
9673  if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
9674  || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
9675  ExtType = LoadNode->getExtensionType();
9676  return true;
9677  }
9678  return false;
9679  }
9680  case ISD::AssertSext: {
9681  VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
9682  if ((TypeNode->getVT() == MVT::i8 && width == 8)
9683  || (TypeNode->getVT() == MVT::i16 && width == 16)) {
9684  ExtType = ISD::SEXTLOAD;
9685  return true;
9686  }
9687  return false;
9688  }
9689  case ISD::AssertZext: {
9690  VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
9691  if ((TypeNode->getVT() == MVT::i8 && width == 8)
9692  || (TypeNode->getVT() == MVT::i16 && width == 16)) {
9693  ExtType = ISD::ZEXTLOAD;
9694  return true;
9695  }
9696  return false;
9697  }
9698  case ISD::Constant:
9699  case ISD::TargetConstant: {
9700  return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
9701  1LL << (width - 1);
9702  }
9703  }
9704 
9705  return true;
9706 }
9707 
9708 // This function does a whole lot of voodoo to determine if the tests are
9709 // equivalent without and with a mask. Essentially what happens is that given a
9710 // DAG resembling:
9711 //
9712 // +-------------+ +-------------+ +-------------+ +-------------+
9713 // | Input | | AddConstant | | CompConstant| | CC |
9714 // +-------------+ +-------------+ +-------------+ +-------------+
9715 // | | | |
9716 // V V | +----------+
9717 // +-------------+ +----+ | |
9718 // | ADD | |0xff| | |
9719 // +-------------+ +----+ | |
9720 // | | | |
9721 // V V | |
9722 // +-------------+ | |
9723 // | AND | | |
9724 // +-------------+ | |
9725 // | | |
9726 // +-----+ | |
9727 // | | |
9728 // V V V
9729 // +-------------+
9730 // | CMP |
9731 // +-------------+
9732 //
9733 // The AND node may be safely removed for some combinations of inputs. In
9734 // particular we need to take into account the extension type of the Input,
9735 // the exact values of AddConstant, CompConstant, and CC, along with the nominal
9736 // width of the input (this can work for any width inputs, the above graph is
9737 // specific to 8 bits.
9738 //
9739 // The specific equations were worked out by generating output tables for each
9740 // AArch64CC value in terms of and AddConstant (w1), CompConstant(w2). The
9741 // problem was simplified by working with 4 bit inputs, which means we only
9742 // needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
9743 // extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
9744 // patterns present in both extensions (0,7). For every distinct set of
9745 // AddConstant and CompConstants bit patterns we can consider the masked and
9746 // unmasked versions to be equivalent if the result of this function is true for
9747 // all 16 distinct bit patterns of for the current extension type of Input (w0).
9748 //
9749 // sub w8, w0, w1
9750 // and w10, w8, #0x0f
9751 // cmp w8, w2
9752 // cset w9, AArch64CC
9753 // cmp w10, w2
9754 // cset w11, AArch64CC
9755 // cmp w9, w11
9756 // cset w0, eq
9757 // ret
9758 //
9759 // Since the above function shows when the outputs are equivalent it defines
9760 // when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
9761 // would be expensive to run during compiles. The equations below were written
9762 // in a test harness that confirmed they gave equivalent outputs to the above
9763 // for all inputs function, so they can be used determine if the removal is
9764 // legal instead.
9765 //
9766 // isEquivalentMaskless() is the code for testing if the AND can be removed
9767 // factored out of the DAG recognition as the DAG can take several forms.
9768 
9769 static bool isEquivalentMaskless(unsigned CC, unsigned width,
9770  ISD::LoadExtType ExtType, int AddConstant,
9771  int CompConstant) {
9772  // By being careful about our equations and only writing the in term
9773  // symbolic values and well known constants (0, 1, -1, MaxUInt) we can
9774  // make them generally applicable to all bit widths.
9775  int MaxUInt = (1 << width);
9776 
9777  // For the purposes of these comparisons sign extending the type is
9778  // equivalent to zero extending the add and displacing it by half the integer
9779  // width. Provided we are careful and make sure our equations are valid over
9780  // the whole range we can just adjust the input and avoid writing equations
9781  // for sign extended inputs.
9782  if (ExtType == ISD::SEXTLOAD)
9783  AddConstant -= (1 << (width-1));
9784 
9785  switch(CC) {
9786  case AArch64CC::LE:
9787  case AArch64CC::GT:
9788  if ((AddConstant == 0) ||
9789  (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
9790  (AddConstant >= 0 && CompConstant < 0) ||
9791  (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
9792  return true;
9793  break;
9794  case AArch64CC::LT:
9795  case AArch64CC::GE:
9796  if ((AddConstant == 0) ||
9797  (AddConstant >= 0 && CompConstant <= 0) ||
9798  (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
9799  return true;
9800  break;
9801  case AArch64CC::HI:
9802  case AArch64CC::LS:
9803  if ((AddConstant >= 0 && CompConstant < 0) ||
9804  (AddConstant <= 0 && CompConstant >= -1 &&
9805  CompConstant < AddConstant + MaxUInt))
9806  return true;
9807  break;
9808  case AArch64CC::PL:
9809  case AArch64CC::MI:
9810  if ((AddConstant == 0) ||
9811  (AddConstant > 0 && CompConstant <= 0) ||
9812  (AddConstant < 0 && CompConstant <= AddConstant))
9813  return true;
9814  break;
9815  case AArch64CC::LO:
9816  case AArch64CC::HS:
9817  if ((AddConstant >= 0 && CompConstant <= 0) ||
9818  (AddConstant <= 0 && CompConstant >= 0 &&
9819  CompConstant <= AddConstant + MaxUInt))
9820  return true;
9821  break;
9822  case AArch64CC::EQ:
9823  case AArch64CC::NE:
9824  if ((AddConstant > 0 && CompConstant < 0) ||
9825  (AddConstant < 0 && CompConstant >= 0 &&
9826  CompConstant < AddConstant + MaxUInt) ||
9827  (AddConstant >= 0 && CompConstant >= 0 &&
9828  CompConstant >= AddConstant) ||
9829  (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
9830  return true;
9831  break;
9832  case AArch64CC::VS:
9833  case AArch64CC::VC:
9834  case AArch64CC::AL:
9835  case AArch64CC::NV:
9836  return true;
9837  case AArch64CC::Invalid:
9838  break;
9839  }
9840 
9841  return false;
9842 }
9843 
9844 static
9847  SelectionDAG &DAG, unsigned CCIndex,
9848  unsigned CmpIndex) {
9849  unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
9850  SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
9851  unsigned CondOpcode = SubsNode->getOpcode();
9852 
9853  if (CondOpcode != AArch64ISD::SUBS)
9854  return SDValue();
9855 
9856  // There is a SUBS feeding this condition. Is it fed by a mask we can
9857  // use?
9858 
9859  SDNode *AndNode = SubsNode->getOperand(0).getNode();
9860  unsigned MaskBits = 0;
9861 
9862  if (AndNode->getOpcode() != ISD::AND)
9863  return SDValue();
9864 
9865  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
9866  uint32_t CNV = CN->getZExtValue();
9867  if (CNV == 255)
9868  MaskBits = 8;
9869  else if (CNV == 65535)
9870  MaskBits = 16;
9871  }
9872 
9873  if (!MaskBits)
9874  return SDValue();
9875 
9876  SDValue AddValue = AndNode->getOperand(0);
9877 
9878  if (AddValue.getOpcode() != ISD::ADD)
9879  return SDValue();
9880 
9881  // The basic dag structure is correct, grab the inputs and validate them.
9882 
9883  SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
9884  SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
9885  SDValue SubsInputValue = SubsNode->getOperand(1);
9886 
9887  // The mask is present and the provenance of all the values is a smaller type,
9888  // lets see if the mask is superfluous.
9889 
9890  if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
9891  !isa<ConstantSDNode>(SubsInputValue.getNode()))
9892  return SDValue();
9893 
9894  ISD::LoadExtType ExtType;
9895 
9896  if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
9897  !checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
9898  !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
9899  return SDValue();
9900 
9901  if(!isEquivalentMaskless(CC, MaskBits, ExtType,
9902  cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
9903  cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
9904  return SDValue();
9905 
9906  // The AND is not necessary, remove it.
9907 
9908  SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
9909  SubsNode->getValueType(1));
9910  SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
9911 
9912  SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
9913  DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
9914 
9915  return SDValue(N, 0);
9916 }
9917 
9918 // Optimize compare with zero and branch.
9921  SelectionDAG &DAG) {
9922  if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
9923  N = NV.getNode();
9924  SDValue Chain = N->getOperand(0);
9925  SDValue Dest = N->getOperand(1);
9926  SDValue CCVal = N->getOperand(2);
9927  SDValue Cmp = N->getOperand(3);
9928 
9929  assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
9930  unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
9931  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
9932  return SDValue();
9933 
9934  unsigned CmpOpc = Cmp.getOpcode();
9935  if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
9936  return SDValue();
9937 
9938  // Only attempt folding if there is only one use of the flag and no use of the
9939  // value.
9940  if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
9941  return SDValue();
9942 
9943  SDValue LHS = Cmp.getOperand(0);
9944  SDValue RHS = Cmp.getOperand(1);
9945 
9946  assert(LHS.getValueType() == RHS.getValueType() &&
9947  "Expected the value type to be the same for both operands!");
9948  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
9949  return SDValue();
9950 
9951  if (isNullConstant(LHS))
9952  std::swap(LHS, RHS);
9953 
9954  if (!isNullConstant(RHS))
9955  return SDValue();
9956 
9957  if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
9958  LHS.getOpcode() == ISD::SRL)
9959  return SDValue();
9960 
9961  // Fold the compare into the branch instruction.
9962  SDValue BR;
9963  if (CC == AArch64CC::EQ)
9964  BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
9965  else
9966  BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
9967 
9968  // Do not add new nodes to DAG combiner worklist.
9969  DCI.CombineTo(N, BR, false);
9970 
9971  return SDValue();
9972 }
9973 
9974 // Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
9975 // as well as whether the test should be inverted. This code is required to
9976 // catch these cases (as opposed to standard dag combines) because
9977 // AArch64ISD::TBZ is matched during legalization.
9978 static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
9979  SelectionDAG &DAG) {
9980 
9981  if (!Op->hasOneUse())
9982  return Op;
9983 
9984  // We don't handle undef/constant-fold cases below, as they should have
9985  // already been taken care of (e.g. and of 0, test of undefined shifted bits,
9986  // etc.)
9987 
9988  // (tbz (trunc x), b) -> (tbz x, b)
9989  // This case is just here to enable more of the below cases to be caught.
9990  if (Op->getOpcode() == ISD::TRUNCATE &&
9991  Bit < Op->getValueType(0).getSizeInBits()) {
9992  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
9993  }
9994 
9995  if (Op->getNumOperands() != 2)
9996  return Op;
9997 
9998  auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9999  if (!C)
10000  return Op;
10001 
10002  switch (Op->getOpcode()) {
10003  default:
10004  return Op;
10005 
10006  // (tbz (and x, m), b) -> (tbz x, b)
10007  case ISD::AND:
10008  if ((C->getZExtValue() >> Bit) & 1)
10009  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
10010  return Op;
10011 
10012  // (tbz (shl x, c), b) -> (tbz x, b-c)
10013  case ISD::SHL:
10014  if (C->getZExtValue() <= Bit &&
10015  (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
10016  Bit = Bit - C->getZExtValue();
10017  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
10018  }
10019  return Op;
10020 
10021  // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
10022  case ISD::SRA:
10023  Bit = Bit + C->getZExtValue();
10024  if (Bit >= Op->getValueType(0).getSizeInBits())
10025  Bit = Op->getValueType(0).getSizeInBits() - 1;
10026  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
10027 
10028  // (tbz (srl x, c), b) -> (tbz x, b+c)
10029  case ISD::SRL:
10030  if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
10031  Bit = Bit + C->getZExtValue();
10032  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
10033  }
10034  return Op;
10035 
10036  // (tbz (xor x, -1), b) -> (tbnz x, b)
10037  case ISD::XOR:
10038  if ((C->getZExtValue() >> Bit) & 1)
10039  Invert = !Invert;
10040  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
10041  }
10042 }
10043 
10044 // Optimize test single bit zero/non-zero and branch.
10047  SelectionDAG &DAG) {
10048  unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
10049  bool Invert = false;
10050  SDValue TestSrc = N->getOperand(1);
10051  SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
10052 
10053  if (TestSrc == NewTestSrc)
10054  return SDValue();
10055 
10056  unsigned NewOpc = N->getOpcode();
10057  if (Invert) {
10058  if (NewOpc == AArch64ISD::TBZ)
10059  NewOpc = AArch64ISD::TBNZ;
10060  else {
10061  assert(NewOpc == AArch64ISD::TBNZ);
10062  NewOpc = AArch64ISD::TBZ;
10063  }
10064  }
10065 
10066  SDLoc DL(N);
10067  return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
10068  DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
10069 }
10070 
10071 // vselect (v1i1 setcc) ->
10072 // vselect (v1iXX setcc) (XX is the size of the compared operand type)
10073 // FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
10074 // condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
10075 // such VSELECT.
10077  SDValue N0 = N->getOperand(0);
10078  EVT CCVT = N0.getValueType();
10079 
10080  if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
10081  CCVT.getVectorElementType() != MVT::i1)
10082  return SDValue();
10083 
10084  EVT ResVT = N->getValueType(0);
10085  EVT CmpVT = N0.getOperand(0).getValueType();
10086  // Only combine when the result type is of the same size as the compared
10087  // operands.
10088  if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
10089  return SDValue();
10090 
10091  SDValue IfTrue = N->getOperand(1);
10092  SDValue IfFalse = N->getOperand(2);
10093  SDValue SetCC =
10095  N0.getOperand(0), N0.getOperand(1),
10096  cast<CondCodeSDNode>(N0.getOperand(2))->get());
10097  return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
10098  IfTrue, IfFalse);
10099 }
10100 
10101 /// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
10102 /// the compare-mask instructions rather than going via NZCV, even if LHS and
10103 /// RHS are really scalar. This replaces any scalar setcc in the above pattern
10104 /// with a vector one followed by a DUP shuffle on the result.
10107  SelectionDAG &DAG = DCI.DAG;
10108  SDValue N0 = N->getOperand(0);
10109  EVT ResVT = N->getValueType(0);
10110 
10111  if (N0.getOpcode() != ISD::SETCC)
10112  return SDValue();
10113 
10114  // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
10115  // scalar SetCCResultType. We also don't expect vectors, because we assume
10116  // that selects fed by vector SETCCs are canonicalized to VSELECT.
10117  assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
10118  "Scalar-SETCC feeding SELECT has unexpected result type!");
10119 
10120  // If NumMaskElts == 0, the comparison is larger than select result. The
10121  // largest real NEON comparison is 64-bits per lane, which means the result is
10122  // at most 32-bits and an illegal vector. Just bail out for now.
10123  EVT SrcVT = N0.getOperand(0).getValueType();
10124 
10125  // Don't try to do this optimization when the setcc itself has i1 operands.
10126  // There are no legal vectors of i1, so this would be pointless.
10127  if (SrcVT == MVT::i1)
10128  return SDValue();
10129 
10130  int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
10131  if (!ResVT.isVector() || NumMaskElts == 0)
10132  return SDValue();
10133 
10134  SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
10135  EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
10136 
10137  // Also bail out if the vector CCVT isn't the same size as ResVT.
10138  // This can happen if the SETCC operand size doesn't divide the ResVT size
10139  // (e.g., f64 vs v3f32).
10140  if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
10141  return SDValue();
10142 
10143  // Make sure we didn't create illegal types, if we're not supposed to.
10144  assert(DCI.isBeforeLegalize() ||
10145  DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
10146 
10147  // First perform a vector comparison, where lane 0 is the one we're interested
10148  // in.
10149  SDLoc DL(N0);
10150  SDValue LHS =
10151  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
10152  SDValue RHS =
10153  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
10154  SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
10155 
10156  // Now duplicate the comparison mask we want across all other lanes.
10157  SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
10158  SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
10159  Mask = DAG.getNode(ISD::BITCAST, DL,
10161 
10162  return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
10163 }
10164 
10165 /// Get rid of unnecessary NVCASTs (that don't change the type).
10167  if (N->getValueType(0) == N->getOperand(0).getValueType())
10168  return N->getOperand(0);
10169 
10170  return SDValue();
10171 }
10172 
10174  DAGCombinerInfo &DCI) const {
10175  SelectionDAG &DAG = DCI.DAG;
10176  switch (N->getOpcode()) {
10177  default:
10178  break;
10179  case ISD::ADD:
10180  case ISD::SUB:
10181  return performAddSubLongCombine(N, DCI, DAG);
10182  case ISD::XOR:
10183  return performXorCombine(N, DAG, DCI, Subtarget);
10184  case ISD::MUL:
10185  return performMulCombine(N, DAG, DCI, Subtarget);
10186  case ISD::SINT_TO_FP:
10187  case ISD::UINT_TO_FP:
10188  return performIntToFpCombine(N, DAG, Subtarget);
10189  case ISD::FP_TO_SINT:
10190  case ISD::FP_TO_UINT:
10191  return performFpToIntCombine(N, DAG, DCI, Subtarget);
10192  case ISD::FDIV:
10193  return performFDivCombine(N, DAG, DCI, Subtarget);
10194  case ISD::OR:
10195  return performORCombine(N, DCI, Subtarget);
10196  case ISD::SRL:
10197  return performSRLCombine(N, DCI);
10199  return performIntrinsicCombine(N, DCI, Subtarget);
10200  case ISD::ANY_EXTEND:
10201  case ISD::ZERO_EXTEND:
10202  case ISD::SIGN_EXTEND:
10203  return performExtendCombine(N, DCI, DAG);
10204  case ISD::BITCAST:
10205  return performBitcastCombine(N, DCI, DAG);
10206  case ISD::CONCAT_VECTORS:
10207  return performConcatVectorsCombine(N, DCI, DAG);
10208  case ISD::SELECT: {
10209  SDValue RV = performSelectCombine(N, DCI);
10210  if (!RV.getNode())
10211  RV = performAcrossLaneMinMaxReductionCombine(N, DAG, Subtarget);
10212  return RV;
10213  }
10214  case ISD::VSELECT:
10215  return performVSelectCombine(N, DCI.DAG);
10216  case ISD::LOAD:
10217  if (performTBISimplification(N->getOperand(1), DCI, DAG))
10218  return SDValue(N, 0);
10219  break;
10220  case ISD::STORE:
10221  return performSTORECombine(N, DCI, DAG, Subtarget);
10222  case AArch64ISD::BRCOND:
10223  return performBRCONDCombine(N, DCI, DAG);
10224  case AArch64ISD::TBNZ:
10225  case AArch64ISD::TBZ:
10226  return performTBZCombine(N, DCI, DAG);
10227  case AArch64ISD::CSEL:
10228  return performCONDCombine(N, DCI, DAG, 2, 3);
10229  case AArch64ISD::DUP:
10230  return performPostLD1Combine(N, DCI, false);
10231  case AArch64ISD::NVCAST:
10232  return performNVCASTCombine(N);
10234  return performPostLD1Combine(N, DCI, true);
10236  return performAcrossLaneAddReductionCombine(N, DAG, Subtarget);
10237  case ISD::INTRINSIC_VOID:
10239  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10240  case Intrinsic::aarch64_neon_ld2:
10241  case Intrinsic::aarch64_neon_ld3:
10242  case Intrinsic::aarch64_neon_ld4:
10243  case Intrinsic::aarch64_neon_ld1x2:
10244  case Intrinsic::aarch64_neon_ld1x3:
10245  case Intrinsic::aarch64_neon_ld1x4:
10246  case Intrinsic::aarch64_neon_ld2lane:
10247  case Intrinsic::aarch64_neon_ld3lane:
10248  case Intrinsic::aarch64_neon_ld4lane:
10249  case Intrinsic::aarch64_neon_ld2r:
10250  case Intrinsic::aarch64_neon_ld3r:
10251  case Intrinsic::aarch64_neon_ld4r:
10252  case Intrinsic::aarch64_neon_st2:
10253  case Intrinsic::aarch64_neon_st3:
10254  case Intrinsic::aarch64_neon_st4:
10255  case Intrinsic::aarch64_neon_st1x2:
10256  case Intrinsic::aarch64_neon_st1x3:
10257  case Intrinsic::aarch64_neon_st1x4:
10258  case Intrinsic::aarch64_neon_st2lane:
10259  case Intrinsic::aarch64_neon_st3lane:
10260  case Intrinsic::aarch64_neon_st4lane:
10261  return performNEONPostLDSTCombine(N, DCI, DAG);
10262  default:
10263  break;
10264  }
10265  }
10266  return SDValue();
10267 }
10268 
10269 // Check if the return value is used as only a return value, as otherwise
10270 // we can't perform a tail-call. In particular, we need to check for
10271 // target ISD nodes that are returns and any other "odd" constructs
10272 // that the generic analysis code won't necessarily catch.
10273 bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
10274  SDValue &Chain) const {
10275  if (N->getNumValues() != 1)
10276  return false;
10277  if (!N->hasNUsesOfValue(1, 0))
10278  return false;
10279 
10280  SDValue TCChain = Chain;
10281  SDNode *Copy = *N->use_begin();
10282  if (Copy->getOpcode() == ISD::CopyToReg) {
10283  // If the copy has a glue operand, we conservatively assume it isn't safe to
10284  // perform a tail call.
10285  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
10286  MVT::Glue)
10287  return false;
10288  TCChain = Copy->getOperand(0);
10289  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
10290  return false;
10291 
10292  bool HasRet = false;
10293  for (SDNode *Node : Copy->uses()) {
10294  if (Node->getOpcode() != AArch64ISD::RET_FLAG)
10295  return false;
10296  HasRet = true;
10297  }
10298 
10299  if (!HasRet)
10300  return false;
10301 
10302  Chain = TCChain;
10303  return true;
10304 }
10305 
10306 // Return whether the an instruction can potentially be optimized to a tail
10307 // call. This will cause the optimizers to attempt to move, or duplicate,
10308 // return instructions to help enable tail call optimizations for this
10309 // instruction.
10310 bool AArch64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
10311  return CI->isTailCall();
10312 }
10313 
10314 bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
10315  SDValue &Offset,
10316  ISD::MemIndexedMode &AM,
10317  bool &IsInc,
10318  SelectionDAG &DAG) const {
10319  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
10320  return false;
10321 
10322  Base = Op->getOperand(0);
10323  // All of the indexed addressing mode instructions take a signed
10324  // 9 bit immediate offset.
10325  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
10326  int64_t RHSC = RHS->getSExtValue();
10327  if (Op->getOpcode() == ISD::SUB)
10328  RHSC = -(uint64_t)RHSC;
10329  if (!isInt<9>(RHSC))
10330  return false;
10331  IsInc = (Op->getOpcode() == ISD::ADD);
10332  Offset = Op->getOperand(1);
10333  return true;
10334  }
10335  return false;
10336 }
10337 
10338 bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
10339  SDValue &Offset,
10340  ISD::MemIndexedMode &AM,
10341  SelectionDAG &DAG) const {
10342  EVT VT;
10343  SDValue Ptr;
10344  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
10345  VT = LD->getMemoryVT();
10346  Ptr = LD->getBasePtr();
10347  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
10348  VT = ST->getMemoryVT();
10349  Ptr = ST->getBasePtr();
10350  } else
10351  return false;
10352 
10353  bool IsInc;
10354  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
10355  return false;
10356  AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
10357  return true;
10358 }
10359 
10360 bool AArch64TargetLowering::getPostIndexedAddressParts(
10361  SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
10362  ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
10363  EVT VT;
10364  SDValue Ptr;
10365  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
10366  VT = LD->getMemoryVT();
10367  Ptr = LD->getBasePtr();
10368  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
10369  VT = ST->getMemoryVT();
10370  Ptr = ST->getBasePtr();
10371  } else
10372  return false;
10373 
10374  bool IsInc;
10375  if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
10376  return false;
10377  // Post-indexing updates the base, so it's not a valid transform
10378  // if that's not the same as the load's pointer.
10379  if (Ptr != Base)
10380  return false;
10381  AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
10382  return true;
10383 }
10384 
10386  SelectionDAG &DAG) {
10387  SDLoc DL(N);
10388  SDValue Op = N->getOperand(0);
10389 
10390  if (N->getValueType(0) != MVT::i16 || Op.getValueType() != MVT::f16)
10391  return;
10392 
10393  Op = SDValue(
10394  DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
10395  DAG.getUNDEF(MVT::i32), Op,
10396  DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
10397  0);
10398  Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
10399  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
10400 }
10401 
10404  SelectionDAG &DAG, unsigned InterOp,
10405  unsigned AcrossOp) {
10406  EVT LoVT, HiVT;
10407  SDValue Lo, Hi;
10408  SDLoc dl(N);
10409  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
10410  std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
10411  SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
10412  SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
10413  Results.push_back(SplitVal);
10414 }
10415 
10416 static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
10417  SDLoc DL(N);
10418  SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
10419  SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
10420  DAG.getNode(ISD::SRL, DL, MVT::i128, N,
10421  DAG.getConstant(64, DL, MVT::i64)));
10422  return std::make_pair(Lo, Hi);
10423 }
10424 
10427  SelectionDAG &DAG) {
10428  assert(N->getValueType(0) == MVT::i128 &&
10429  "AtomicCmpSwap on types less than 128 should be legal");
10430  auto Desired = splitInt128(N->getOperand(2), DAG);
10431  auto New = splitInt128(N->getOperand(3), DAG);
10432  SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
10433  New.first, New.second, N->getOperand(0)};
10434  SDNode *CmpSwap = DAG.getMachineNode(
10435  AArch64::CMP_SWAP_128, SDLoc(N),
10437 
10438  MachineFunction &MF = DAG.getMachineFunction();
10440  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
10441  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
10442 
10443  Results.push_back(SDValue(CmpSwap, 0));
10444  Results.push_back(SDValue(CmpSwap, 1));
10445  Results.push_back(SDValue(CmpSwap, 3));
10446 }
10447 
10448 void AArch64TargetLowering::ReplaceNodeResults(
10450  switch (N->getOpcode()) {
10451  default:
10452  llvm_unreachable("Don't know how to custom expand this");
10453  case ISD::BITCAST:
10454  ReplaceBITCASTResults(N, Results, DAG);
10455  return;
10456  case AArch64ISD::SADDV:
10458  return;
10459  case AArch64ISD::UADDV:
10461  return;
10462  case AArch64ISD::SMINV:
10464  return;
10465  case AArch64ISD::UMINV:
10467  return;
10468  case AArch64ISD::SMAXV:
10470  return;
10471  case AArch64ISD::UMAXV:
10473  return;
10474  case ISD::FP_TO_UINT:
10475  case ISD::FP_TO_SINT:
10476  assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
10477  // Let normal code take care of it by not adding anything to Results.
10478  return;
10479  case ISD::ATOMIC_CMP_SWAP:
10480  ReplaceCMP_SWAP_128Results(N, Results, DAG);
10481  return;
10482  }
10483 }
10484 
10486  if (!Subtarget->isTargetAndroid())
10487  return true;
10489 }
10490 
10491 unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
10492  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
10493  // reciprocal if there are three or more FDIVs.
10494  return 3;
10495 }
10496 
10499  MVT SVT = VT.getSimpleVT();
10500  // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
10501  // v4i16, v2i32 instead of to promote.
10502  if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
10503  || SVT == MVT::v1f32)
10504  return TypeWidenVector;
10505 
10507 }
10508 
10509 // Loads and stores less than 128-bits are already atomic; ones above that
10510 // are doomed anyway, so defer to the default libcall and blame the OS when
10511 // things go wrong.
10513  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
10514  return Size == 128;
10515 }
10516 
10517 // Loads and stores less than 128-bits are already atomic; ones above that
10518 // are doomed anyway, so defer to the default libcall and blame the OS when
10519 // things go wrong.
10522  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
10524 }
10525 
10526 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
10529  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
10531 }
10532 
10534  AtomicCmpXchgInst *AI) const {
10535  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
10536  // implement cmpxchg without spilling. If the address being exchanged is also
10537  // on the stack and close enough to the spill slot, this can lead to a
10538  // situation where the monitor always gets cleared and the atomic operation
10539  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
10540  return getTargetMachine().getOptLevel() != 0;
10541 }
10542 
10544  AtomicOrdering Ord) const {
10545  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10546  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
10547  bool IsAcquire = isAcquireOrStronger(Ord);
10548 
10549  // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
10550  // intrinsic must return {i64, i64} and we have to recombine them into a
10551  // single i128 here.
10552  if (ValTy->getPrimitiveSizeInBits() == 128) {
10553  Intrinsic::ID Int =
10554  IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
10555  Function *Ldxr = Intrinsic::getDeclaration(M, Int);
10556 
10557  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
10558  Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
10559 
10560  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
10561  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
10562  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
10563  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
10564  return Builder.CreateOr(
10565  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
10566  }
10567 
10568  Type *Tys[] = { Addr->getType() };
10569  Intrinsic::ID Int =
10570  IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
10571  Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
10572 
10573  return Builder.CreateTruncOrBitCast(
10574  Builder.CreateCall(Ldxr, Addr),
10575  cast<PointerType>(Addr->getType())->getElementType());
10576 }
10577 
10579  IRBuilder<> &Builder) const {
10580  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10581  Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
10582 }
10583 
10585  Value *Val, Value *Addr,
10586  AtomicOrdering Ord) const {
10587  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10588  bool IsRelease = isReleaseOrStronger(Ord);
10589 
10590  // Since the intrinsics must have legal type, the i128 intrinsics take two
10591  // parameters: "i64, i64". We must marshal Val into the appropriate form
10592  // before the call.
10593  if (Val->getType()->getPrimitiveSizeInBits() == 128) {
10594  Intrinsic::ID Int =
10595  IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
10596  Function *Stxr = Intrinsic::getDeclaration(M, Int);
10597  Type *Int64Ty = Type::getInt64Ty(M->getContext());
10598 
10599  Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
10600  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
10601  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
10602  return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
10603  }
10604 
10605  Intrinsic::ID Int =
10606  IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
10607  Type *Tys[] = { Addr->getType() };
10608  Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
10609 
10610  return Builder.CreateCall(Stxr,
10611  {Builder.CreateZExtOrBitCast(
10612  Val, Stxr->getFunctionType()->getParamType(0)),
10613  Addr});
10614 }
10615 
10616 bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
10617  Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
10618  return Ty->isArrayTy();
10619 }
10620 
10621 bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
10622  EVT) const {
10623  return false;
10624 }
10625 
10627  if (!Subtarget->isTargetAndroid())
10628  return TargetLowering::getIRStackGuard(IRB);
10629 
10630  // Android provides a fixed TLS slot for the stack cookie. See the definition
10631  // of TLS_SLOT_STACK_GUARD in
10632  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
10633  const unsigned TlsOffset = 0x28;
10634  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
10635  Function *ThreadPointerFunc =
10636  Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
10637  return IRB.CreatePointerCast(
10638  IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
10639  Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
10640 }
10641 
10643  if (!Subtarget->isTargetAndroid())
10645 
10646  // Android provides a fixed TLS slot for the SafeStack pointer. See the
10647  // definition of TLS_SLOT_SAFESTACK in
10648  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
10649  const unsigned TlsOffset = 0x48;
10650  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
10651  Function *ThreadPointerFunc =
10652  Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
10653  return IRB.CreatePointerCast(
10654  IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
10655  Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
10656 }
10657 
10659  // Update IsSplitCSR in AArch64unctionInfo.
10661  AFI->setIsSplitCSR(true);
10662 }
10663 
10665  MachineBasicBlock *Entry,
10666  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
10667  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
10668  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
10669  if (!IStart)
10670  return;
10671 
10672  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10673  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
10674  MachineBasicBlock::iterator MBBI = Entry->begin();
10675  for (const MCPhysReg *I = IStart; *I; ++I) {
10676  const TargetRegisterClass *RC = nullptr;
10677  if (AArch64::GPR64RegClass.contains(*I))
10678  RC = &AArch64::GPR64RegClass;
10679  else if (AArch64::FPR64RegClass.contains(*I))
10680  RC = &AArch64::FPR64RegClass;
10681  else
10682  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
10683 
10684  unsigned NewVR = MRI->createVirtualRegister(RC);
10685  // Create copy from CSR to a virtual register.
10686  // FIXME: this currently does not emit CFI pseudo-instructions, it works
10687  // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
10688  // nounwind. If we want to generalize this later, we may need to emit
10689  // CFI pseudo-instructions.
10691  Attribute::NoUnwind) &&
10692  "Function should be nounwind in insertCopiesSplitCSR!");
10693  Entry->addLiveIn(*I);
10694  BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
10695  .addReg(*I);
10696 
10697  // Insert the copy-back instructions right before the terminator.
10698  for (auto *Exit : Exits)
10699  BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
10700  TII->get(TargetOpcode::COPY), *I)
10701  .addReg(NewVR);
10702  }
10703 }
10704 
10706  // Integer division on AArch64 is expensive. However, when aggressively
10707  // optimizing for code size, we prefer to use a div instruction, as it is
10708  // usually smaller than the alternative sequence.
10709  // The exception to this is vector division. Since AArch64 doesn't have vector
10710  // integer division, leaving the division as-is is a loss even in terms of
10711  // size, because it will have to be scalarized, while the alternative code
10712  // sequence can be performed in vector form.
10713  bool OptSize =
10714  Attr.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
10715  return OptSize && !VT.isVector();
10716 }
MachineLoop * L
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
static bool isAdvSIMDModImmType6(uint64_t Imm)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
void setFrameAddressIsTaken(bool T)
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:494
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:500
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:315
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:467
APInt ashr(unsigned shiftAmt) const
Arithmetic right-shift function.
Definition: APInt.cpp:1035
Value * getValueOperand()
Definition: Instructions.h:391
Helper structure to keep track of SetCC information.
static MVT getIntegerVT(unsigned BitWidth)
static bool isUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of "vector_shuffle v...
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:102
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:524
static SDValue performBitcastCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
SDValue getValue(unsigned R) const
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address...
Value * CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1192
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG)
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
MVT getValVT() const
iterator_range< use_iterator > uses()
Definition: Value.h:326
bool isTargetAndroid() const
bool requiresStrictAlign() const
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand...
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
Flags getFlags() const
Return the raw flags of the source value,.
LLVMContext * getContext() const
Definition: SelectionDAG.h:333
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:575
LLVMContext & Context
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
DiagnosticInfoOptimizationBase::Argument NV
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:804
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:724
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performNEONPostLDSTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Target-specific DAG combine function for NEON load/store intrinsics to merge base address updates...
STATISTIC(NumFunctions,"Total number of functions")
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:304
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
size_t i
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:572
LocInfo getLocInfo() const
static bool isUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
static bool isTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of "vector_shuffle v...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC) const
Selects the correct CCAssignFn for a given CallingConvention value.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:313
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
static SDValue tryCombineToEXTR(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
EXTR instruction extracts a contiguous chunk of bits from two existing registers viewed as a high/low...
static bool isAdvSIMDModImmType12(uint64_t Imm)
const TargetMachine & getTargetMachine() const
an instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
Definition: Instructions.h:504
static bool isAdvSIMDModImmType4(uint64_t Imm)
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, unsigned Align=1, bool *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const uint32_t * getTLSCallPreservedMask() const
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:329
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG)
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override
Return the preferred vector type legalization action.
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
static CondCode getInvertedCondCode(CondCode Code)
iterator end() const
Definition: ArrayRef.h:130
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:329
static bool isZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of "vector_shuffle v...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain...
Definition: ISDOpcodes.h:615
This class represents a function call, abstracting a target machine's calling convention.
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:536
const GlobalValue * getGlobal() const
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
LLVM_NODISCARD bool equals_lower(StringRef RHS) const
equals_lower - Check for string equality, ignoring case.
Definition: StringRef.h:173
static MVT getFloatingPointVT(unsigned BitWidth)
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:237
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target...
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Function Alias Analysis Results
Type * getTypeForEVT(LLVMContext &Context) const
getTypeForEVT - This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
unsigned getSizeInBits() const
This instruction constructs a fixed permutation of two input vectors.
static SDValue performSelectCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with the compare-mask instruct...
unsigned getByValSize() const
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, SelectionDAG &DAG)
unsigned getNumOperands() const
Return the number of values used by this operation.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:100
unsigned getNumOperands() const
arg_iterator arg_end()
Definition: Function.h:559
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
A debug info location.
Definition: DebugLoc.h:34
const SDValue & getOperand(unsigned Num) const
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address...
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:817
static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:664
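For example (illustrative values only):
  uint64_t A = alignTo(13, 8); // 16: the next multiple of 8 at or above 13
  uint64_t B = alignTo(16, 8); // 16: already-aligned values are returned unchanged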
An instruction for reading from memory.
Definition: Instructions.h:164
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG)
static unsigned getDUPLANEOp(EVT EltType)
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:170
static bool isAdvSIMDModImmType3(uint64_t Imm)
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:330
An instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Definition: Instructions.h:669
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags=0)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
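A minimal sketch of building a node with this API; DAG, DL, LHS and RHS are assumed to be in scope, with LHS and RHS of type i64:
  SDValue Sum = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS); // builds (or re-uses via CSE) an ADD node in the DAG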
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG)
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &ArgsFlags, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
This defines the Use class.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
Type * getElementType() const
Definition: DerivedTypes.h:462
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, AArch64CC::CondCode CC, bool NoNans, EVT VT, const SDLoc &dl, SelectionDAG &DAG)
Same for subtraction.
Definition: ISDOpcodes.h:240
bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const
Return true if the attribute exists at the given index.
Definition: Attributes.cpp:994
static SDValue tryCombineFixedPointConvert(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
const SDValue & getBasePtr() const
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a ldN intrinsic.
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1 at the ...
Definition: ISDOpcodes.h:299
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:999
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:180
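For example (illustrative only):
  unsigned LZ = countLeadingZeros(uint32_t(0x00800000)); // 8: bits 31..24 are zero, bit 23 is the first one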
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG)
static std::pair< SDValue, SDValue > getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG)
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:369
unsigned getResNo() const
get the index which selects a specific result in the SDNode
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
bool bitsLT(EVT VT) const
bitsLT - Return true if this has less bits than VT.
Definition: ValueTypes.h:212
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:881
bool isUndef() const
Return true if this node is an UNDEF value.
static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG)
bool isRegLoc() const
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:461
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
return AArch64::GPR64RegClass.contains(Reg)
SDValue getExternalSymbol(const char *Sym, EVT VT)
bool isAllOnesValue() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:165
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:540
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
const Triple & getTargetTriple() const
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address...
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:209
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:388
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:133
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
lazy value info
static bool isAdvSIMDModImmType7(uint64_t Imm)
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG)
The address of a basic block.
Definition: Constants.h:822
static bool isTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
cl::opt< bool > EnableAArch64ELFLocalDynamicTLSGeneration("aarch64-elf-ldtls-generation", cl::Hidden, cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false))
A description of a memory reference used in the backend.
static SDValue performTBZCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex)
struct fuzzer::@269 Flags
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
const HexagonInstrInfo * TII
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG)
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:440
unsigned getMaximumJumpTableSize() const
Shift and rotation operations.
Definition: ISDOpcodes.h:344
static SDValue performAddSubLongCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Class to represent struct types.
Definition: DerivedTypes.h:199
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static SDValue performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Target-specific DAG combine for the across vector add reduction.
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point multiply by power of two into floating-point to fixed-point conversion...
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
bool hasInternalLinkage() const
Definition: GlobalValue.h:413
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:327
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
static void advance(T &it, size_t Val)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
unsigned getNumArgOperands() const
Return the number of call arguments.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
unsigned SubReg
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:994
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:588
LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(const T &Value) const
Definition: StringSwitch.h:244
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ISD::SETCC ValueType.
bool Eq(const uint8_t *Data, size_t Size, const char *Str)
Definition: StrcmpTest.cpp:11
Helper structure to keep track of a SET_CC lowered into AArch64 code.
Reg
All possible values of the reg field in the ModR/M byte.
This file contains the simple types necessary to represent the attributes associated with functions a...
SimpleValueType SimpleTy
MVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
The memory access is dereferenceable (i.e., doesn't trap).
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:233
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
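A hedged sketch of how a target constructor typically uses this hook; the opcode/type pair below is purely illustrative and is not meant to restate this target's exact configuration:
  // inside a TargetLowering subclass constructor
  setOperationAction(ISD::ROTL, MVT::i32, Expand); // let the legalizer rewrite ROTL in terms of shifts and OR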
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, SelectionDAG &DAG)
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class is used to represent EVT's, which are used to parameterize some operations.
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Turn vector tests of the signbit in the form of: xor (sra X, elt_size(X)-1), -1 into: cmge X...
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG...
Definition: ISDOpcodes.h:73
Type * getVectorElementType() const
Definition: Type.h:353
#define im(i)
static bool isAdvSIMDModImmType5(uint64_t Imm)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:662
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:123
This file implements a class to represent arbitrary precision integral constant values and operations...
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
AtomicOrdering
Atomic ordering for LLVM's memory model.
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:611
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:239
Value * getSafeStackPointerLocation(IRBuilder<> &IRB) const override
If the target has a standard location for the unsafe stack pointer, returns the address of that locat...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:399
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const
virtual Value * getIRStackGuard(IRBuilder<> &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector...
static const unsigned PerfectShuffleTable[6561+1]
bool isInConsecutiveRegs() const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
unsigned getLocReg() const
void setArgumentStackToRestore(unsigned bytes)
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:31
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1362
#define F(x, y, z)
Definition: MD5.cpp:51
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
bool isKill() const
static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:410
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
static bool isEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseEXT, unsigned &Imm)
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
virtual Value * getSafeStackPointerLocation(IRBuilder<> &IRB) const
Returns the target-specific address of the unsafe stack pointer.
SDValue getRegisterMask(const uint32_t *RegMask)
constexpr bool isMask_64(uint64_t Value)
isMask_64 - This function returns true if the argument is a non-empty sequence of ones starting at th...
Definition: MathExtras.h:380
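For example (illustrative only):
  bool A = isMask_64(0x00FFu); // true: a contiguous run of ones starting at bit 0
  bool B = isMask_64(0x0FF0u); // false: the run of ones does not start at the least significant bit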
const AArch64RegisterInfo * getRegisterInfo() const override
MachineBasicBlock * MBB
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:363
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:699
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:160
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:949
const SDValue & getBasePtr() const
static bool isAdvSIMDModImmType2(uint64_t Imm)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:737
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
An instruction for storing to memory.
Definition: Instructions.h:300
Natural vector cast.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:842
const APInt & getAPIntValue() const
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:401
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:210
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:125
EVT getMemoryVT() const
Return the type of the in-memory value.
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
int64_t getImm() const
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1301
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:487
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:656
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override
Returns true if the target can instruction select the specified FP immediate natively.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass...
bool isIntDivCheap(EVT VT, AttributeSet Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts, adds, and multiplies for this target.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:135
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
static const MCPhysReg GPRArgRegs[]
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1370
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:141
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:328
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset)
Stack pointer relative access.
static bool isREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize...
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG)
Class to represent pointers.
Definition: DerivedTypes.h:443
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
static bool isEssentiallyExtractSubvector(SDValue N)
This class is used to represent ISD::STORE nodes.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:453
static SDValue performBRCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:121
AArch64SetCCInfo AArch64
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:274
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1003
TargetInstrInfo - Interface to description of machine instruction set.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:518
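For example, the sign-bit mask of a 64-bit value (illustrative only):
  APInt SignMask = APInt::getHighBitsSet(64, 1); // only bit 63 is set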
bool isLegalICmpImmediate(int64_t) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
static bool isAdvSIMDModImmType9(uint64_t Imm)
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL) const
Soften the operands of a comparison.
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:254
SDNode * getNode() const
get the SDNode which holds the desired result
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:589
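For example (illustrative only):
  uint64_t A = MinAlign(16, 24); // 8: the largest power of two that divides both arguments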
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
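A minimal sketch of this builder pattern; MF, DL, TII, DstReg and SrcReg are assumed to be in scope, and the generic COPY opcode is used purely for illustration:
  MachineInstrBuilder MIB = BuildMI(MF, DL, TII->get(TargetOpcode::COPY))
                                .addReg(DstReg, RegState::Define)
                                .addReg(SrcReg);
  // this overload creates the instruction without inserting it into a basic block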
static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:262
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:268
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
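A small usage sketch; RegName is a hypothetical StringRef and the cases are illustrative:
  unsigned Bits = StringSwitch<unsigned>(RegName)
                      .Case("w0", 32)
                      .Case("x0", 64)
                      .Default(0);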
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:133
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns true if the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass into a ...
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:551
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address...
unsigned const MachineRegisterInfo * MRI
bool isZero() const
Return true if the value is positive or negative zero.
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:111
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
unsigned getVectorNumElements() const
bool isZExtFree(Type *Ty1, Type *Ty2) const override
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
CodeModel::Model getCodeModel() const
Returns the code model.
MVT - Machine Value Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
GenericSetCCInfo Generic
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
Simple binary floating point operators.
Definition: ISDOpcodes.h:246
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:219
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MVT getLocVT() const
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
This is an important base class in LLVM.
Definition: Constant.h:42
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE...
Definition: ISDOpcodes.h:637
bool isVector() const
isVector - Return true if this is a vector value type.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:818
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1321
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:279
const Constant * getConstVal() const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
static const MVT MVT_CC
Value type used for condition codes.
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:228
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Target-specific DAG combine for the across vector min/max reductions.
bool isLittleEndian() const
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info)
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static std::pair< SDValue, SDValue > splitInt128(SDValue N, SelectionDAG &DAG)
static bool isAdvSIMDModImmType1(uint64_t Imm)
static mvt_range fp_valuetypes()
static void ReplaceReductionResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, unsigned InterOp, unsigned AcrossOp)
MachineBasicBlock * EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *BB) const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
bool isDesirableToCommuteWithShift(const SDNode *N) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
constexpr bool isPowerOf2_64(uint64_t Value)
isPowerOf2_64 - This function returns true if the argument is a power of two > 0 (64 bit edition...
Definition: MathExtras.h:405
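For example (illustrative only):
  bool A = isPowerOf2_64(4096); // true
  bool B = isPowerOf2_64(0);    // false: zero is not considered a power of two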
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This class provides iterator support for SDUse operands that use a specific SDNode.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself...
static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops...
uint32_t Offset
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
EVT getVT() const
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1255
static void ReplaceCMP_SWAP_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getOpcode() const
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:676
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline...
Value * getOperand(unsigned i) const
Definition: User.h:145
Value * getPointerOperand()
Definition: Instructions.h:270
static bool isAdvSIMDModImmType8(uint64_t Imm)
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
void setPrefFunctionAlignment(unsigned Align)
Set the target's preferred function alignment.
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
static mvt_range vector_valuetypes()
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
arg_iterator arg_begin()
Definition: Function.h:550
self_iterator getIterator()
Definition: ilist_node.h:81
The memory access is non-temporal.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:281
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:628
static void changeFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:136
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all...
bool isMisaligned128StoreSlow() const
bool isVolatile() const
const SDValue & getValue() const
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
unsigned MaxStoresPerMemmove
Specify maximum bytes of store instructions per memmove call.
Helper structure to be able to read SetCC information.
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo...
Definition: ISDOpcodes.h:705
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops...
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address...
EVT - Extended Value Type.
Definition: ValueTypes.h:31
bool isIntN(unsigned N, int64_t x)
isIntN - Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:366
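For example (illustrative only):
  bool A = isIntN(8, 127); // true: 127 is representable as a signed 8-bit value
  bool B = isIntN(8, 128); // false: 128 is not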
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:213
std::vector< ArgListEntry > ArgListTy
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
static bool isSingletonEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
LLVMContext & getContext() const
Definition: IRBuilder.h:123
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:1671
This structure contains all information that is necessary for lowering calls.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:213
static SDValue GenerateTBL(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
bool predictableSelectIsExpensive() const
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
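For example (Ctx is an LLVMContext assumed to be in scope):
  EVT V4I32 = EVT::getVectorVT(Ctx, MVT::i32, 4); // equivalent to MVT::v4i32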
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, bool isSigned, const SDLoc &dl, bool doesNotReturn=false, bool isReturnValueUsed=true) const
Returns a pair of (return value, chain).
This class contains a discriminated union of information about pointers in memory operands...
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:391
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
unsigned char ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1298
static SDValue performSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool needsCustom() const
static bool isReleaseOrStronger(AtomicOrdering ao)
const MachinePointerInfo & getPointerInfo() const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
unsigned getByValAlign() const
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, bool &FromHi)
An EXTR instruction is made up of two shifts, ORed together.
static const int BlockSize
Definition: TarWriter.cpp:34
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
static cl::opt< bool > EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, cl::desc("Allow AArch64 SLI/SRI formation"), cl::init(false))
bool bitsGT(EVT VT) const
bitsGT - Return true if this has more bits than VT.
Definition: ValueTypes.h:200
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
ArrayRef< int > getMask() const
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.cpp:703
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:709
SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:546
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:540
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:416
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef...
static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG)
bool isPosZero() const
Definition: APFloat.h:1046
Iterator for intrusive lists based on ilist_node.
CCState - This class holds information needed while lowering arguments and return values...
bool MaskAndBranchFoldingIsLegal
MaskAndBranchFoldingIsLegal - Indicates if the target supports folding a mask of a single bit...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
static unsigned getIntrinsicID(const SDNode *N)
void dump() const
Dump this node, for debugging.
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, SDValue SplatVal, unsigned NumVecElts)
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:757
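A small usage sketch with hypothetical data:
  SmallVector<int, 8> Mask = {0, 2, 4, 6};
  bool HasTwo = llvm::find(Mask, 2) != Mask.end(); // true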
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side...
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const override
Returns the target specific optimal type for load and store operations as a result of memset...
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:285
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:408
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:330
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:166
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override
EVT is not used in-tree, but is used by out-of-tree targets.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:58
AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI)
unsigned logBase2() const
Definition: APInt.h:1507
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:59
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:639
Provides information about what library functions are available for the current target.
static bool isLegalArithImmed(uint64_t C)
CCValAssign - Represent assignment of one arg/retval to a location.
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static unsigned getExtFactor(SDValue &V)
getExtFactor - Determine the adjustment factor for the position when generating an "extract from vect...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:649
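For example, with a hypothetical register array:
  static const MCPhysReg Regs[] = {AArch64::X0, AArch64::X1};
  unsigned NumRegs = array_lengthof(Regs); // 2, computed at compile time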
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:566
const DataFlowGraph & G
Definition: RDFGraph.cpp:206
An SDNode that represents everything that will be needed to construct a MachineInstr.
static bool isAllConstantBuildVector(const SDValue &PotentialBVec, uint64_t &ConstVal)
const SDValue & getChain() const
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:347
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:307
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:121
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:1656
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG)
void dump(const TargetInstrInfo *TII=nullptr) const
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:558
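For example, a splat of 1 across a four-element vector (Ctx is an LLVMContext assumed to be in scope):
  Type *V4I32Ty = VectorType::get(Type::getInt32Ty(Ctx), 4);
  Constant *Ones = ConstantInt::get(V4I32Ty, 1); // <4 x i32> <i32 1, i32 1, i32 1, i32 1>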
Helper structure to keep track of ISD::SET_CC operands.
bool is64BitVector() const
is64BitVector - Return true if this is a 64-bit vector type.
Represents one node in the SelectionDAG.
bool isProfitableToHoist(Instruction *I) const override
Check if it is profitable to hoist an instruction in then/else to if.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:275
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:584
static bool Enabled
Definition: Statistic.cpp:49
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a stN intrinsic.
static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert a DAG fp condition code to an AArch64 CC.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static mvt_range integer_valuetypes()
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point divide by power of two into fixed-point to floating-point conversion.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
AttributeSet getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:176
static bool isZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, const SDLoc &DL)
Return a new CALLSEQ_START node, which always must have a glue result (to ensure it's not CSE'd)...
Definition: SelectionDAG.h:715
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Selects the correct CCAssignFn for a given CallingConvention value.
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Class to represent vector types.
Definition: DerivedTypes.h:369
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns true if the given (atomic) store should be expanded by the IR-level AtomicExpand pass into an...
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Simplify Addr given that the top byte of it is ignored by HW during address translation.
Class for arbitrary precision integers.
Definition: APInt.h:77
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Value * getIRStackGuard(IRBuilder<> &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
iterator_range< use_iterator > uses()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:354
void setMinFunctionAlignment(unsigned Align)
Set the target's minimum function alignment (in log2(bytes))
int64_t getSExtValue() const
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC usable with the vector...
op_iterator op_begin() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:195
unsigned EmulatedTLS
EmulatedTLS - This flag enables emulated TLS model, using emutls function in the runtime library...
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static use_iterator use_end()
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
void setPrefLoopAlignment(unsigned Align)
Set the target's preferred loop alignment.
void setMaximumJumpTableSize(unsigned)
Indicate the maximum number of entries in jump tables.
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:400
bool isMemLoc() const
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:438
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:932
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:403
static SDValue performNVCASTCombine(SDNode *N)
Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:250
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:464
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:259
FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that when a single input is NaN...
Definition: ISDOpcodes.h:527
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1402
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const
Return the preferred vector type legalization action.
bool isTargetDarwin() const
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG)
iterator_range< value_op_iterator > op_values() const
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1942
static SDValue performPostLD1Combine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, bool IsLaneOp)
Target-specific DAG combine function for post-increment LD1 (lane) and post-increment LD1R...
static bool isAcquireOrStronger(AtomicOrdering ao)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static Constant * getSequentialMask(IRBuilder<> &Builder, unsigned Start, unsigned NumElts)
Get a mask consisting of sequential integers starting from Start.
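A sketch of how such a mask could be built with the IRBuilder; this is an assumption-based illustration of the described behavior, not a claim about this file's exact implementation:
  SmallVector<Constant *, 16> MaskElts;
  for (unsigned i = 0; i != NumElts; ++i)
    MaskElts.push_back(Builder.getInt32(Start + i)); // i32 Start, Start+1, ...
  Constant *Mask = ConstantVector::get(MaskElts);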
static bool isEquivalentMaskless(unsigned CC, unsigned width, ISD::LoadExtType ExtType, int AddConstant, int CompConstant)
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1386
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:560
static mvt_range all_valuetypes()
SimpleValueType Iteration.
Representation of each machine instruction.
Definition: MachineInstr.h:52
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:633
static bool isAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
These are IR-level optimization flags that may be propagated to SDNodes.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:384
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:578
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
uint64_t getConstantOperandVal(unsigned i) const
bool isOSBinFormatELF() const
Tests whether the OS uses the ELF binary format.
Definition: Triple.h:565
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG)
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:333
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:142
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:226
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
bool isUndef() const
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:610
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
This method returns a target specific FastISel object, or null if the target does not support "fast" ...
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:205
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:418
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:536
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:256
void ReplaceAllUsesWith(SDValue From, SDValue Op)
Modify anything using 'From' to use 'To' instead.
#define I(x, y, z)
Definition: MD5.cpp:54
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG)
#define N
ArrayRef< SDUse > ops() const
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.cpp:230
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1099
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page...
The memory access always returns the same value (or traps).
CallInst * CreateCall(Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1579
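A minimal usage sketch of this overload (the function name "callee" and the helper emitCall are hypothetical, not from this file): call an existing declaration taking a single i64 and pass the constant 42.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Minimal sketch: emit a call to an existing declaration "callee"
// (assumed to be present in M) with one i64 argument.
static Value *emitCall(Module &M, IRBuilder<> &Builder) {
  Value *Callee = M.getFunction("callee");   // assumed to exist in M
  Value *Arg = Builder.getInt64(42);
  return Builder.CreateCall(Callee, {Arg});
}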
unsigned MaxStoresPerMemmoveOptSize
Maximum number of store instructions that may be substituted for a call to memmove, used for functions with the OptSize attribute.
unsigned MaxStoresPerMemcpyOptSize
Maximum number of store operations that may be substituted for a call to memcpy, used for functions w...
Value * emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type...
void setStackPointerRegisterToSaveRestore(unsigned R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm.stackrestore should save and restore.
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert, SelectionDAG &DAG)
bool isTailCall() const
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
op_iterator op_end() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
VectorType * getType() const
Overload to return most specific vector type.
Same for multiplication.
Definition: ISDOpcodes.h:243
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a vector store of a splatted scalar with scalar stores of that scalar value.
static volatile int Zero
bool supportsAddressTopByteIgnored() const
CPU has TBI (top byte of addresses is ignored during HW address translation) and OS enables it...
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:530
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
unsigned MaxStoresPerMemcpy
Specify maximum bytes of store instructions per memcpy call.
AArch64CC::CondCode CC
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power of 2, return the log base 2 integer value.
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, const AllocaInst *Alloca=nullptr)
Create a new statically sized stack object, returning a nonnegative identifier to represent it...
EVT getValueType() const
Return the ValueType of the referenced return value.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1354
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
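A minimal sketch of how a lowering or combine hook typically uses this (the helper addOne is hypothetical and assumes Op is an i64 value):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Minimal sketch: materialize the i64 constant 1 and add it to Op.
// getConstant wraps the value in a ConstantSDNode of the requested type.
static SDValue addOne(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  SDValue One = DAG.getConstant(1, DL, MVT::i64);
  return DAG.getNode(ISD::ADD, DL, MVT::i64, Op, One);
}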
void setBytesInStackArgArea(unsigned bytes)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
void setUnsafeAlgebra(bool b)
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:153
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a vector store of splatted zeros with scalar stores of WZR/XZR.
bool isLegalAddImmediate(int64_t) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:291
unsigned getReg() const
getReg - Returns the register number.
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:118
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV, unsigned Op, SelectionDAG &DAG)
This function handles the log2-shuffle pattern produced by the...
void insert(iterator MBBI, MachineBasicBlock *MBB)
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LegalizeTypeAction
This enum indicates whether a type is legal for a target, and if not, what action should be used to...
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC)
changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 CC
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo)
Check whether or not Op is a SET_CC operation, either a generic or an AArch64 lowered one...
void setReturnAddressIsTaken(bool s)
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:107
unsigned getAlignment() const
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:108
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:537
LLVM Value Representation.
Definition: Value.h:71
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:249
SDValue getRegister(unsigned Reg, EVT VT)
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:111
unsigned getMaximumJumpTableSize() const
Return upper limit for number of entries in a jump table.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
const AArch64InstrInfo * getInstrInfo() const override
static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:631
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
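Together with isAllOnesConstant (listed above), this is handy in DAG combines; the sketch below is a hypothetical combine, not code from this file:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Minimal sketch: fold "x & 0 -> 0" and "x & all-ones -> x" by inspecting
// the second operand of an ISD::AND node.
static SDValue simplifyAnd(SDNode *N, SelectionDAG &DAG) {
  if (N->getOpcode() != ISD::AND)
    return SDValue();
  SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
  if (isNullConstant(RHS))
    return RHS;          // x & 0 == 0
  if (isAllOnesConstant(RHS))
    return LHS;          // x & all-ones == x
  return SDValue();      // no simplification found
}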
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
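A minimal sketch of the macro in use (the classify function is hypothetical): it marks the one deliberate fallthrough so -Wimplicit-fallthrough stays quiet, expanding to [[clang::fallthrough]] or nothing depending on the compiler.

#include "llvm/Support/Compiler.h"

// Minimal sketch: the fallthrough from case 2 into default is intentional.
static int classify(int Kind) {
  switch (Kind) {
  case 0:
  case 1:
    return 1;
  case 2:
    ++Kind;
    LLVM_FALLTHROUGH;
  default:
    return Kind;
  }
}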
SDValue getValueType(EVT)
Value * emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:685
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned)
unsigned getPrefFunctionAlignment() const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:951
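A minimal sketch combining CreateLShr with CreateTrunc (the splitI128 helper is hypothetical; it mirrors, roughly, the kind of split used when lowering 128-bit values to two i64 halves):

#include "llvm/IR/IRBuilder.h"
#include <utility>
using namespace llvm;

// Minimal sketch: split an i128 value into its low and high i64 halves
// with a logical shift right by 64 followed by truncation.
static std::pair<Value *, Value *> splitI128(IRBuilder<> &Builder, Value *V) {
  Type *I64 = Builder.getInt64Ty();
  Value *Lo = Builder.CreateTrunc(V, I64, "lo");
  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(V, 64), I64, "hi");
  return {Lo, Hi};
}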
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type.
Definition: DataLayout.h:533
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:331
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
#define DEBUG(X)
Definition: Debug.h:100
Primary interface to the complete machine description for the target machine.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:678
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow...
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:377
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
unsigned getPrefLoopAlignment() const
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
unsigned MaxStoresPerMemsetOptSize
Maximum number of store operations that may be substituted for a call to memset, used for functions with the OptSize attribute.
static bool isConcatMask(ArrayRef< int > Mask, EVT VT, bool SplitLHS)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1722
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:197
unsigned getLocMemOffset() const
MVT getVectorElementType() const
static bool isVolatile(Instruction *Inst)
Conversion operators.
Definition: ISDOpcodes.h:397
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
Provide custom lowering hooks for some operations.
int * Ptr
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:381
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:137
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:406
static void Split(std::vector< std::string > &V, StringRef S)
Split - Splits a string of comma separated items in to a vector of strings.
auto find_if(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:764
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
bool isUIntN(unsigned N, uint64_t x)
isUIntN - Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:360
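A minimal sketch (the fitsInAddImm helper is hypothetical): AArch64 ADD/SUB immediates are 12-bit unsigned values, so the check reads naturally with isUIntN.

#include "llvm/Support/MathExtras.h"
using namespace llvm;

// Minimal sketch: true for 0..4095 (e.g. 4095), false for 4096 and above.
static bool fitsInAddImm(uint64_t V) {
  return isUIntN(12, V);
}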
unsigned getAlignment() const
static void getShuffleMask(Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
bool isShuffleMaskLegal(const SmallVectorImpl< int > &M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded...
bool is64BitVector() const
is64BitVector - Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:148
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:516
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
Value * getPointerOperand()
Definition: Instructions.h:394
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode, SDValue Operand, SelectionDAG &DAG, int &ExtraSteps)
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
static SDValue performSRLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
unsigned Log2_64(uint64_t Value)
Log2_64 - This function returns the floor log base 2 of the specified value, -1 if the value is zero...
Definition: MathExtras.h:519
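A minimal sketch (the byteSizeToShift helper is hypothetical): converting a power-of-two size in bytes to the corresponding shift amount, e.g. 16 maps to 4.

#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

// Minimal sketch: floor log base 2 of a nonzero value.
static unsigned byteSizeToShift(uint64_t Bytes) {
  assert(Bytes != 0 && "log2 of zero is not meaningful here");
  return Log2_64(Bytes);
}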
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:321
static bool isSplatMask(const int *Mask, EVT VT)
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits)
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
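A minimal sketch of the mapping (the exampleChangeToInteger function is hypothetical): a floating-point vector becomes an integer vector with the same element count and element width.

#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
using namespace llvm;

// Minimal sketch: v4f32 maps to v4i32.
static void exampleChangeToInteger() {
  EVT FloatVT = MVT::v4f32;
  EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
  assert(IntVT == MVT::v4i32 && "f32 elements become i32 elements");
  (void)IntVT;
}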
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:226
static bool canGuaranteeTCO(CallingConv::ID CC)
Return true if the calling convention is one that we can guarantee TCO for.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:222
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:529
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG)
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
gep_type_iterator gep_type_begin(const User *GEP)
uint64_t getZExtValue() const
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:799
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:326
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:248
This class is used to represent ISD::LOAD nodes.
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary...
Definition: ISDOpcodes.h:545
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, const SDLoc &dl)