//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "ARMISelLowering.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;

#define DEBUG_TYPE "arm-isel"

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

namespace {
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
               ParmContext PC)
        : CCState(CC, isVarArg, MF, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}
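
// Editor's note: an illustrative (hypothetical) use of ARMCCState, mirroring
// the argument-lowering code later in this file; the surrounding variables
// (CallConv, isVarArg, DAG, Ins) are assumed for the sketch:
//
//   SmallVector<CCValAssign, 16> ArgLocs;
//   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
//                     *DAG.getContext(), Prologue);
//   CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv,
//                                                        /*Return=*/false,
//                                                        isVarArg));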

// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
                                       MVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT, Promote);
    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);

    setOperationAction(ISD::STORE, VT, Promote);
    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
  }

  MVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  if (ElemTy == MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR,      VT, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE,    VT, Custom);
  setOperationAction(ISD::CONCAT_VECTORS,    VT, Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
  setOperationAction(ISD::SELECT,            VT, Expand);
  setOperationAction(ISD::SELECT_CC,         VT, Expand);
  setOperationAction(ISD::VSELECT,           VT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT, Promote);
    AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    setOperationAction(ISD::OR,  VT, Promote);
    AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
    setOperationAction(ISD::XOR, VT, Promote);
    AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT, Expand);
  setOperationAction(ISD::UDIV, VT, Expand);
  setOperationAction(ISD::FDIV, VT, Expand);
  setOperationAction(ISD::SREM, VT, Expand);
  setOperationAction(ISD::UREM, VT, Expand);
  setOperationAction(ISD::FREM, VT, Expand);

  if (!VT.isFloatingPoint() &&
      VT != MVT::v2i64 && VT != MVT::v1i64)
    for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
      setOperationAction(Opcode, VT, Legal);
}
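
// Editor's note (illustrative, not from the original source): for a D-register
// type such as v8i8, addDRTypeForNEON below passes PromotedLdStVT = f64 and
// PromotedBitwiseVT = v2i32, so a 'load <8 x i8>' is performed as an f64 load
// plus a bitcast, an 'and <8 x i8>' is performed as an 'and <2 x i32>' wrapped
// in bitcasts, and shifts/shuffles go through the Custom hooks handled in
// LowerOperation.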

void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPairRegClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
                                     const ARMSubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  RegInfo = Subtarget->getRegisterInfo();
  Itins = Subtarget->getInstrItineraryData();

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (Subtarget->isTargetMachO()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
        Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
      static const struct {
        const RTLIB::Libcall Op;
        const char * const Name;
        const ISD::CondCode Cond;
      } LibraryCalls[] = {
        // Single-precision floating-point arithmetic.
        { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
        { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
        { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
        { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },

        // Double-precision floating-point arithmetic.
        { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
        { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
        { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
        { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },

        // Single-precision comparisons.
        { RTLIB::OEQ_F32, "__eqsf2vfp",    ISD::SETNE },
        { RTLIB::UNE_F32, "__nesf2vfp",    ISD::SETNE },
        { RTLIB::OLT_F32, "__ltsf2vfp",    ISD::SETNE },
        { RTLIB::OLE_F32, "__lesf2vfp",    ISD::SETNE },
        { RTLIB::OGE_F32, "__gesf2vfp",    ISD::SETNE },
        { RTLIB::OGT_F32, "__gtsf2vfp",    ISD::SETNE },
        { RTLIB::UO_F32,  "__unordsf2vfp", ISD::SETNE },
        { RTLIB::O_F32,   "__unordsf2vfp", ISD::SETEQ },

        // Double-precision comparisons.
        { RTLIB::OEQ_F64, "__eqdf2vfp",    ISD::SETNE },
        { RTLIB::UNE_F64, "__nedf2vfp",    ISD::SETNE },
        { RTLIB::OLT_F64, "__ltdf2vfp",    ISD::SETNE },
        { RTLIB::OLE_F64, "__ledf2vfp",    ISD::SETNE },
        { RTLIB::OGE_F64, "__gedf2vfp",    ISD::SETNE },
        { RTLIB::OGT_F64, "__gtdf2vfp",    ISD::SETNE },
        { RTLIB::UO_F64,  "__unorddf2vfp", ISD::SETNE },
        { RTLIB::O_F64,   "__unorddf2vfp", ISD::SETEQ },

        // Floating-point to integer conversions.
        // i64 conversions are done via library routines even when generating VFP
        // instructions, so use the same ones.
        { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp",    ISD::SETCC_INVALID },
        { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
        { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp",    ISD::SETCC_INVALID },
        { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },

        // Conversions between floating types.
        { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp",  ISD::SETCC_INVALID },
        { RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp", ISD::SETCC_INVALID },

        // Integer to floating-point conversions.
        // i64 conversions are done via library routines even when generating VFP
        // instructions, so use the same ones.
        // FIXME: There appears to be some naming inconsistency in ARM libgcc:
        // e.g., __floatunsidf vs. __floatunssidfvfp.
        { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp",    ISD::SETCC_INVALID },
        { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
        { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp",    ISD::SETCC_INVALID },
        { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
      };

      for (const auto &LC : LibraryCalls) {
        setLibcallName(LC.Op, LC.Name);
        if (LC.Cond != ISD::SETCC_INVALID)
          setCmpLibcallCC(LC.Op, LC.Cond);
      }
    }

    // Set the correct calling convention for ARMv7k WatchOS. It's just
    // AAPCS_VFP, even for functions as simple as libcalls.
    if (Subtarget->isTargetWatchOS()) {
      for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
        setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
    }
  }
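
  // Editor's note (illustrative, not from the original source): with the
  // MachO table above installed, a single-precision IR add such as
  //   %s = fadd float %a, %b
  // on a Thumb target with VFP2 is lowered to a call to __addsf3vfp rather
  // than selected to an instruction, and an ordered compare like
  //   %c = fcmp oeq float %a, %b
  // becomes a call to __eqsf2vfp whose result is tested with the SETNE
  // condition recorded in the table.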

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, nullptr);
  setLibcallName(RTLIB::SRL_I128, nullptr);
  setLibcallName(RTLIB::SRA_I128, nullptr);

  // RTLIB
  if (Subtarget->isAAPCS_ABI() &&
      (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
       Subtarget->isTargetAndroid())) {
    static const struct {
      const RTLIB::Libcall Op;
      const char * const Name;
      const CallingConv::ID CC;
      const ISD::CondCode Cond;
    } LibraryCalls[] = {
      // Double-precision floating-point arithmetic helper functions
      // RTABI chapter 4.1.2, Table 2
      { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Double-precision floating-point comparison helper functions
      // RTABI chapter 4.1.2, Table 3
      { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
      { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UO_F64,  "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::O_F64,   "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },

      // Single-precision floating-point arithmetic helper functions
      // RTABI chapter 4.1.2, Table 4
      { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Single-precision floating-point comparison helper functions
      // RTABI chapter 4.1.2, Table 5
      { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
      { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::UO_F32,  "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
      { RTLIB::O_F32,   "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },

      // Floating-point to integer conversions.
      // RTABI chapter 4.1.2, Table 6
      { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Conversions between floating types.
      // RTABI chapter 4.1.2, Table 7
      { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::FPEXT_F32_F64,   "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Integer to floating-point conversions.
      // RTABI chapter 4.1.2, Table 8
      { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Long long helper functions
      // RTABI chapter 4.2, Table 9
      { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

      // Integer division functions
      // RTABI chapter 4.3.1
      { RTLIB::SDIV_I8,  "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I16, "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I32, "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::SDIV_I64, "__aeabi_ldivmod",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I8,  "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I16, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I32, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
    };

    for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
      if (LC.Cond != ISD::SETCC_INVALID)
        setCmpLibcallCC(LC.Op, LC.Cond);
    }

    // EABI dependent RTLIB
    if (TM.Options.EABIVersion == EABI::EABI4 ||
        TM.Options.EABIVersion == EABI::EABI5) {
      static const struct {
        const RTLIB::Libcall Op;
        const char *const Name;
        const CallingConv::ID CC;
        const ISD::CondCode Cond;
      } MemOpsLibraryCalls[] = {
        // Memory operations
        // RTABI chapter 4.3.4
        { RTLIB::MEMCPY,  "__aeabi_memcpy",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
        { RTLIB::MEMSET,  "__aeabi_memset",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
      };

      for (const auto &LC : MemOpsLibraryCalls) {
        setLibcallName(LC.Op, LC.Name);
        setLibcallCallingConv(LC.Op, LC.CC);
        if (LC.Cond != ISD::SETCC_INVALID)
          setCmpLibcallCC(LC.Op, LC.Cond);
      }
    }
  }
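
  // Editor's note (illustrative, not from the original source): the effect of
  // the RTABI table is that on an AEABI target without a hardware divider,
  //   %q = sdiv i32 %a, %b
  // is lowered to a call to __aeabi_idiv using the ARM_AAPCS calling
  // convention. The UNE_F64 entry also shows how one helper can serve two
  // predicates: both OEQ and UNE call __aeabi_dcmpeq, and only the condition
  // used to test the result (SETNE vs. SETEQ) differs.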

  if (Subtarget->isTargetWindows()) {
    static const struct {
      const RTLIB::Libcall Op;
      const char * const Name;
      const CallingConv::ID CC;
    } LibraryCalls[] = {
      { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::SDIV_I32, "__rt_sdiv",   CallingConv::ARM_AAPCS_VFP },
      { RTLIB::UDIV_I32, "__rt_udiv",   CallingConv::ARM_AAPCS_VFP },
      { RTLIB::SDIV_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS_VFP },
      { RTLIB::UDIV_I64, "__rt_udiv64", CallingConv::ARM_AAPCS_VFP },
    };

    for (const auto &LC : LibraryCalls) {
      setLibcallName(LC.Op, LC.Name);
      setLibcallCallingConv(LC.Op, LC.CC);
    }
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
  if (Subtarget->isTargetWatchOS() ||
      (Subtarget->isTargetIOS() &&
       !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }
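
  // Editor's note (illustrative, not from the original source): DIVREM
  // libcalls let DAG combining fold a matching sdiv/srem pair over the same
  // operands, e.g.
  //   %q = sdiv i32 %a, %b
  //   %r = srem i32 %a, %b
  // into a single __divmodsi4 call that returns both the quotient and the
  // remainder, instead of two separate libcalls.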

  // The half <-> float conversion functions are always soft-float, but are
  // needed for some targets which use a hard-float calling convention by
  // default.
  if (Subtarget->isAAPCS_ABI()) {
    setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
  } else {
    setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
    setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
    setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
  }

  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
  // a __gnu_ prefix (which is the default).
  if (Subtarget->isTargetAEABI()) {
    setLibcallName(RTLIB::FPROUND_F32_F16, "__aeabi_f2h");
    setLibcallName(RTLIB::FPROUND_F64_F16, "__aeabi_d2h");
    setLibcallName(RTLIB::FPEXT_F16_F32,   "__aeabi_h2f");
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
  else
    addRegisterClass(MVT::i32, &ARM::GPRRegClass);
  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    addRegisterClass(MVT::f64, &ARM::DPRRegClass);
  }

  for (MVT VT : MVT::vector_valuetypes()) {
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    setOperationAction(ISD::BSWAP, VT, Expand);
  }

  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
  setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    // The same applies to v4f32, except that vadd, vsub and vmul are natively
    // supported for v4f32.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    // FIXME: Code duplication: FDIV and FREM are expanded always, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    // FIXME: Create unittest.
    // In other words, find a case where "copysign" appears in a DAG with
    // vector operands.
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    // FIXME: Code duplication: SETCC has custom operation action, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    // FIXME: Create unittest for FNEG and for FABS.
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
    setOperationAction(ISD::FMA, MVT::v2f64, Expand);

    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);

    // Mark v2f32 intrinsics.
    setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source, nor does it have a
    // FP_TO_[SU]INT instruction with a narrower destination than source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);

    setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);
    setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);

    // NEON does not have single instruction CTPOP for vectors with element
    // types wider than 8-bits.  However, custom lowering can leverage the
    // v8i8/v16i8 vcnt instruction.
    setOperationAction(ISD::CTPOP,      MVT::v2i32, Custom);
    setOperationAction(ISD::CTPOP,      MVT::v4i32, Custom);
    setOperationAction(ISD::CTPOP,      MVT::v4i16, Custom);
    setOperationAction(ISD::CTPOP,      MVT::v8i16, Custom);

    // NEON does not have single instruction CTTZ for vectors.
    setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
    setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
    setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);

    setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
    setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
    setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);

    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);

    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);

    // NEON only has FMA instructions as of VFP4.
    if (!Subtarget->hasVFP4()) {
      setOperationAction(ISD::FMA, MVT::v2f32, Expand);
      setOperationAction(ISD::FMA, MVT::v4f32, Expand);
    }

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::LOAD);

    // It is legal to extload from v4i8 to v4i16 or v4i32.
    for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
                   MVT::v2i32}) {
      for (MVT VT : MVT::integer_vector_valuetypes()) {
        setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
        setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
        setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
      }
    }
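
    // Editor's note (illustrative, not from the original source): marking
    // these extloads legal means a widening load such as
    //   %v = load <4 x i8>, <4 x i8>* %p
    //   %w = zext <4 x i8> %v to <4 x i32>
    // can be selected as a narrow NEON load followed by vmovl.u8/vmovl.u16
    // lane-widening moves rather than being scalarized.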
  }

  // ARM and Thumb2 support UMLAL/SMLAL.
  if (!Subtarget->isThumb1Only())
    setTargetDAGCombine(ISD::ADDC);

  if (Subtarget->isFPOnlySP()) {
    // When targeting a floating-point unit with only single-precision
    // operations, f64 is legal for the few double-precision instructions which
    // are present. However, no double-precision operations other than moves,
    // loads and stores are provided by the hardware.
    setOperationAction(ISD::FADD,       MVT::f64, Expand);
    setOperationAction(ISD::FSUB,       MVT::f64, Expand);
    setOperationAction(ISD::FMUL,       MVT::f64, Expand);
    setOperationAction(ISD::FMA,        MVT::f64, Expand);
    setOperationAction(ISD::FDIV,       MVT::f64, Expand);
    setOperationAction(ISD::FREM,       MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN,  MVT::f64, Expand);
    setOperationAction(ISD::FGETSIGN,   MVT::f64, Expand);
    setOperationAction(ISD::FNEG,       MVT::f64, Expand);
    setOperationAction(ISD::FABS,       MVT::f64, Expand);
    setOperationAction(ISD::FSQRT,      MVT::f64, Expand);
    setOperationAction(ISD::FSIN,       MVT::f64, Expand);
    setOperationAction(ISD::FCOS,       MVT::f64, Expand);
    setOperationAction(ISD::FPOWI,      MVT::f64, Expand);
    setOperationAction(ISD::FPOW,       MVT::f64, Expand);
    setOperationAction(ISD::FLOG,       MVT::f64, Expand);
    setOperationAction(ISD::FLOG2,      MVT::f64, Expand);
    setOperationAction(ISD::FLOG10,     MVT::f64, Expand);
    setOperationAction(ISD::FEXP,       MVT::f64, Expand);
    setOperationAction(ISD::FEXP2,      MVT::f64, Expand);
    setOperationAction(ISD::FCEIL,      MVT::f64, Expand);
    setOperationAction(ISD::FTRUNC,     MVT::f64, Expand);
    setOperationAction(ISD::FRINT,      MVT::f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
    setOperationAction(ISD::FFLOOR,     MVT::f64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
    setOperationAction(ISD::FP_ROUND,   MVT::f32, Custom);
    setOperationAction(ISD::FP_EXTEND,  MVT::f64, Custom);
  }
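
  // Editor's note (illustrative, not from the original source): on such a
  // single-precision-only FPU, a double-precision add like
  //   %d = fadd double %a, %b
  // cannot be selected to a vadd.f64 and instead becomes a soft-float
  // libcall (e.g. __adddf3, or __aeabi_dadd under the RTABI naming set up
  // earlier), while f64 loads, stores and register moves stay legal.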

  computeRegisterProperties(Subtarget->getRegisterInfo());

  // ARM does not have floating-point extending loads.
  for (MVT VT : MVT::fp_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
  }

  // ... or truncating stores
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);

  // ARM does not have i1 sign extending load.
  for (MVT VT : MVT::integer_valuetypes())
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }
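
  // Editor's note (illustrative, not from the original source): the four
  // indexed flavors are pre/post increment/decrement. A pre-indexed i32 load
  // corresponds to the writeback form
  //   ldr r0, [r1, #4]!   ; update r1, then load
  // and a post-indexed one to
  //   ldr r0, [r1], #4    ; load, then update r1
  // which lets address-update arithmetic fold into the memory access.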

  setOperationAction(ISD::SADDO, MVT::i32, Custom);
  setOperationAction(ISD::UADDO, MVT::i32, Custom);
  setOperationAction(ISD::SSUBO, MVT::i32, Custom);
  setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // i64 operation support.
  setOperationAction(ISD::MUL,     MVT::i64, Expand);
  setOperationAction(ISD::MULHU,   MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL,       MVT::i64, Custom);
  setOperationAction(ISD::SRA,       MVT::i64, Custom);

  if (!Subtarget->isThumb1Only()) {
    // FIXME: We should do this for Thumb1 as well.
    setOperationAction(ISD::ADDC,    MVT::i32, Custom);
    setOperationAction(ISD::ADDE,    MVT::i32, Custom);
    setOperationAction(ISD::SUBC,    MVT::i32, Custom);
    setOperationAction(ISD::SUBE,    MVT::i32, Custom);
  }

  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
    setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  for (MVT VT : MVT::vector_valuetypes()) {
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
  }
  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // These just redirect to CTTZ and CTLZ on ARM.
  setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);

  // @llvm.readcyclecounter requires the Performance Monitors extension.
  // Default to the 0 expansion on unsupported platforms.
  // FIXME: Technically there are older ARM CPUs that have
  // implementation-specific ways of obtaining this information.
  if (Subtarget->hasPerfMon())
    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide()
                                        : Subtarget->hasDivideInARMMode();
  if (!hasDivide) {
    // These are expanded into libcalls if the CPU doesn't have a HW divider.
    setOperationAction(ISD::SDIV,  MVT::i32, LibCall);
    setOperationAction(ISD::UDIV,  MVT::i32, LibCall);
  }
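
  // Editor's note (illustrative, not from the original source): with a
  // hardware divider (e.g. the Thumb-mode sdiv/udiv on Cortex-M4),
  //   %q = udiv i32 %a, %b
  // selects to a single udiv instruction; without one, the LibCall action
  // above routes it to the runtime (e.g. __aeabi_uidiv on AEABI targets).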

  setOperationAction(ISD::SREM,  MVT::i32, Expand);
  setOperationAction(ISD::UREM,  MVT::i32, Expand);
  // Register based DivRem for AEABI (RTABI 4.2)
  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) {
    setOperationAction(ISD::SREM, MVT::i64, Custom);
    setOperationAction(ISD::UREM, MVT::i64, Custom);

    setLibcallName(RTLIB::SDIVREM_I8,  "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIVREM_I8,  "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");

    setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);

    setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  }

  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);

  if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion. If we are targeting a single threaded system,
  // then set them all for expand so we can lower them later into their
  // non-atomic form.
  if (TM.Options.ThreadModel == ThreadModel::Single)
    setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other, Expand);
  else if (Subtarget->hasAnyDataBarrier() && (!Subtarget->isThumb() ||
                                              Subtarget->hasV8MBaselineOps())) {
    // ATOMIC_FENCE needs custom lowering; the others should have been expanded
    // to ldrex/strex loops already.
    setOperationAction(ISD::ATOMIC_FENCE,     MVT::Other, Custom);

    // On v8, we have particularly efficient implementations of atomic fences
    // if they can be combined with nearby atomic loads and stores.
    if (!Subtarget->hasV8Ops()) {
      // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
      setInsertFencesForAtomic(true);
    }
  } else {
    // If there's anything we can use as a barrier, go through custom lowering
    // for ATOMIC_FENCE.
    setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other,
                       Subtarget->hasAnyDataBarrier() ? Custom : Expand);

    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
  }
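
  // Editor's note (illustrative, not from the original source): on a target
  // with data barriers, a sequentially consistent IR fence such as
  //   fence seq_cst
  // is custom-lowered to a 'dmb ish' barrier, while an atomicrmw on an old
  // core with no ldrex/strex falls through to the Expand actions above and
  // becomes a __sync_* runtime call.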

  setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD and i64->f64 into VMOVDRR,
    // iff the target supports VFP2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
  if (Subtarget->useSjLjEH())
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");

  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS,   MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
  setOperationAction(ISD::FREM,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f32, Expand);
  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
  setOperationAction(ISD::FPOW,      MVT::f32, Expand);

  if (!Subtarget->hasVFP4()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  }

  // Various VFP goodness
  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
    // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
    if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
      setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
      setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    }

    // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
      setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
    }
  }

  // Combine sin / cos into one node or libcall if possible.
  if (Subtarget->hasSinCos()) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
    if (Subtarget->isTargetWatchOS()) {
      setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
      setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
    }
    if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
      // For iOS, we don't want the normal expansion of a libcall to sincos.
      // We want to issue a libcall to __sincos_stret instead.
      setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
      setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
    }
  }
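
  // Editor's note (illustrative, not from the original source): when sincos
  // is available, a pair of calls over the same argument, e.g.
  //   %s = call float @llvm.sin.f32(float %x)
  //   %c = call float @llvm.cos.f32(float %x)
  // can be combined into one FSINCOS node and emitted as a single sincosf
  // (or, on iOS/watchOS, __sincos_stret) call computing both results.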

  // FP-ARMv8 implements a lot of rounding-like FP operations.
  if (Subtarget->hasFPARMv8()) {
    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
    setOperationAction(ISD::FRINT, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    if (!Subtarget->isFPOnlySP()) {
      setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::f64, Legal);
      setOperationAction(ISD::FROUND, MVT::f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
      setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
      setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    }
  }

  if (Subtarget->hasNEON()) {
    // vmin and vmax aren't available in a scalar form, so we use
    // a NEON instruction with an undef lane instead.
    setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
    setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
    setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
    setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::XOR);

  if (Subtarget->hasV6Ops())
    setTargetDAGCombine(ISD::SRL);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
      !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  MaxStoresPerMemset = 8;
  MaxStoresPerMemsetOptSize = 4;
  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
  MaxStoresPerMemcpyOptSize = 2;
  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
  MaxStoresPerMemmoveOptSize = 2;

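  // Editor's note (illustrative, not from the original source): these
  // thresholds bound inline expansion of memory intrinsics. For instance a
  //   call void @llvm.memcpy.p0i8.p0i8.i32(..., i32 16, ...)
  // fits within 4 word-sized (or fewer vector) stores and can be expanded
  // inline, while a much larger or variable-length copy stays a call to
  // memcpy (or __aeabi_memcpy where the RTABI names are in force).
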
  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  // Prefer likely predicted branches to selects on out-of-order cores.
  PredictableSelectIsExpensive = Subtarget->isLikeA9();

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

bool ARMTargetLowering::useSoftFloat() const {
  return Subtarget->useSoftFloat();
}

// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass *, uint8_t>
ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                           MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(TRI, VT);
  // Use DPR as the representative register class for all floating-point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = &ARM::DPRRegClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = &ARM::DPRRegClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = &ARM::DPRRegClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}
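
// Editor's note (illustrative, not from the original source): the returned
// cost scales with how many D registers a value of the type occupies, e.g.
//   findRepresentativeClass(TRI, MVT::f64)   -> { DPR, 1 }
//   findRepresentativeClass(TRI, MVT::v4f32) -> { DPR, 2 }  // one Q pair
//   findRepresentativeClass(TRI, MVT::v8i64) -> { DPR, 8 }
// which is what the register-pressure trackers consume.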

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((ARMISD::NodeType)Opcode) {
  case ARMISD::FIRST_NUMBER:  break;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::INTRET_FLAG:   return "ARMISD::INTRET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMN:           return "ARMISD::CMN";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";

  case ARMISD::CMOV:          return "ARMISD::CMOV";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::ADDC:          return "ARMISD::ADDC";
  case ARMISD::ADDE:          return "ARMISD::ADDE";
  case ARMISD::SUBC:          return "ARMISD::SUBC";
  case ARMISD::SUBE:          return "ARMISD::SUBE";

  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";

  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";

01154   case ARMISD::WIN__CHKSTK:   return "ARMISD::WIN__CHKSTK";
01155   case ARMISD::WIN__DBZCHK:   return "ARMISD::WIN__DBZCHK";
01156 
01157   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
01158   case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
01159   case ARMISD::VCGE:          return "ARMISD::VCGE";
01160   case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
01161   case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
01162   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
01163   case ARMISD::VCGT:          return "ARMISD::VCGT";
01164   case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
01165   case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
01166   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
01167   case ARMISD::VTST:          return "ARMISD::VTST";
01168 
01169   case ARMISD::VSHL:          return "ARMISD::VSHL";
01170   case ARMISD::VSHRs:         return "ARMISD::VSHRs";
01171   case ARMISD::VSHRu:         return "ARMISD::VSHRu";
01172   case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
01173   case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
01174   case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
01175   case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
01176   case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
01177   case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
01178   case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
01179   case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
01180   case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
01181   case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
01182   case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
01183   case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
01184   case ARMISD::VSLI:          return "ARMISD::VSLI";
01185   case ARMISD::VSRI:          return "ARMISD::VSRI";
01186   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
01187   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
01188   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
01189   case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
01190   case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
01191   case ARMISD::VDUP:          return "ARMISD::VDUP";
01192   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
01193   case ARMISD::VEXT:          return "ARMISD::VEXT";
01194   case ARMISD::VREV64:        return "ARMISD::VREV64";
01195   case ARMISD::VREV32:        return "ARMISD::VREV32";
01196   case ARMISD::VREV16:        return "ARMISD::VREV16";
01197   case ARMISD::VZIP:          return "ARMISD::VZIP";
01198   case ARMISD::VUZP:          return "ARMISD::VUZP";
01199   case ARMISD::VTRN:          return "ARMISD::VTRN";
01200   case ARMISD::VTBL1:         return "ARMISD::VTBL1";
01201   case ARMISD::VTBL2:         return "ARMISD::VTBL2";
01202   case ARMISD::VMULLs:        return "ARMISD::VMULLs";
01203   case ARMISD::VMULLu:        return "ARMISD::VMULLu";
01204   case ARMISD::UMLAL:         return "ARMISD::UMLAL";
01205   case ARMISD::SMLAL:         return "ARMISD::SMLAL";
01206   case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
01207   case ARMISD::BFI:           return "ARMISD::BFI";
01208   case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
01209   case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
01210   case ARMISD::VBSL:          return "ARMISD::VBSL";
01211   case ARMISD::MEMCPY:        return "ARMISD::MEMCPY";
01212   case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
01213   case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
01214   case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
01215   case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
01216   case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
01217   case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
01218   case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
01219   case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
01220   case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
01221   case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
01222   case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
01223   case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
01224   case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
01225   case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
01226   case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
01227   case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
01228   case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
01229   case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
01230   case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
01231   case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
01232   }
01233   return nullptr;
01234 }
01235 
01236 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
01237                                           EVT VT) const {
01238   if (!VT.isVector())
01239     return getPointerTy(DL);
01240   return VT.changeVectorElementTypeToInteger();
01241 }
01242 
01243 /// getRegClassFor - Return the register class that should be used for the
01244 /// specified value type.
01245 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
01246   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
01247   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
01248   // load / store 4 to 8 consecutive D registers.
01249   if (Subtarget->hasNEON()) {
01250     if (VT == MVT::v4i64)
01251       return &ARM::QQPRRegClass;
01252     if (VT == MVT::v8i64)
01253       return &ARM::QQQQPRRegClass;
01254   }
01255   return TargetLowering::getRegClassFor(VT);
01256 }
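      // For example, the four D-register operands of a VST4 pseudo can be
      // carried as a single v4i64 REG_SEQUENCE value in a QQ register, even
      // though v4i64 itself is never a legal type here (a sketch of the
      // intended use, not an exhaustive list of users).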
01257 
01258 // memcpy and other memory intrinsics typically try to use LDM/STM if the
01259 // source/dest is aligned and the copy size is large enough. We therefore want
01260 // to align such objects passed to memory intrinsics.
01261 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
01262                                                unsigned &PrefAlign) const {
01263   if (!isa<MemIntrinsic>(CI))
01264     return false;
01265   MinSize = 8;
01266   // On ARM11 onwards (excluding M-class), an 8-byte-aligned LDM is typically
01267   // 1 cycle faster than a 4-byte-aligned LDM.
01268   PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
01269   return true;
01270 }
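      // A sketch of the effect, with made-up IR names: given
      //   %buf = alloca [32 x i8], align 4
      //   call void @llvm.memcpy(i8* %dst, i8* %buf, i32 32, ...)
      // the caller of this hook may raise %buf to align 8 on A-class v6+
      // cores so that the expansion can use the faster 8-byte-aligned LDM.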
01271 
01272 // Create a fast isel object.
01273 FastISel *
01274 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
01275                                   const TargetLibraryInfo *libInfo) const {
01276   return ARM::createFastISel(funcInfo, libInfo);
01277 }
01278 
01279 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
01280   unsigned NumVals = N->getNumValues();
01281   if (!NumVals)
01282     return Sched::RegPressure;
01283 
01284   for (unsigned i = 0; i != NumVals; ++i) {
01285     EVT VT = N->getValueType(i);
01286     if (VT == MVT::Glue || VT == MVT::Other)
01287       continue;
01288     if (VT.isFloatingPoint() || VT.isVector())
01289       return Sched::ILP;
01290   }
01291 
01292   if (!N->isMachineOpcode())
01293     return Sched::RegPressure;
01294 
01295   // Loads are scheduled for latency even if the instruction itinerary
01296   // is not available.
01297   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
01298   const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
01299 
01300   if (MCID.getNumDefs() == 0)
01301     return Sched::RegPressure;
01302   if (!Itins->isEmpty() &&
01303       Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
01304     return Sched::ILP;
01305 
01306   return Sched::RegPressure;
01307 }
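      // In short: nodes producing FP or vector values are scheduled for ILP,
      // as are machine nodes whose first result becomes available only after
      // more than two cycles per the itinerary; everything else defaults to
      // minimizing register pressure.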
01308 
01309 //===----------------------------------------------------------------------===//
01310 // Lowering Code
01311 //===----------------------------------------------------------------------===//
01312 
01313 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
01314 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
01315   switch (CC) {
01316   default: llvm_unreachable("Unknown condition code!");
01317   case ISD::SETNE:  return ARMCC::NE;
01318   case ISD::SETEQ:  return ARMCC::EQ;
01319   case ISD::SETGT:  return ARMCC::GT;
01320   case ISD::SETGE:  return ARMCC::GE;
01321   case ISD::SETLT:  return ARMCC::LT;
01322   case ISD::SETLE:  return ARMCC::LE;
01323   case ISD::SETUGT: return ARMCC::HI;
01324   case ISD::SETUGE: return ARMCC::HS;
01325   case ISD::SETULT: return ARMCC::LO;
01326   case ISD::SETULE: return ARMCC::LS;
01327   }
01328 }
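      // For example, ISD::SETUGT maps to ARMCC::HI, so an unsigned i32
      // compare-and-select might lower to something like
      //   cmp r0, r1 ; movhi r0, #1 ; movls r0, #0
      // (illustrative assembly only; the selected sequence can differ).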
01329 
01330 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
01331 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
01332                         ARMCC::CondCodes &CondCode2) {
01333   CondCode2 = ARMCC::AL;
01334   switch (CC) {
01335   default: llvm_unreachable("Unknown FP condition!");
01336   case ISD::SETEQ:
01337   case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
01338   case ISD::SETGT:
01339   case ISD::SETOGT: CondCode = ARMCC::GT; break;
01340   case ISD::SETGE:
01341   case ISD::SETOGE: CondCode = ARMCC::GE; break;
01342   case ISD::SETOLT: CondCode = ARMCC::MI; break;
01343   case ISD::SETOLE: CondCode = ARMCC::LS; break;
01344   case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
01345   case ISD::SETO:   CondCode = ARMCC::VC; break;
01346   case ISD::SETUO:  CondCode = ARMCC::VS; break;
01347   case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
01348   case ISD::SETUGT: CondCode = ARMCC::HI; break;
01349   case ISD::SETUGE: CondCode = ARMCC::PL; break;
01350   case ISD::SETLT:
01351   case ISD::SETULT: CondCode = ARMCC::LT; break;
01352   case ISD::SETLE:
01353   case ISD::SETULE: CondCode = ARMCC::LE; break;
01354   case ISD::SETNE:
01355   case ISD::SETUNE: CondCode = ARMCC::NE; break;
01356   }
01357 }
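      // Conditions that set CondCode2 need two predicated operations. E.g.
      // SETUEQ ("equal or unordered") is handled roughly as
      //   vcmp.f64 d0, d1 ; vmrs APSR_nzcv, fpscr
      //   moveq r0, #1 ; movvs r0, #1
      // where EQ covers the ordered-equal case and VS the unordered one
      // (a sketch, not the exact selected sequence).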
01358 
01359 //===----------------------------------------------------------------------===//
01360 //                      Calling Convention Implementation
01361 //===----------------------------------------------------------------------===//
01362 
01363 #include "ARMGenCallingConv.inc"
01364 
01365 /// getEffectiveCallingConv - Get the effective calling convention, taking into
01366 /// account presence of floating point hardware and calling convention
01367 /// limitations, such as support for variadic functions.
01368 CallingConv::ID
01369 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
01370                                            bool isVarArg) const {
01371   switch (CC) {
01372   default:
01373     llvm_unreachable("Unsupported calling convention");
01374   case CallingConv::ARM_AAPCS:
01375   case CallingConv::ARM_APCS:
01376   case CallingConv::GHC:
01377     return CC;
01378   case CallingConv::ARM_AAPCS_VFP:
01379     return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
01380   case CallingConv::C:
01381     if (!Subtarget->isAAPCS_ABI())
01382       return CallingConv::ARM_APCS;
01383     else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
01384              getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
01385              !isVarArg)
01386       return CallingConv::ARM_AAPCS_VFP;
01387     else
01388       return CallingConv::ARM_AAPCS;
01389   case CallingConv::Fast:
01390   case CallingConv::CXX_FAST_TLS:
01391     if (!Subtarget->isAAPCS_ABI()) {
01392       if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
01393         return CallingConv::Fast;
01394       return CallingConv::ARM_APCS;
01395     } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
01396       return CallingConv::ARM_AAPCS_VFP;
01397     else
01398       return CallingConv::ARM_AAPCS;
01399   }
01400 }
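      // For example, on a hard-float AAPCS target with VFP2, a plain C
      // function is given ARM_AAPCS_VFP so FP arguments travel in s0-s15 /
      // d0-d7, while the same function declared variadic falls back to
      // ARM_AAPCS and passes everything in r0-r3 and on the stack.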
01401 
01402 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
01403 /// CallingConvention.
01404 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
01405                                                  bool Return,
01406                                                  bool isVarArg) const {
01407   switch (getEffectiveCallingConv(CC, isVarArg)) {
01408   default:
01409     llvm_unreachable("Unsupported calling convention");
01410   case CallingConv::ARM_APCS:
01411     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
01412   case CallingConv::ARM_AAPCS:
01413     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
01414   case CallingConv::ARM_AAPCS_VFP:
01415     return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
01416   case CallingConv::Fast:
01417     return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
01418   case CallingConv::GHC:
01419     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
01420   }
01421 }
01422 
01423 /// LowerCallResult - Lower the result values of a call into the
01424 /// appropriate copies out of appropriate physical registers.
01425 SDValue
01426 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
01427                                    CallingConv::ID CallConv, bool isVarArg,
01428                                    const SmallVectorImpl<ISD::InputArg> &Ins,
01429                                    SDLoc dl, SelectionDAG &DAG,
01430                                    SmallVectorImpl<SDValue> &InVals,
01431                                    bool isThisReturn, SDValue ThisVal) const {
01432 
01433   // Assign locations to each value returned by this call.
01434   SmallVector<CCValAssign, 16> RVLocs;
01435   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
01436                     *DAG.getContext(), Call);
01437   CCInfo.AnalyzeCallResult(Ins,
01438                            CCAssignFnForNode(CallConv, /* Return*/ true,
01439                                              isVarArg));
01440 
01441   // Copy all of the result registers out of their specified physreg.
01442   for (unsigned i = 0; i != RVLocs.size(); ++i) {
01443     CCValAssign VA = RVLocs[i];
01444 
01445     // Pass the 'this' value directly from the argument to the return value,
01446     // to avoid register unit interference.
01447     if (i == 0 && isThisReturn) {
01448       assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
01449              "unexpected return calling convention register assignment");
01450       InVals.push_back(ThisVal);
01451       continue;
01452     }
01453 
01454     SDValue Val;
01455     if (VA.needsCustom()) {
01456       // Handle f64 or half of a v2f64.
01457       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
01458                                       InFlag);
01459       Chain = Lo.getValue(1);
01460       InFlag = Lo.getValue(2);
01461       VA = RVLocs[++i]; // skip ahead to next loc
01462       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
01463                                       InFlag);
01464       Chain = Hi.getValue(1);
01465       InFlag = Hi.getValue(2);
01466       if (!Subtarget->isLittle())
01467         std::swap(Lo, Hi);
01468       Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
01469 
01470       if (VA.getLocVT() == MVT::v2f64) {
01471         SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
01472         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
01473                           DAG.getConstant(0, dl, MVT::i32));
01474 
01475         VA = RVLocs[++i]; // skip ahead to next loc
01476         Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
01477         Chain = Lo.getValue(1);
01478         InFlag = Lo.getValue(2);
01479         VA = RVLocs[++i]; // skip ahead to next loc
01480         Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
01481         Chain = Hi.getValue(1);
01482         InFlag = Hi.getValue(2);
01483         if (!Subtarget->isLittle())
01484           std::swap(Lo, Hi);
01485         Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
01486         Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
01487                           DAG.getConstant(1, dl, MVT::i32));
01488       }
01489     } else {
01490       Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
01491                                InFlag);
01492       Chain = Val.getValue(1);
01493       InFlag = Val.getValue(2);
01494     }
01495 
01496     switch (VA.getLocInfo()) {
01497     default: llvm_unreachable("Unknown loc info!");
01498     case CCValAssign::Full: break;
01499     case CCValAssign::BCvt:
01500       Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
01501       break;
01502     }
01503 
01504     InVals.push_back(Val);
01505   }
01506 
01507   return Chain;
01508 }
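      // Illustrating the custom path above: under ARM_AAPCS an f64 result
      // comes back as a pair of i32 registers (r0/r1) and is reassembled with
      // VMOVDRR; a v2f64 result spans two such pairs and is rebuilt one f64
      // lane at a time.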
01509 
01510 /// LowerMemOpCallTo - Store the argument to the stack.
01511 SDValue
01512 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
01513                                     SDValue StackPtr, SDValue Arg,
01514                                     SDLoc dl, SelectionDAG &DAG,
01515                                     const CCValAssign &VA,
01516                                     ISD::ArgFlagsTy Flags) const {
01517   unsigned LocMemOffset = VA.getLocMemOffset();
01518   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
01519   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
01520                        StackPtr, PtrOff);
01521   return DAG.getStore(
01522       Chain, dl, Arg, PtrOff,
01523       MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
01524       false, false, 0);
01525 }
01526 
01527 void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
01528                                          SDValue Chain, SDValue &Arg,
01529                                          RegsToPassVector &RegsToPass,
01530                                          CCValAssign &VA, CCValAssign &NextVA,
01531                                          SDValue &StackPtr,
01532                                          SmallVectorImpl<SDValue> &MemOpChains,
01533                                          ISD::ArgFlagsTy Flags) const {
01534 
01535   SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
01536                               DAG.getVTList(MVT::i32, MVT::i32), Arg);
01537   unsigned id = Subtarget->isLittle() ? 0 : 1;
01538   RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
01539 
01540   if (NextVA.isRegLoc())
01541     RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
01542   else {
01543     assert(NextVA.isMemLoc());
01544     if (!StackPtr.getNode())
01545       StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
01546                                     getPointerTy(DAG.getDataLayout()));
01547 
01548     MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
01549                                            dl, DAG, NextVA,
01550                                            Flags));
01551   }
01552 }
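      // E.g. when an f64 argument travels in core registers: VMOVRRD splits
      // it into two i32 halves, the low word going to the first register on
      // little-endian targets (swapped on big-endian); if only one register
      // remains, the other half is stored to the outgoing-argument area.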
01553 
01554 /// LowerCall - Lower a call into a callseq_start <-
01555 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
01556 /// nodes.
01557 SDValue
01558 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
01559                              SmallVectorImpl<SDValue> &InVals) const {
01560   SelectionDAG &DAG                     = CLI.DAG;
01561   SDLoc &dl                             = CLI.DL;
01562   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
01563   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
01564   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
01565   SDValue Chain                         = CLI.Chain;
01566   SDValue Callee                        = CLI.Callee;
01567   bool &isTailCall                      = CLI.IsTailCall;
01568   CallingConv::ID CallConv              = CLI.CallConv;
01569   bool doesNotRet                       = CLI.DoesNotReturn;
01570   bool isVarArg                         = CLI.IsVarArg;
01571 
01572   MachineFunction &MF = DAG.getMachineFunction();
01573   bool isStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
01574   bool isThisReturn   = false;
01575   bool isSibCall      = false;
01576   auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
01577 
01578   // Disable tail calls if they're not supported.
01579   if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
01580     isTailCall = false;
01581 
01582   if (isTailCall) {
01583     // Check if it's really possible to do a tail call.
01584     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
01585                     isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
01586                                                    Outs, OutVals, Ins, DAG);
01587     if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
01588       report_fatal_error("failed to perform tail call elimination on a call "
01589                          "site marked musttail");
01590     // We don't support GuaranteedTailCallOpt for ARM, only automatically
01591     // detected sibcalls.
01592     if (isTailCall) {
01593       ++NumTailCalls;
01594       isSibCall = true;
01595     }
01596   }
01597 
01598   // Analyze operands of the call, assigning locations to each operand.
01599   SmallVector<CCValAssign, 16> ArgLocs;
01600   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
01601                     *DAG.getContext(), Call);
01602   CCInfo.AnalyzeCallOperands(Outs,
01603                              CCAssignFnForNode(CallConv, /* Return*/ false,
01604                                                isVarArg));
01605 
01606   // Get a count of how many bytes are to be pushed on the stack.
01607   unsigned NumBytes = CCInfo.getNextStackOffset();
01608 
01609   // For tail calls, memory operands are available in our caller's stack.
01610   if (isSibCall)
01611     NumBytes = 0;
01612 
01613   // Adjust the stack pointer for the new arguments...
01614   // These operations are automatically eliminated by the prolog/epilog pass
01615   if (!isSibCall)
01616     Chain = DAG.getCALLSEQ_START(Chain,
01617                                  DAG.getIntPtrConstant(NumBytes, dl, true), dl);
01618 
01619   SDValue StackPtr =
01620       DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
01621 
01622   RegsToPassVector RegsToPass;
01623   SmallVector<SDValue, 8> MemOpChains;
01624 
01625   // Walk the register/memloc assignments, inserting copies/loads.  In the case
01626   // of tail call optimization, arguments are handled later.
01627   for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
01628        i != e;
01629        ++i, ++realArgIdx) {
01630     CCValAssign &VA = ArgLocs[i];
01631     SDValue Arg = OutVals[realArgIdx];
01632     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
01633     bool isByVal = Flags.isByVal();
01634 
01635     // Promote the value if needed.
01636     switch (VA.getLocInfo()) {
01637     default: llvm_unreachable("Unknown loc info!");
01638     case CCValAssign::Full: break;
01639     case CCValAssign::SExt:
01640       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
01641       break;
01642     case CCValAssign::ZExt:
01643       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
01644       break;
01645     case CCValAssign::AExt:
01646       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
01647       break;
01648     case CCValAssign::BCvt:
01649       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
01650       break;
01651     }
01652 
01653     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
01654     if (VA.needsCustom()) {
01655       if (VA.getLocVT() == MVT::v2f64) {
01656         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
01657                                   DAG.getConstant(0, dl, MVT::i32));
01658         SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
01659                                   DAG.getConstant(1, dl, MVT::i32));
01660 
01661         PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
01662                          VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
01663 
01664         VA = ArgLocs[++i]; // skip ahead to next loc
01665         if (VA.isRegLoc()) {
01666           PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
01667                            VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
01668         } else {
01669           assert(VA.isMemLoc());
01670 
01671           MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
01672                                                  dl, DAG, VA, Flags));
01673         }
01674       } else {
01675         PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
01676                          StackPtr, MemOpChains, Flags);
01677       }
01678     } else if (VA.isRegLoc()) {
01679       if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
01680         assert(VA.getLocVT() == MVT::i32 &&
01681                "unexpected calling convention register assignment");
01682         assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
01683                "unexpected use of 'returned'");
01684         isThisReturn = true;
01685       }
01686       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
01687     } else if (isByVal) {
01688       assert(VA.isMemLoc());
01689       unsigned offset = 0;
01690 
01691       // This byval aggregate may be split between registers
01692       // and memory.
01693       unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
01694       unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
01695 
01696       if (CurByValIdx < ByValArgsCount) {
01697 
01698         unsigned RegBegin, RegEnd;
01699         CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
01700 
01701         EVT PtrVT =
01702             DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
01703         unsigned i, j;
01704         for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
01705           SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
01706           SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
01707           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
01708                                      MachinePointerInfo(),
01709                                      false, false, false,
01710                                      DAG.InferPtrAlignment(AddArg));
01711           MemOpChains.push_back(Load.getValue(1));
01712           RegsToPass.push_back(std::make_pair(j, Load));
01713         }
01714 
01715         // If the parameter size exceeds the register area, the "offset" value
01716         // helps us compute the stack slot for the remaining part properly.
01717         offset = RegEnd - RegBegin;
01718 
01719         CCInfo.nextInRegsParam();
01720       }
01721 
01722       if (Flags.getByValSize() > 4*offset) {
01723         auto PtrVT = getPointerTy(DAG.getDataLayout());
01724         unsigned LocMemOffset = VA.getLocMemOffset();
01725         SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
01726         SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
01727         SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
01728         SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
01729         SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
01730                                            MVT::i32);
01731         SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
01732                                             MVT::i32);
01733 
01734         SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
01735         SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
01736         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
01737                                           Ops));
01738       }
01739     } else if (!isSibCall) {
01740       assert(VA.isMemLoc());
01741 
01742       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
01743                                              dl, DAG, VA, Flags));
01744     }
01745   }
01746 
01747   if (!MemOpChains.empty())
01748     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
01749 
01750   // Build a sequence of copy-to-reg nodes chained together with token chain
01751   // and flag operands which copy the outgoing args into the appropriate regs.
01752   SDValue InFlag;
01753   // Tail-call byval lowering might overwrite argument registers, so in the
01754   // case of tail call optimization the copies to registers are lowered later.
01755   if (!isTailCall)
01756     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
01757       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
01758                                RegsToPass[i].second, InFlag);
01759       InFlag = Chain.getValue(1);
01760     }
01761 
01762   // For tail calls lower the arguments to the 'real' stack slot.
01763   if (isTailCall) {
01764     // Force all the incoming stack arguments to be loaded from the stack
01765     // before any new outgoing arguments are stored to the stack, because the
01766     // outgoing stack slots may alias the incoming argument stack slots, and
01767     // the alias isn't otherwise explicit. This is slightly more conservative
01768     // than necessary, because it means that each store effectively depends
01769     // on every argument instead of just those arguments it would clobber.
01770 
01771     // Do not flag preceding copytoreg stuff together with the following stuff.
01772     InFlag = SDValue();
01773     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
01774       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
01775                                RegsToPass[i].second, InFlag);
01776       InFlag = Chain.getValue(1);
01777     }
01778     InFlag = SDValue();
01779   }
01780 
01781   // If the callee is a GlobalAddress/ExternalSymbol node (quite common; every
01782   // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
01783   // node so that legalize doesn't hack it.
01784   bool isDirect = false;
01785   bool isARMFunc = false;
01786   bool isLocalARMFunc = false;
01787   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
01788   auto PtrVt = getPointerTy(DAG.getDataLayout());
01789 
01790   if (Subtarget->genLongCalls()) {
01791     assert((Subtarget->isTargetWindows() ||
01792             getTargetMachine().getRelocationModel() == Reloc::Static) &&
01793            "long-calls with non-static relocation model!");
01794     // Handle a global address or an external symbol. If it's not one of
01795     // those, the target's already in a register, so we don't need to do
01796     // anything extra.
01797     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
01798       const GlobalValue *GV = G->getGlobal();
01799       // Create a constant pool entry for the callee address
01800       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
01801       ARMConstantPoolValue *CPV =
01802         ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
01803 
01804       // Get the address of the callee into a register
01805       SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
01806       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
01807       Callee = DAG.getLoad(
01808           PtrVt, dl, DAG.getEntryNode(), CPAddr,
01809           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
01810           false, false, 0);
01811     } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
01812       const char *Sym = S->getSymbol();
01813 
01814       // Create a constant pool entry for the callee address
01815       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
01816       ARMConstantPoolValue *CPV =
01817         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
01818                                       ARMPCLabelIndex, 0);
01819       // Get the address of the callee into a register
01820       SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
01821       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
01822       Callee = DAG.getLoad(
01823           PtrVt, dl, DAG.getEntryNode(), CPAddr,
01824           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
01825           false, false, 0);
01826     }
01827   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
01828     const GlobalValue *GV = G->getGlobal();
01829     isDirect = true;
01830     bool isDef = GV->isStrongDefinitionForLinker();
01831     bool isStub = (!isDef && Subtarget->isTargetMachO()) &&
01832                    getTargetMachine().getRelocationModel() != Reloc::Static;
01833     isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
01834     // ARM call to a local ARM function is predicable.
01835     isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
01836     // tBX takes a register source operand.
01837     if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
01838       assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
01839       Callee = DAG.getNode(
01840           ARMISD::WrapperPIC, dl, PtrVt,
01841           DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
01842       Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
01843                            MachinePointerInfo::getGOT(DAG.getMachineFunction()),
01844                            false, false, true, 0);
01845     } else if (Subtarget->isTargetCOFF()) {
01846       assert(Subtarget->isTargetWindows() &&
01847              "Windows is the only supported COFF target");
01848       unsigned TargetFlags = GV->hasDLLImportStorageClass()
01849                                  ? ARMII::MO_DLLIMPORT
01850                                  : ARMII::MO_NO_FLAG;
01851       Callee =
01852           DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags);
01853       if (GV->hasDLLImportStorageClass())
01854         Callee =
01855             DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
01856                         DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
01857                         MachinePointerInfo::getGOT(DAG.getMachineFunction()),
01858                         false, false, false, 0);
01859     } else {
01860       // On ELF targets for PIC code, direct calls should go through the PLT
01861       unsigned OpFlags = 0;
01862       if (Subtarget->isTargetELF() &&
01863           getTargetMachine().getRelocationModel() == Reloc::PIC_)
01864         OpFlags = ARMII::MO_PLT;
01865       Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags);
01866     }
01867   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
01868     isDirect = true;
01869     bool isStub = Subtarget->isTargetMachO() &&
01870                   getTargetMachine().getRelocationModel() != Reloc::Static;
01871     isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
01872     // tBX takes a register source operand.
01873     const char *Sym = S->getSymbol();
01874     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
01875       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
01876       ARMConstantPoolValue *CPV =
01877         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
01878                                       ARMPCLabelIndex, 4);
01879       SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
01880       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
01881       Callee = DAG.getLoad(
01882           PtrVt, dl, DAG.getEntryNode(), CPAddr,
01883           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
01884           false, false, 0);
01885       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
01886       Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
01887     } else {
01888       unsigned OpFlags = 0;
01889       // On ELF targets for PIC code, direct calls should go through the PLT
01890       if (Subtarget->isTargetELF() &&
01891                   getTargetMachine().getRelocationModel() == Reloc::PIC_)
01892         OpFlags = ARMII::MO_PLT;
01893       Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags);
01894     }
01895   }
01896 
01897   // FIXME: handle tail calls differently.
01898   unsigned CallOpc;
01899   if (Subtarget->isThumb()) {
01900     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
01901       CallOpc = ARMISD::CALL_NOLINK;
01902     else
01903       CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
01904   } else {
01905     if (!isDirect && !Subtarget->hasV5TOps())
01906       CallOpc = ARMISD::CALL_NOLINK;
01907     else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
01908              // Emit regular call when code size is the priority
01909              !MF.getFunction()->optForMinSize())
01910       // "mov lr, pc; b _foo" to avoid confusing the return stack predictor
01911       CallOpc = ARMISD::CALL_NOLINK;
01912     else
01913       CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
01914   }
01915 
01916   std::vector<SDValue> Ops;
01917   Ops.push_back(Chain);
01918   Ops.push_back(Callee);
01919 
01920   // Add argument registers to the end of the list so that they are known live
01921   // into the call.
01922   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
01923     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
01924                                   RegsToPass[i].second.getValueType()));
01925 
01926   // Add a register mask operand representing the call-preserved registers.
01927   if (!isTailCall) {
01928     const uint32_t *Mask;
01929     const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
01930     if (isThisReturn) {
01931       // For 'this' returns, use the R0-preserving mask if applicable
01932       Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
01933       if (!Mask) {
01934         // Set isThisReturn to false if the calling convention is not one that
01935         // allows 'returned' to be modeled in this way, so LowerCallResult does
01936         // not try to pass 'this' straight through
01937         isThisReturn = false;
01938         Mask = ARI->getCallPreservedMask(MF, CallConv);
01939       }
01940     } else
01941       Mask = ARI->getCallPreservedMask(MF, CallConv);
01942 
01943     assert(Mask && "Missing call preserved mask for calling convention");
01944     Ops.push_back(DAG.getRegisterMask(Mask));
01945   }
01946 
01947   if (InFlag.getNode())
01948     Ops.push_back(InFlag);
01949 
01950   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
01951   if (isTailCall) {
01952     MF.getFrameInfo()->setHasTailCall();
01953     return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
01954   }
01955 
01956   // Returns a chain and a flag for retval copy to use.
01957   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
01958   InFlag = Chain.getValue(1);
01959 
01960   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
01961                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
01962   if (!Ins.empty())
01963     InFlag = Chain.getValue(1);
01964 
01965   // Handle result values, copying them out of physregs into vregs that we
01966   // return.
01967   return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
01968                          InVals, isThisReturn,
01969                          isThisReturn ? OutVals[0] : SDValue());
01970 }
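      // The common direct-call case therefore builds a DAG shaped roughly as
      //   callseq_start -> CopyToReg(r0, ...) -> ARMISD::CALL -> callseq_end
      // with the register mask keeping the argument registers live across
      // the call; eligible tail calls instead end in ARMISD::TC_RETURN.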
01971 
01972 /// HandleByVal - Every parameter *after* a byval parameter is passed
01973 /// on the stack.  Remember the next parameter register to allocate,
01974 /// and then confiscate the rest of the parameter registers to ensure
01975 /// this.
01976 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
01977                                     unsigned Align) const {
01978   assert((State->getCallOrPrologue() == Prologue ||
01979           State->getCallOrPrologue() == Call) &&
01980          "unhandled ParmContext");
01981 
01982   // Byval (as with any stack) slots are always at least 4 byte aligned.
01983   Align = std::max(Align, 4U);
01984 
01985   unsigned Reg = State->AllocateReg(GPRArgRegs);
01986   if (!Reg)
01987     return;
01988 
01989   unsigned AlignInRegs = Align / 4;
01990   unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
01991   for (unsigned i = 0; i < Waste; ++i)
01992     Reg = State->AllocateReg(GPRArgRegs);
01993 
01994   if (!Reg)
01995     return;
01996 
01997   unsigned Excess = 4 * (ARM::R4 - Reg);
01998 
01999   // Special case: when NSAA != SP and the parameter size is greater than the
02000   // size of all remaining GPR regs. In that case we can't split the parameter;
02001   // we must send it all to the stack, and we must also set the NCRN to R4,
02002   // wasting all remaining registers.
02003   const unsigned NSAAOffset = State->getNextStackOffset();
02004   if (NSAAOffset != 0 && Size > Excess) {
02005     while (State->AllocateReg(GPRArgRegs))
02006       ;
02007     return;
02008   }
02009 
02010   // The first register for the byval parameter is the first register that
02011   // wasn't allocated before this method call, i.e. "Reg". If the parameter
02012   // is small enough to be saved in the range [Reg, R4), then the end (one
02013   // past the last) register would be Reg + param-size-in-regs; otherwise
02014   // the parameter is split between registers and the stack, and the end
02015   // register is R4 in that case.
02016   unsigned ByValRegBegin = Reg;
02017   unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
02018   State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
02019   // Note that the first register was already allocated at the beginning of
02020   // this function; allocate the remaining registers we need.
02021   for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
02022     State->AllocateReg(GPRArgRegs);
02023   // A byval parameter that is split between registers and memory needs its
02024   // size truncated here.
02025   // In the case where the entire structure fits in registers, we set the
02026   // size in memory to zero.
02027   Size = std::max<int>(Size - Excess, 0);
02028 }
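      // A worked example of the logic above, assuming the registers are
      // otherwise free and nothing is on the stack yet: a byval of size 20
      // with 8-byte alignment arriving when r1 is next. r1 is wasted as
      // alignment padding, the byval takes r2-r3, and Size is truncated to
      // the 12 bytes that spill to the stack.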
02029 
02030 /// MatchingStackOffset - Return true if the given stack call argument is
02031 /// already available in the same position (relatively) of the caller's
02032 /// incoming argument stack.
02033 static
02034 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
02035                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
02036                          const TargetInstrInfo *TII) {
02037   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
02038   int FI = INT_MAX;
02039   if (Arg.getOpcode() == ISD::CopyFromReg) {
02040     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
02041     if (!TargetRegisterInfo::isVirtualRegister(VR))
02042       return false;
02043     MachineInstr *Def = MRI->getVRegDef(VR);
02044     if (!Def)
02045       return false;
02046     if (!Flags.isByVal()) {
02047       if (!TII->isLoadFromStackSlot(Def, FI))
02048         return false;
02049     } else {
02050       return false;
02051     }
02052   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
02053     if (Flags.isByVal())
02054       // ByVal argument is passed in as a pointer but it's now being
02055       // dereferenced. e.g.
02056       // define @foo(%struct.X* %A) {
02057       //   tail call @bar(%struct.X* byval %A)
02058       // }
02059       return false;
02060     SDValue Ptr = Ld->getBasePtr();
02061     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
02062     if (!FINode)
02063       return false;
02064     FI = FINode->getIndex();
02065   } else
02066     return false;
02067 
02068   assert(FI != INT_MAX);
02069   if (!MFI->isFixedObjectIndex(FI))
02070     return false;
02071   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
02072 }
02073 
02074 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
02075 /// for tail call optimization. Targets which want to do tail call
02076 /// optimization should implement this function.
02077 bool
02078 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
02079                                                      CallingConv::ID CalleeCC,
02080                                                      bool isVarArg,
02081                                                      bool isCalleeStructRet,
02082                                                      bool isCallerStructRet,
02083                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
02084                                     const SmallVectorImpl<SDValue> &OutVals,
02085                                     const SmallVectorImpl<ISD::InputArg> &Ins,
02086                                                      SelectionDAG& DAG) const {
02087   const Function *CallerF = DAG.getMachineFunction().getFunction();
02088   CallingConv::ID CallerCC = CallerF->getCallingConv();
02089   bool CCMatch = CallerCC == CalleeCC;
02090 
02091   assert(Subtarget->supportsTailCall());
02092 
02093   // Look for obvious safe cases to perform tail call optimization that do not
02094   // require ABI changes. This is what gcc calls sibcall.
02095 
02096   // Do not sibcall-optimize vararg calls unless the call site passes no
02097   // arguments.
02098   if (isVarArg && !Outs.empty())
02099     return false;
02100 
02101   // Exception-handling functions need a special set of instructions to indicate
02102   // a return to the hardware. Tail-calling another function would probably
02103   // break this.
02104   if (CallerF->hasFnAttribute("interrupt"))
02105     return false;
02106 
02107   // Also avoid sibcall optimization if either caller or callee uses struct
02108   // return semantics.
02109   if (isCalleeStructRet || isCallerStructRet)
02110     return false;
02111 
02112   // Externally-defined functions with weak linkage should not be
02113   // tail-called on ARM when the OS does not support dynamic
02114   // pre-emption of symbols, as the AAELF spec requires normal calls
02115   // to undefined weak functions to be replaced with a NOP or jump to the
02116   // next instruction. The behaviour of branch instructions in this
02117   // situation (as used for tail calls) is implementation-defined, so we
02118   // cannot rely on the linker replacing the tail call with a return.
02119   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
02120     const GlobalValue *GV = G->getGlobal();
02121     const Triple &TT = getTargetMachine().getTargetTriple();
02122     if (GV->hasExternalWeakLinkage() &&
02123         (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
02124       return false;
02125   }
02126 
02127   // If the calling conventions do not match, then we'd better make sure the
02128   // results are returned in the same way as what the caller expects.
02129   if (!CCMatch) {
02130     SmallVector<CCValAssign, 16> RVLocs1;
02131     ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
02132                        *DAG.getContext(), Call);
02133     CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
02134 
02135     SmallVector<CCValAssign, 16> RVLocs2;
02136     ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
02137                        *DAG.getContext(), Call);
02138     CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
02139 
02140     if (RVLocs1.size() != RVLocs2.size())
02141       return false;
02142     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
02143       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
02144         return false;
02145       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
02146         return false;
02147       if (RVLocs1[i].isRegLoc()) {
02148         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
02149           return false;
02150       } else {
02151         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
02152           return false;
02153       }
02154     }
02155   }
02156 
02157   // If Caller's vararg or byval argument has been split between registers and
02158   // stack, do not perform tail call, since part of the argument is in caller's
02159   // local frame.
02160   const ARMFunctionInfo *AFI_Caller =
02161       DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
02162   if (AFI_Caller->getArgRegsSaveSize())
02163     return false;
02164 
02165   // If the callee takes no arguments then go on to check the results of the
02166   // call.
02167   if (!Outs.empty()) {
02168     // Check if stack adjustment is needed. For now, do not do this if any
02169     // argument is passed on the stack.
02170     SmallVector<CCValAssign, 16> ArgLocs;
02171     ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
02172                       *DAG.getContext(), Call);
02173     CCInfo.AnalyzeCallOperands(Outs,
02174                                CCAssignFnForNode(CalleeCC, false, isVarArg));
02175     if (CCInfo.getNextStackOffset()) {
02176       MachineFunction &MF = DAG.getMachineFunction();
02177 
02178       // Check if the arguments are already laid out in the right way as
02179       // the caller's fixed stack objects.
02180       MachineFrameInfo *MFI = MF.getFrameInfo();
02181       const MachineRegisterInfo *MRI = &MF.getRegInfo();
02182       const TargetInstrInfo *TII = Subtarget->getInstrInfo();
02183       for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
02184            i != e;
02185            ++i, ++realArgIdx) {
02186         CCValAssign &VA = ArgLocs[i];
02187         EVT RegVT = VA.getLocVT();
02188         SDValue Arg = OutVals[realArgIdx];
02189         ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
02190         if (VA.getLocInfo() == CCValAssign::Indirect)
02191           return false;
02192         if (VA.needsCustom()) {
02193           // f64 and vector types are split into multiple registers or
02194           // register/stack-slot combinations.  The types will not match
02195           // the registers; give up on memory f64 refs until we figure
02196           // out what to do about this.
02197           if (!VA.isRegLoc())
02198             return false;
02199           if (!ArgLocs[++i].isRegLoc())
02200             return false;
02201           if (RegVT == MVT::v2f64) {
02202             if (!ArgLocs[++i].isRegLoc())
02203               return false;
02204             if (!ArgLocs[++i].isRegLoc())
02205               return false;
02206           }
02207         } else if (!VA.isRegLoc()) {
02208           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
02209                                    MFI, MRI, TII))
02210             return false;
02211         }
02212       }
02213     }
02214   }
02215 
02216   return true;
02217 }
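      // E.g. a call that typically qualifies as a sibcall here
      // (illustrative IR):
      //   define i32 @f(i32 %x) {
      //     %r = tail call i32 @g(i32 %x)
      //     ret i32 %r
      //   }
      // Argument and result both live in r0, no stack adjustment is needed,
      // and the calling conventions match.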
02218 
02219 bool
02220 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
02221                                   MachineFunction &MF, bool isVarArg,
02222                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
02223                                   LLVMContext &Context) const {
02224   SmallVector<CCValAssign, 16> RVLocs;
02225   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
02226   return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
02227                                                     isVarArg));
02228 }
02229 
02230 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
02231                                     SDLoc DL, SelectionDAG &DAG) {
02232   const MachineFunction &MF = DAG.getMachineFunction();
02233   const Function *F = MF.getFunction();
02234 
02235   StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
02236 
02237   // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
02238   // version of the "preferred return address". These offsets affect the return
02239   // instruction if this is a return from PL1 without hypervisor extensions.
02240   //    IRQ/FIQ: +4     "subs pc, lr, #4"
02241   //    SWI:     0      "subs pc, lr, #0"
02242   //    ABORT:   +4     "subs pc, lr, #4"
02243   //    UNDEF:   +4/+2  "subs pc, lr, #0"
02244   // UNDEF varies depending on whether the exception came from ARM or Thumb
02245   // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
02246 
02247   int64_t LROffset;
02248   if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
02249       IntKind == "ABORT")
02250     LROffset = 4;
02251   else if (IntKind == "SWI" || IntKind == "UNDEF")
02252     LROffset = 0;
02253   else
02254     report_fatal_error("Unsupported interrupt attribute. If present, value "
02255                        "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
02256 
02257   RetOps.insert(RetOps.begin() + 1,
02258                 DAG.getConstant(LROffset, DL, MVT::i32, false));
02259 
02260   return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
02261 }
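      // So, for instance, a handler carrying "interrupt"="IRQ" returns via
      // "subs pc, lr, #4", which also restores CPSR from SPSR as it branches
      // back.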
02262 
02263 SDValue
02264 ARMTargetLowering::LowerReturn(SDValue Chain,
02265                                CallingConv::ID CallConv, bool isVarArg,
02266                                const SmallVectorImpl<ISD::OutputArg> &Outs,
02267                                const SmallVectorImpl<SDValue> &OutVals,
02268                                SDLoc dl, SelectionDAG &DAG) const {
02269 
02270   // CCValAssign - represent the assignment of the return value to a location.
02271   SmallVector<CCValAssign, 16> RVLocs;
02272 
02273   // CCState - Info about the registers and stack slots.
02274   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
02275                     *DAG.getContext(), Call);
02276 
02277   // Analyze outgoing return values.
02278   CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
02279                                                isVarArg));
02280 
02281   SDValue Flag;
02282   SmallVector<SDValue, 4> RetOps;
02283   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
02284   bool isLittleEndian = Subtarget->isLittle();
02285 
02286   MachineFunction &MF = DAG.getMachineFunction();
02287   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02288   AFI->setReturnRegsCount(RVLocs.size());
02289 
02290   // Copy the result values into the output registers.
02291   for (unsigned i = 0, realRVLocIdx = 0;
02292        i != RVLocs.size();
02293        ++i, ++realRVLocIdx) {
02294     CCValAssign &VA = RVLocs[i];
02295     assert(VA.isRegLoc() && "Can only return in registers!");
02296 
02297     SDValue Arg = OutVals[realRVLocIdx];
02298 
02299     switch (VA.getLocInfo()) {
02300     default: llvm_unreachable("Unknown loc info!");
02301     case CCValAssign::Full: break;
02302     case CCValAssign::BCvt:
02303       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
02304       break;
02305     }
02306 
02307     if (VA.needsCustom()) {
02308       if (VA.getLocVT() == MVT::v2f64) {
02309         // Extract the first half and return it in two registers.
02310         SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
02311                                    DAG.getConstant(0, dl, MVT::i32));
02312         SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
02313                                        DAG.getVTList(MVT::i32, MVT::i32), Half);
02314 
02315         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02316                                  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
02317                                  Flag);
02318         Flag = Chain.getValue(1);
02319         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02320         VA = RVLocs[++i]; // skip ahead to next loc
02321         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02322                                  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
02323                                  Flag);
02324         Flag = Chain.getValue(1);
02325         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02326         VA = RVLocs[++i]; // skip ahead to next loc
02327 
02328         // Extract the 2nd half and fall through to handle it as an f64 value.
02329         Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
02330                           DAG.getConstant(1, dl, MVT::i32));
02331       }
02332       // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
02333       // available.
02334       SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
02335                                   DAG.getVTList(MVT::i32, MVT::i32), Arg);
02336       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02337                                fmrrd.getValue(isLittleEndian ? 0 : 1),
02338                                Flag);
02339       Flag = Chain.getValue(1);
02340       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02341       VA = RVLocs[++i]; // skip ahead to next loc
02342       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02343                                fmrrd.getValue(isLittleEndian ? 1 : 0),
02344                                Flag);
02345     } else
02346       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
02347 
02348     // Guarantee that all emitted copies are glued together,
02349     // so the scheduler cannot insert anything between them.
02350     Flag = Chain.getValue(1);
02351     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02352   }
02353   const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
02354   const MCPhysReg *I =
02355       TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
02356   if (I) {
02357     for (; *I; ++I) {
02358       if (ARM::GPRRegClass.contains(*I))
02359         RetOps.push_back(DAG.getRegister(*I, MVT::i32));
02360       else if (ARM::DPRRegClass.contains(*I))
02361         RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
02362       else
02363         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
02364     }
02365   }
02366 
02367   // Update chain and glue.
02368   RetOps[0] = Chain;
02369   if (Flag.getNode())
02370     RetOps.push_back(Flag);
02371 
02372   // CPUs which aren't M-class use a special sequence to return from
02373   // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
02374   // though we use "subs pc, lr, #N").
02375   //
02376   // M-class CPUs actually use a normal return sequence with a special
02377   // (hardware-provided) value in LR, so the normal code path works.
02378   if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
02379       !Subtarget->isMClass()) {
02380     if (Subtarget->isThumb1Only())
02381       report_fatal_error("interrupt attribute is not supported in Thumb1");
02382     return LowerInterruptReturn(RetOps, dl, DAG);
02383   }
02384 
02385   return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
02386 }
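
// As an illustrative sketch (little-endian, f64 returned in a GPR pair): a
// "ret double" whose value sits in a D register comes back through
// ARMISD::VMOVRRD roughly as
//
//   vmov r0, r1, d0    ; low word -> r0, high word -> r1
//   bx   lr
//
// with the big-endian path swapping which half lands in which register.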
02387 
02388 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
02389   if (N->getNumValues() != 1)
02390     return false;
02391   if (!N->hasNUsesOfValue(1, 0))
02392     return false;
02393 
02394   SDValue TCChain = Chain;
02395   SDNode *Copy = *N->use_begin();
02396   if (Copy->getOpcode() == ISD::CopyToReg) {
02397     // If the copy has a glue operand, we conservatively assume it isn't safe to
02398     // perform a tail call.
02399     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
02400       return false;
02401     TCChain = Copy->getOperand(0);
02402   } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
02403     SDNode *VMov = Copy;
02404     // f64 returned in a pair of GPRs.
02405     SmallPtrSet<SDNode*, 2> Copies;
02406     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
02407          UI != UE; ++UI) {
02408       if (UI->getOpcode() != ISD::CopyToReg)
02409         return false;
02410       Copies.insert(*UI);
02411     }
02412     if (Copies.size() > 2)
02413       return false;
02414 
02415     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
02416          UI != UE; ++UI) {
02417       SDValue UseChain = UI->getOperand(0);
02418       if (Copies.count(UseChain.getNode()))
02419         // Second CopyToReg
02420         Copy = *UI;
02421       else {
02422         // We are at the top of this chain.
02423         // If the copy has a glue operand, we conservatively assume it
02424         // isn't safe to perform a tail call.
02425         if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
02426           return false;
02427         // First CopyToReg
02428         TCChain = UseChain;
02429       }
02430     }
02431   } else if (Copy->getOpcode() == ISD::BITCAST) {
02432     // f32 returned in a single GPR.
02433     if (!Copy->hasOneUse())
02434       return false;
02435     Copy = *Copy->use_begin();
02436     if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
02437       return false;
02438     // If the copy has a glue operand, we conservatively assume it isn't safe to
02439     // perform a tail call.
02440     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
02441       return false;
02442     TCChain = Copy->getOperand(0);
02443   } else {
02444     return false;
02445   }
02446 
02447   bool HasRet = false;
02448   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
02449        UI != UE; ++UI) {
02450     if (UI->getOpcode() != ARMISD::RET_FLAG &&
02451         UI->getOpcode() != ARMISD::INTRET_FLAG)
02452       return false;
02453     HasRet = true;
02454   }
02455 
02456   if (!HasRet)
02457     return false;
02458 
02459   Chain = TCChain;
02460   return true;
02461 }
02462 
02463 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
02464   if (!Subtarget->supportsTailCall())
02465     return false;
02466 
02467   auto Attr =
02468       CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
02469   if (!CI->isTailCall() || Attr.getValueAsString() == "true")
02470     return false;
02471 
02472   return true;
02473 }
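
// A sketch of what this accepts and rejects (assuming the subtarget supports
// tail calls at all):
//
//   %r = tail call i32 @callee()   ; candidate for tail-call emission
//   %r = call i32 @callee()        ; not marked "tail": rejected
//
// Any call inside a function carrying "disable-tail-calls"="true" is
// rejected as well.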
02474 
02475 // Trying to write a 64-bit value, so we need to split it into two 32-bit
02476 // values first and pass the low and high parts through.
02477 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
02478   SDLoc DL(Op);
02479   SDValue WriteValue = Op->getOperand(2);
02480 
02481   // This function is only supposed to be called for an i64-typed argument.
02482   assert(WriteValue.getValueType() == MVT::i64
02483           && "LowerWRITE_REGISTER called for non-i64 type argument.");
02484 
02485   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
02486                            DAG.getConstant(0, DL, MVT::i32));
02487   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
02488                            DAG.getConstant(1, DL, MVT::i32));
02489   SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
02490   return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
02491 }
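
// A sketch of the input this handles ("!0" naming some valid 64-bit target
// register is an assumption for illustration):
//
//   call void @llvm.write_register.i64(metadata !0, i64 %v)
//
// The single i64 write is rebuilt as one WRITE_REGISTER node carrying the
// two i32 halves Lo and Hi.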
02492 
02493 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
02494 // their target counterparts wrapped in the ARMISD::Wrapper node. Suppose N is
02495 // one of the above-mentioned nodes. It has to be wrapped because otherwise
02496 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
02497 // be used to form an addressing mode. These wrapped nodes will be selected
02498 // into MOVi.
02499 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
02500   EVT PtrVT = Op.getValueType();
02501   // FIXME there is no actual debug info here
02502   SDLoc dl(Op);
02503   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
02504   SDValue Res;
02505   if (CP->isMachineConstantPoolEntry())
02506     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
02507                                     CP->getAlignment());
02508   else
02509     Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
02510                                     CP->getAlignment());
02511   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
02512 }
02513 
02514 unsigned ARMTargetLowering::getJumpTableEncoding() const {
02515   return MachineJumpTableInfo::EK_Inline;
02516 }
02517 
02518 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
02519                                              SelectionDAG &DAG) const {
02520   MachineFunction &MF = DAG.getMachineFunction();
02521   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02522   unsigned ARMPCLabelIndex = 0;
02523   SDLoc DL(Op);
02524   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02525   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
02526   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
02527   SDValue CPAddr;
02528   if (RelocM == Reloc::Static) {
02529     CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
02530   } else {
02531     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
02532     ARMPCLabelIndex = AFI->createPICLabelUId();
02533     ARMConstantPoolValue *CPV =
02534       ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
02535                                       ARMCP::CPBlockAddress, PCAdj);
02536     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02537   }
02538   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
02539   SDValue Result =
02540       DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
02541                   MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
02542                   false, false, false, 0);
02543   if (RelocM == Reloc::Static)
02544     return Result;
02545   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
02546   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
02547 }
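
// A sketch of the PIC case (labels and registers hypothetical): the constant
// pool entry holds the blockaddress relative to the PIC label, so the final
// address is formed as
//
//   ldr rN, .LCPIx        ; load the constant pool entry
// .LPCy:
//   add rN, pc, rN        ; ARMISD::PIC_ADD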
02548 
02549 /// \brief Convert a TLS address reference into the correct sequence of loads
02550 /// and calls to compute the variable's address for Darwin, and return an
02551 /// SDValue containing the final node.
02552 
02553 /// Darwin only has one TLS scheme which must be capable of dealing with the
02554 /// fully general situation, in the worst case. This means:
02555 ///     + "extern __thread" declaration.
02556 ///     + Defined in a possibly unknown dynamic library.
02557 ///
02558 /// The general system is that each __thread variable has a [3 x i32] descriptor
02559 /// which contains information used by the runtime to calculate the address. The
02560 /// only part of this the compiler needs to know about is the first word, which
02561 /// contains a function pointer that must be called with the address of the
02562 /// entire descriptor in "r0".
02563 ///
02564 /// Since this descriptor may be in a different unit, in general access must
02565 /// proceed along the usual ARM rules. A common sequence to produce is:
02566 ///
02567 ///     movw rT1, :lower16:_var$non_lazy_ptr
02568 ///     movt rT1, :upper16:_var$non_lazy_ptr
02569 ///     ldr r0, [rT1]
02570 ///     ldr rT2, [r0]
02571 ///     blx rT2
02572 ///     [...address now in r0...]
02573 SDValue
02574 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
02575                                                SelectionDAG &DAG) const {
02576   assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
02577   SDLoc DL(Op);
02578 
02579   // The first step is to get the address of the actual global symbol. This
02580   // is where the TLS descriptor lives.
02581   SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
02582 
02583   // The first entry in the descriptor is a function pointer that we must call
02584   // to obtain the address of the variable.
02585   SDValue Chain = DAG.getEntryNode();
02586   SDValue FuncTLVGet =
02587       DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
02588                   MachinePointerInfo::getGOT(DAG.getMachineFunction()),
02589                   false, true, true, 4);
02590   Chain = FuncTLVGet.getValue(1);
02591 
02592   MachineFunction &F = DAG.getMachineFunction();
02593   MachineFrameInfo *MFI = F.getFrameInfo();
02594   MFI->setAdjustsStack(true);
02595 
02596   // TLS calls preserve all registers except those that absolutely must be
02597   // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
02598   // silly).
02599   auto TRI =
02600       getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
02601   auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
02602   const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
02603 
02604   // Finally, we can make the call. This is just a degenerate version of a
02605   // normal ARM call node: r0 takes the address of the descriptor, and
02606   // returns the address of the variable in this thread.
02607   Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
02608   Chain =
02609       DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
02610                   Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
02611                   DAG.getRegisterMask(Mask), Chain.getValue(1));
02612   return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
02613 }
02614 
02615 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
02616 SDValue
02617 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
02618                                                  SelectionDAG &DAG) const {
02619   SDLoc dl(GA);
02620   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02621   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
02622   MachineFunction &MF = DAG.getMachineFunction();
02623   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02624   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02625   ARMConstantPoolValue *CPV =
02626     ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
02627                                     ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
02628   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02629   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
02630   Argument =
02631       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
02632                   MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
02633                   false, false, false, 0);
02634   SDValue Chain = Argument.getValue(1);
02635 
02636   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
02637   Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
02638 
02639   // call __tls_get_addr.
02640   ArgListTy Args;
02641   ArgListEntry Entry;
02642   Entry.Node = Argument;
02643   Entry.Ty = Type::getInt32Ty(*DAG.getContext());
02644   Args.push_back(Entry);
02645 
02646   // FIXME: is there useful debug info available here?
02647   TargetLowering::CallLoweringInfo CLI(DAG);
02648   CLI.setDebugLoc(dl).setChain(Chain)
02649     .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
02650                DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
02651                0);
02652 
02653   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
02654   return CallResult.first;
02655 }
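
// Roughly, the general-dynamic sequence built above looks like this sketch
// (labels hypothetical):
//
//   ldr r0, .LCPIx        ; TLSGD constant pool entry
// .LPCy:
//   add r0, pc, r0        ; ARMISD::PIC_ADD
//   bl  __tls_get_addr    ; variable's address returned in r0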
02656 
02657 // Lower ISD::GlobalTLSAddress using the "initial exec" or
02658 // "local exec" model.
02659 SDValue
02660 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
02661                                         SelectionDAG &DAG,
02662                                         TLSModel::Model model) const {
02663   const GlobalValue *GV = GA->getGlobal();
02664   SDLoc dl(GA);
02665   SDValue Offset;
02666   SDValue Chain = DAG.getEntryNode();
02667   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02668   // Get the Thread Pointer
02669   SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
02670 
02671   if (model == TLSModel::InitialExec) {
02672     MachineFunction &MF = DAG.getMachineFunction();
02673     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02674     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02675     // Initial exec model.
02676     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
02677     ARMConstantPoolValue *CPV =
02678       ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
02679                                       ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
02680                                       true);
02681     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02682     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
02683     Offset = DAG.getLoad(
02684         PtrVT, dl, Chain, Offset,
02685         MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
02686         false, false, 0);
02687     Chain = Offset.getValue(1);
02688 
02689     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
02690     Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
02691 
02692     Offset = DAG.getLoad(
02693         PtrVT, dl, Chain, Offset,
02694         MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
02695         false, false, 0);
02696   } else {
02697     // local exec model
02698     assert(model == TLSModel::LocalExec);
02699     ARMConstantPoolValue *CPV =
02700       ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
02701     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02702     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
02703     Offset = DAG.getLoad(
02704         PtrVT, dl, Chain, Offset,
02705         MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
02706         false, false, 0);
02707   }
02708 
02709   // The address of the thread local variable is the add of the thread
02710   // pointer with the offset of the variable.
02711   return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
02712 }
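
// For reference (a sketch; exact selection may differ by subtarget), the
// thread pointer node typically reads TPIDRURO, and the two models differ
// only in how Offset is produced:
//
//   mrc p15, #0, rT, c13, c0, #3   ; ARMISD::THREAD_POINTER
//   ...                            ; GOTTPOFF load (initial exec) or
//                                  ; TPOFF constant (local exec)
//   add r0, rT, rOffset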
02713 
02714 SDValue
02715 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
02716   if (Subtarget->isTargetDarwin())
02717     return LowerGlobalTLSAddressDarwin(Op, DAG);
02718 
02719   // TODO: implement the "local dynamic" model
02720   assert(Subtarget->isTargetELF() && "Only ELF implemented here");
02721   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
02722   if (DAG.getTarget().Options.EmulatedTLS)
02723     return LowerToTLSEmulatedModel(GA, DAG);
02724 
02725   TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
02726 
02727   switch (model) {
02728     case TLSModel::GeneralDynamic:
02729     case TLSModel::LocalDynamic:
02730       return LowerToTLSGeneralDynamicModel(GA, DAG);
02731     case TLSModel::InitialExec:
02732     case TLSModel::LocalExec:
02733       return LowerToTLSExecModels(GA, DAG, model);
02734   }
02735   llvm_unreachable("bogus TLS model");
02736 }
02737 
02738 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
02739                                                  SelectionDAG &DAG) const {
02740   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02741   SDLoc dl(Op);
02742   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02743   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
02744     bool UseGOT_PREL =
02745         !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
02746 
02747     MachineFunction &MF = DAG.getMachineFunction();
02748     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02749     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02750     EVT PtrVT = getPointerTy(DAG.getDataLayout());
02751     SDLoc dl(Op);
02752     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
02753     ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
02754         GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
02755         UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
02756         /*AddCurrentAddress=*/UseGOT_PREL);
02757     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02758     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02759     SDValue Result = DAG.getLoad(
02760         PtrVT, dl, DAG.getEntryNode(), CPAddr,
02761         MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
02762         false, false, 0);
02763     SDValue Chain = Result.getValue(1);
02764     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
02765     Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
02766     if (UseGOT_PREL)
02767       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
02768                            MachinePointerInfo::getGOT(DAG.getMachineFunction()),
02769                            false, false, false, 0);
02770     return Result;
02771   }
02772 
02773   // If we have T2 ops, we can materialize the address directly via a
02774   // movw/movt pair. This is always cheaper.
02775   if (Subtarget->useMovt(DAG.getMachineFunction())) {
02776     ++NumMovwMovt;
02777     // FIXME: Once remat is capable of dealing with instructions with register
02778     // operands, expand this into two nodes.
02779     return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
02780                        DAG.getTargetGlobalAddress(GV, dl, PtrVT));
02781   } else {
02782     SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
02783     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02784     return DAG.getLoad(
02785         PtrVT, dl, DAG.getEntryNode(), CPAddr,
02786         MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
02787         false, false, 0);
02788   }
02789 }
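
// The movw/movt path amounts to this sketch:
//
//   movw r0, :lower16:var
//   movt r0, :upper16:var
//
// while the constant-pool fallback emits a pc-relative load from a literal
// pool entry holding the address.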
02790 
02791 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
02792                                                     SelectionDAG &DAG) const {
02793   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02794   SDLoc dl(Op);
02795   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02796   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
02797 
02798   if (Subtarget->useMovt(DAG.getMachineFunction()))
02799     ++NumMovwMovt;
02800 
02801   // FIXME: Once remat is capable of dealing with instructions with register
02802   // operands, expand this into multiple nodes
02803   unsigned Wrapper =
02804       RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
02805 
02806   SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
02807   SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
02808 
02809   if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
02810     Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
02811                          MachinePointerInfo::getGOT(DAG.getMachineFunction()),
02812                          false, false, false, 0);
02813   return Result;
02814 }
02815 
02816 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
02817                                                      SelectionDAG &DAG) const {
02818   assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
02819   assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
02820          "Windows on ARM expects to use movw/movt");
02821 
02822   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02823   const ARMII::TOF TargetFlags =
02824     (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
02825   EVT PtrVT = getPointerTy(DAG.getDataLayout());
02826   SDValue Result;
02827   SDLoc DL(Op);
02828 
02829   ++NumMovwMovt;
02830 
02831   // FIXME: Once remat is capable of dealing with instructions with register
02832   // operands, expand this into two nodes.
02833   Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
02834                        DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
02835                                                   TargetFlags));
02836   if (GV->hasDLLImportStorageClass())
02837     Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
02838                          MachinePointerInfo::getGOT(DAG.getMachineFunction()),
02839                          false, false, false, 0);
02840   return Result;
02841 }
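
// For a dllimport'ed global this produces, as a sketch (the __imp_
// decoration shown here is driven by the MO_DLLIMPORT flag):
//
//   movw r0, :lower16:__imp_var
//   movt r0, :upper16:__imp_var
//   ldr  r0, [r0]         ; load the real address from the import table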
02842 
02843 SDValue
02844 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
02845   SDLoc dl(Op);
02846   SDValue Val = DAG.getConstant(0, dl, MVT::i32);
02847   return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
02848                      DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
02849                      Op.getOperand(1), Val);
02850 }
02851 
02852 SDValue
02853 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
02854   SDLoc dl(Op);
02855   return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
02856                      Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
02857 }
02858 
02859 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
02860                                                       SelectionDAG &DAG) const {
02861   SDLoc dl(Op);
02862   return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
02863                      Op.getOperand(0));
02864 }
02865 
02866 SDValue
02867 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
02868                                           const ARMSubtarget *Subtarget) const {
02869   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
02870   SDLoc dl(Op);
02871   switch (IntNo) {
02872   default: return SDValue();    // Don't custom lower most intrinsics.
02873   case Intrinsic::arm_rbit: {
02874     assert(Op.getOperand(1).getValueType() == MVT::i32 &&
02875            "RBIT intrinsic must have i32 type!");
02876     return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1));
02877   }
02878   case Intrinsic::arm_thread_pointer: {
02879     EVT PtrVT = getPointerTy(DAG.getDataLayout());
02880     return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
02881   }
02882   case Intrinsic::eh_sjlj_lsda: {
02883     MachineFunction &MF = DAG.getMachineFunction();
02884     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02885     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02886     EVT PtrVT = getPointerTy(DAG.getDataLayout());
02887     Reloc::Model RelocM = getTargetMachine().getRelocationModel();
02888     SDValue CPAddr;
02889     unsigned PCAdj = (RelocM != Reloc::PIC_)
02890       ? 0 : (Subtarget->isThumb() ? 4 : 8);
02891     ARMConstantPoolValue *CPV =
02892       ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
02893                                       ARMCP::CPLSDA, PCAdj);
02894     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02895     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02896     SDValue Result = DAG.getLoad(
02897         PtrVT, dl, DAG.getEntryNode(), CPAddr,
02898         MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
02899         false, false, 0);
02900 
02901     if (RelocM == Reloc::PIC_) {
02902       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
02903       Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
02904     }
02905     return Result;
02906   }
02907   case Intrinsic::arm_neon_vmulls:
02908   case Intrinsic::arm_neon_vmullu: {
02909     unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
02910       ? ARMISD::VMULLs : ARMISD::VMULLu;
02911     return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
02912                        Op.getOperand(1), Op.getOperand(2));
02913   }
02914   case Intrinsic::arm_neon_vminnm:
02915   case Intrinsic::arm_neon_vmaxnm: {
02916     unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
02917       ? ISD::FMINNUM : ISD::FMAXNUM;
02918     return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
02919                        Op.getOperand(1), Op.getOperand(2));
02920   }
02921   case Intrinsic::arm_neon_vminu:
02922   case Intrinsic::arm_neon_vmaxu: {
02923     if (Op.getValueType().isFloatingPoint())
02924       return SDValue();
02925     unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
02926       ? ISD::UMIN : ISD::UMAX;
02927     return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
02928                        Op.getOperand(1), Op.getOperand(2));
02929   }
02930   case Intrinsic::arm_neon_vmins:
02931   case Intrinsic::arm_neon_vmaxs: {
02932     // v{min,max}s is overloaded between signed integers and floats.
02933     if (!Op.getValueType().isFloatingPoint()) {
02934       unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
02935         ? ISD::SMIN : ISD::SMAX;
02936       return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
02937                          Op.getOperand(1), Op.getOperand(2));
02938     }
02939     unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
02940       ? ISD::FMINNAN : ISD::FMAXNAN;
02941     return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
02942                        Op.getOperand(1), Op.getOperand(2));
02943   }
02944   }
02945 }
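
// For example (a sketch), the widening multiplies above map directly onto
// target nodes:
//
//   %p = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a,
//                                                   <4 x i16> %b)
//
// becomes ARMISD::VMULLs, which selects to "vmull.s16".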
02946 
02947 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
02948                                  const ARMSubtarget *Subtarget) {
02949   // FIXME: handle "fence singlethread" more efficiently.
02950   SDLoc dl(Op);
02951   if (!Subtarget->hasDataBarrier()) {
02952     // Some ARMv6 CPUs can support data barriers with an mcr instruction.
02953     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
02954     // here.
02955     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
02956            "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
02957     return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
02958                        DAG.getConstant(0, dl, MVT::i32));
02959   }
02960 
02961   ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
02962   AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
02963   ARM_MB::MemBOpt Domain = ARM_MB::ISH;
02964   if (Subtarget->isMClass()) {
02965     // Only a full system barrier exists in the M-class architectures.
02966     Domain = ARM_MB::SY;
02967   } else if (Subtarget->isSwift() && Ord == Release) {
02968     // Swift happens to implement ISHST barriers in a way that's compatible with
02969     // Release semantics but weaker than ISH so we'd be fools not to use
02970     // it. Beware: other processors probably don't!
02971     Domain = ARM_MB::ISHST;
02972   }
02973 
02974   return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
02975                      DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
02976                      DAG.getConstant(Domain, dl, MVT::i32));
02977 }
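
// So, as a sketch, "fence seq_cst" on an A-class core with data barriers
// becomes
//
//   dmb ish
//
// while M-class cores get "dmb sy" and Swift release fences may get
// "dmb ishst".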
02978 
02979 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
02980                              const ARMSubtarget *Subtarget) {
02981   // Pre-v5TE ARM and Thumb1 do not have preload instructions.
02982   if (!(Subtarget->isThumb2() ||
02983         (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
02984     // Just preserve the chain.
02985     return Op.getOperand(0);
02986 
02987   SDLoc dl(Op);
02988   unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
02989   if (!isRead &&
02990       (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
02991     // ARMv7 with MP extension has PLDW.
02992     return Op.getOperand(0);
02993 
02994   unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
02995   if (Subtarget->isThumb()) {
02996     // Invert the bits.
02997     isRead = ~isRead & 1;
02998     isData = ~isData & 1;
02999   }
03000 
03001   return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
03002                      Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
03003                      DAG.getConstant(isData, dl, MVT::i32));
03004 }
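
// A sketch of the mapping (the second intrinsic operand is 0 for read, 1
// for write):
//
//   call void @llvm.prefetch(i8* %p, i32 0, i32 3, i32 1)  ->  pld  [r0]
//   call void @llvm.prefetch(i8* %p, i32 1, i32 3, i32 1)  ->  pldw [r0]
//
// where pldw additionally requires ARMv7 with the MP extension.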
03005 
03006 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
03007   MachineFunction &MF = DAG.getMachineFunction();
03008   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
03009 
03010   // vastart just stores the address of the VarArgsFrameIndex slot into the
03011   // memory location argument.
03012   SDLoc dl(Op);
03013   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
03014   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
03015   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
03016   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
03017                       MachinePointerInfo(SV), false, false, 0);
03018 }
03019 
03020 SDValue
03021 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
03022                                         SDValue &Root, SelectionDAG &DAG,
03023                                         SDLoc dl) const {
03024   MachineFunction &MF = DAG.getMachineFunction();
03025   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
03026 
03027   const TargetRegisterClass *RC;
03028   if (AFI->isThumb1OnlyFunction())
03029     RC = &ARM::tGPRRegClass;
03030   else
03031     RC = &ARM::GPRRegClass;
03032 
03033   // Transform the arguments stored in physical registers into virtual ones.
03034   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
03035   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
03036 
03037   SDValue ArgValue2;
03038   if (NextVA.isMemLoc()) {
03039     MachineFrameInfo *MFI = MF.getFrameInfo();
03040     int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
03041 
03042     // Create load node to retrieve arguments from the stack.
03043     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
03044     ArgValue2 = DAG.getLoad(
03045         MVT::i32, dl, Root, FIN,
03046         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
03047         false, false, 0);
03048   } else {
03049     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
03050     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
03051   }
03052   if (!Subtarget->isLittle())
03053     std::swap(ArgValue, ArgValue2);
03054   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
03055 }
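
// E.g. (a sketch, little-endian): an f64 argument whose halves arrive in r2
// and r3 is reassembled with
//
//   vmov d0, r2, r3      ; ARMISD::VMOVDRR
//
// and the big-endian path swaps the two halves first.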
03056 
03057 // The remaining GPRs hold either the beginning of variable-argument
03058 // data, or the beginning of an aggregate passed by value (usually
03059 // byval).  Either way, we allocate stack slots adjacent to the data
03060 // provided by our caller, and store the unallocated registers there.
03061 // If this is a variadic function, the va_list pointer will begin with
03062 // these values; otherwise, this reassembles a (byval) structure that
03063 // was split between registers and memory.
03064 // Return: the frame index that the registers were stored into.
03065 int
03066 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
03067                                   SDLoc dl, SDValue &Chain,
03068                                   const Value *OrigArg,
03069                                   unsigned InRegsParamRecordIdx,
03070                                   int ArgOffset,
03071                                   unsigned ArgSize) const {
03072   // Currently, two use cases are possible:
03073   // Case #1. Non-var-args function, and we meet the first byval parameter.
03074   //          Set up the first unallocated register as the first byval
03075   //          register; eat all remaining registers
03076   //          (these two actions are performed by the HandleByVal method).
03077   //          Then, here, we initialize the stack frame with
03078   //          "store-reg" instructions.
03079   // Case #2. Var-args function that doesn't contain byval parameters.
03080   //          The same: eat all remaining unallocated registers and
03081   //          initialize the stack frame.
03082 
03083   MachineFunction &MF = DAG.getMachineFunction();
03084   MachineFrameInfo *MFI = MF.getFrameInfo();
03085   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
03086   unsigned RBegin, REnd;
03087   if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
03088     CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
03089   } else {
03090     unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
03091     RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
03092     REnd = ARM::R4;
03093   }
03094 
03095   if (REnd != RBegin)
03096     ArgOffset = -4 * (ARM::R4 - RBegin);
03097 
03098   auto PtrVT = getPointerTy(DAG.getDataLayout());
03099   int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
03100   SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
03101 
03102   SmallVector<SDValue, 4> MemOps;
03103   const TargetRegisterClass *RC =
03104       AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
03105 
03106   for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
03107     unsigned VReg = MF.addLiveIn(Reg, RC);
03108     SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
03109     SDValue Store =
03110         DAG.getStore(Val.getValue(1), dl, Val, FIN,
03111                      MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
03112     MemOps.push_back(Store);
03113     FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
03114   }
03115 
03116   if (!MemOps.empty())
03117     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
03118   return FrameIndex;
03119 }
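
// As a sketch of case #1 (types hypothetical): for
//
//   struct S { int a[4]; };
//   void f(struct S s);    // under AAPCS, s may occupy r0-r3
//
// the live-in GPRs are stored back-to-back into a single fixed FrameIndex so
// the aggregate can be addressed contiguously in memory.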
03120 
03121 // Set up the stack frame that the va_list pointer will start from.
03122 void
03123 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
03124                                         SDLoc dl, SDValue &Chain,
03125                                         unsigned ArgOffset,
03126                                         unsigned TotalArgRegsSaveSize,
03127                                         bool ForceMutable) const {
03128   MachineFunction &MF = DAG.getMachineFunction();
03129   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
03130 
03131   // Try to store any remaining integer argument regs
03132   // to their spots on the stack so that they may be loaded by dereferencing
03133   // the result of va_next.
03134   // If there are no regs to be stored, just point the address after the
03135   // last argument passed via the stack.
03136   int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
03137                                   CCInfo.getInRegsParamsCount(),
03138                                   CCInfo.getNextStackOffset(), 4);
03139   AFI->setVarArgsFrameIndex(FrameIndex);
03140 }
03141 
03142 SDValue
03143 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
03144                                         CallingConv::ID CallConv, bool isVarArg,
03145                                         const SmallVectorImpl<ISD::InputArg>
03146                                           &Ins,
03147                                         SDLoc dl, SelectionDAG &DAG,
03148                                         SmallVectorImpl<SDValue> &InVals)
03149                                           const {
03150   MachineFunction &MF = DAG.getMachineFunction();
03151   MachineFrameInfo *MFI = MF.getFrameInfo();
03152 
03153   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
03154 
03155   // Assign locations to all of the incoming arguments.
03156   SmallVector<CCValAssign, 16> ArgLocs;
03157   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
03158                     *DAG.getContext(), Prologue);
03159   CCInfo.AnalyzeFormalArguments(Ins,
03160                                 CCAssignFnForNode(CallConv, /* Return*/ false,
03161                                                   isVarArg));
03162 
03163   SmallVector<SDValue, 16> ArgValues;
03164   SDValue ArgValue;
03165   Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
03166   unsigned CurArgIdx = 0;
03167 
03168   // Initially ArgRegsSaveSize is zero.
03169   // Then we increase this value each time we meet a byval parameter.
03170   // We also increase this value in the case of a varargs function.
03171   AFI->setArgRegsSaveSize(0);
03172 
03173   // Calculate the amount of stack space that we need to allocate to store
03174   // byval and variadic arguments that are passed in registers.
03175   // We need to know this before we allocate the first byval or variadic
03176   // argument, as they will be allocated a stack slot below the CFA (Canonical
03177   // Frame Address, the stack pointer at entry to the function).
03178   unsigned ArgRegBegin = ARM::R4;
03179   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03180     if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
03181       break;
03182 
03183     CCValAssign &VA = ArgLocs[i];
03184     unsigned Index = VA.getValNo();
03185     ISD::ArgFlagsTy Flags = Ins[Index].Flags;
03186     if (!Flags.isByVal())
03187       continue;
03188 
03189     assert(VA.isMemLoc() && "unexpected byval pointer in reg");
03190     unsigned RBegin, REnd;
03191     CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
03192     ArgRegBegin = std::min(ArgRegBegin, RBegin);
03193 
03194     CCInfo.nextInRegsParam();
03195   }
03196   CCInfo.rewindByValRegsInfo();
03197 
03198   int lastInsIndex = -1;
03199   if (isVarArg && MFI->hasVAStart()) {
03200     unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
03201     if (RegIdx != array_lengthof(GPRArgRegs))
03202       ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
03203   }
03204 
03205   unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
03206   AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
03207   auto PtrVT = getPointerTy(DAG.getDataLayout());
03208 
03209   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
03210     CCValAssign &VA = ArgLocs[i];
03211     if (Ins[VA.getValNo()].isOrigArg()) {
03212       std::advance(CurOrigArg,
03213                    Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
03214       CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
03215     }
03216     // Arguments stored in registers.
03217     if (VA.isRegLoc()) {
03218       EVT RegVT = VA.getLocVT();
03219 
03220       if (VA.needsCustom()) {
03221         // f64 and vector types are split up into multiple registers or
03222         // combinations of registers and stack slots.
03223         if (VA.getLocVT() == MVT::v2f64) {
03224           SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
03225                                                    Chain, DAG, dl);
03226           VA = ArgLocs[++i]; // skip ahead to next loc
03227           SDValue ArgValue2;
03228           if (VA.isMemLoc()) {
03229             int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
03230             SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03231             ArgValue2 = DAG.getLoad(
03232                 MVT::f64, dl, Chain, FIN,
03233                 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
03234                 false, false, false, 0);
03235           } else {
03236             ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
03237                                              Chain, DAG, dl);
03238           }
03239           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
03240           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
03241                                  ArgValue, ArgValue1,
03242                                  DAG.getIntPtrConstant(0, dl));
03243           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
03244                                  ArgValue, ArgValue2,
03245                                  DAG.getIntPtrConstant(1, dl));
03246         } else
03247           ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
03248 
03249       } else {
03250         const TargetRegisterClass *RC;
03251 
03252         if (RegVT == MVT::f32)
03253           RC = &ARM::SPRRegClass;
03254         else if (RegVT == MVT::f64)
03255           RC = &ARM::DPRRegClass;
03256         else if (RegVT == MVT::v2f64)
03257           RC = &ARM::QPRRegClass;
03258         else if (RegVT == MVT::i32)
03259           RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
03260                                            : &ARM::GPRRegClass;
03261         else
03262           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
03263 
03264         // Transform the arguments in physical registers into virtual ones.
03265         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
03266         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
03267       }
03268 
03269       // If this is an 8 or 16-bit value, it is really passed promoted
03270       // to 32 bits.  Insert an assert[sz]ext to capture this, then
03271       // truncate to the right size.
03272       switch (VA.getLocInfo()) {
03273       default: llvm_unreachable("Unknown loc info!");
03274       case CCValAssign::Full: break;
03275       case CCValAssign::BCvt:
03276         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
03277         break;
03278       case CCValAssign::SExt:
03279         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
03280                                DAG.getValueType(VA.getValVT()));
03281         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
03282         break;
03283       case CCValAssign::ZExt:
03284         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
03285                                DAG.getValueType(VA.getValVT()));
03286         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
03287         break;
03288       }
03289 
03290       InVals.push_back(ArgValue);
03291 
03292     } else { // !VA.isRegLoc()
03293 
03294       // sanity check
03295       assert(VA.isMemLoc());
03296       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
03297 
03298       int index = VA.getValNo();
03299 
03300       // Some Ins[] entries become multiple ArgLoc[] entries.
03301       // Process them only once.
03302       if (index != lastInsIndex)
03303         {
03304           ISD::ArgFlagsTy Flags = Ins[index].Flags;
03305           // FIXME: For now, all byval parameter objects are marked mutable.
03306           // This can be changed with more analysis.
03307           // In case of tail call optimization, mark all arguments mutable,
03308           // since they could be overwritten by the lowering of arguments in
03309           // case of a tail call.
03310           if (Flags.isByVal()) {
03311             assert(Ins[index].isOrigArg() &&
03312                    "Byval arguments cannot be implicit");
03313             unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
03314 
03315             int FrameIndex = StoreByValRegs(
03316                 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
03317                 VA.getLocMemOffset(), Flags.getByValSize());
03318             InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
03319             CCInfo.nextInRegsParam();
03320           } else {
03321             unsigned FIOffset = VA.getLocMemOffset();
03322             int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
03323                                             FIOffset, true);
03324 
03325             // Create load nodes to retrieve arguments from the stack.
03326             SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03327             InVals.push_back(DAG.getLoad(
03328                 VA.getValVT(), dl, Chain, FIN,
03329                 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
03330                 false, false, false, 0));
03331           }
03332           lastInsIndex = index;
03333         }
03334     }
03335   }
03336 
03337   // varargs
03338   if (isVarArg && MFI->hasVAStart())
03339     VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
03340                          CCInfo.getNextStackOffset(),
03341                          TotalArgRegsSaveSize);
03342 
03343   AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
03344 
03345   return Chain;
03346 }
03347 
03348 /// isFloatingPointZero - Return true if this is +0.0.
03349 static bool isFloatingPointZero(SDValue Op) {
03350   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
03351     return CFP->getValueAPF().isPosZero();
03352   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
03353     // Maybe this has already been legalized into the constant pool?
03354     if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
03355       SDValue WrapperOp = Op.getOperand(1).getOperand(0);
03356       if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
03357         if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
03358           return CFP->getValueAPF().isPosZero();
03359     }
03360   } else if (Op->getOpcode() == ISD::BITCAST &&
03361              Op->getValueType(0) == MVT::f64) {
03362     // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
03363     // created by LowerConstantFP().
03364     SDValue BitcastOp = Op->getOperand(0);
03365     if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
03366         isNullConstant(BitcastOp->getOperand(0)))
03367       return true;
03368   }
03369   return false;
03370 }
03371 
03372 /// Returns an appropriate ARM CMP (cmp) and the corresponding condition
03373 /// code for the given operands.
03374 SDValue
03375 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
03376                              SDValue &ARMcc, SelectionDAG &DAG,
03377                              SDLoc dl) const {
03378   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
03379     unsigned C = RHSC->getZExtValue();
03380     if (!isLegalICmpImmediate(C)) {
03381       // Constant does not fit, try adjusting it by one?
03382       switch (CC) {
03383       default: break;
03384       case ISD::SETLT:
03385       case ISD::SETGE:
03386         if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
03387           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
03388           RHS = DAG.getConstant(C - 1, dl, MVT::i32);
03389         }
03390         break;
03391       case ISD::SETULT:
03392       case ISD::SETUGE:
03393         if (C != 0 && isLegalICmpImmediate(C-1)) {
03394           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
03395           RHS = DAG.getConstant(C - 1, dl, MVT::i32);
03396         }
03397         break;
03398       case ISD::SETLE:
03399       case ISD::SETGT:
03400         if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
03401           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
03402           RHS = DAG.getConstant(C + 1, dl, MVT::i32);
03403         }
03404         break;
03405       case ISD::SETULE:
03406       case ISD::SETUGT:
03407         if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
03408           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
03409           RHS = DAG.getConstant(C + 1, dl, MVT::i32);
03410         }
03411         break;
03412       }
03413     }
03414   }
03415 
03416   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
03417   ARMISD::NodeType CompareType;
03418   switch (CondCode) {
03419   default:
03420     CompareType = ARMISD::CMP;
03421     break;
03422   case ARMCC::EQ:
03423   case ARMCC::NE:
03424     // Uses only Z Flag
03425     CompareType = ARMISD::CMPZ;
03426     break;
03427   }
03428   ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
03429   return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
03430 }
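
// E.g. (a sketch): 65537 (0x10001) is not a valid modified immediate, but
// 65536 is, so
//
//   icmp slt i32 %x, 65537
//
// is emitted as "cmp rN, #65536" with the condition adjusted from LT to LE.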
03431 
03432 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
03433 SDValue
03434 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
03435                              SDLoc dl) const {
03436   assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
03437   SDValue Cmp;
03438   if (!isFloatingPointZero(RHS))
03439     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
03440   else
03441     Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
03442   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
03443 }
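
// The resulting sequence, as a sketch:
//
//   vcmp(e).f64 d0, d1         ; ARMISD::CMPFP, or CMPFPw0 against #0
//   vmrs APSR_nzcv, fpscr      ; ARMISD::FMSTAT moves NZCV into CPSR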
03444 
03445 /// duplicateCmp - Glue values can have only one use, so this function
03446 /// duplicates a comparison node.
03447 SDValue
03448 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
03449   unsigned Opc = Cmp.getOpcode();
03450   SDLoc DL(Cmp);
03451   if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
03452     return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
03453 
03454   assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
03455   Cmp = Cmp.getOperand(0);
03456   Opc = Cmp.getOpcode();
03457   if (Opc == ARMISD::CMPFP)
03458     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
03459   else {
03460     assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
03461     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
03462   }
03463   return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
03464 }
03465 
03466 std::pair<SDValue, SDValue>
03467 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
03468                                  SDValue &ARMcc) const {
03469   assert(Op.getValueType() == MVT::i32 &&  "Unsupported value type");
03470 
03471   SDValue Value, OverflowCmp;
03472   SDValue LHS = Op.getOperand(0);
03473   SDValue RHS = Op.getOperand(1);
03474   SDLoc dl(Op);
03475 
03476   // FIXME: We are currently always generating CMPs because we don't support
03477   // generating CMN through the backend. This is not as good as the natural
03478   // CMP case because it causes a register dependency and cannot be folded
03479   // later.
03480 
03481   switch (Op.getOpcode()) {
03482   default:
03483     llvm_unreachable("Unknown overflow instruction!");
03484   case ISD::SADDO:
03485     ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
03486     Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
03487     OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
03488     break;
03489   case ISD::UADDO:
03490     ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
03491     Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
03492     OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
03493     break;
03494   case ISD::SSUBO:
03495     ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
03496     Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
03497     OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
03498     break;
03499   case ISD::USUBO:
03500     ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
03501     Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
03502     OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
03503     break;
03504   } // switch (...)
03505 
03506   return std::make_pair(Value, OverflowCmp);
03507 }
03508 
03509 
03510 SDValue
03511 ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
03512   // Let legalize expand this if it isn't a legal type yet.
03513   if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
03514     return SDValue();
03515 
03516   SDValue Value, OverflowCmp;
03517   SDValue ARMcc;
03518   std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
03519   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03520   SDLoc dl(Op);
03521   // We use 0 and 1 as false and true values.
03522   SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
03523   SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
03524   EVT VT = Op.getValueType();
03525 
03526   SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
03527                                  ARMcc, CCR, OverflowCmp);
03528 
03529   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
03530   return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
03531 }
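
// Editor's note (illustrative): a C-level picture of what the SADDO case of
// getARMXALUOOp computes; the helper is hypothetical. ARMCC::VC ("overflow
// clear") steers the CMOV above to 0, and V set yields 1:
static bool saddOverflows(int A, int B) {
  int Res = (int)((unsigned)A + (unsigned)B); // wrapping add, like ISD::ADD
  // Signed overflow occurred iff the result's sign differs from the sign of
  // both inputs.
  return ((A ^ Res) & (B ^ Res)) < 0;
}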
03532 
03533 
03534 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
03535   SDValue Cond = Op.getOperand(0);
03536   SDValue SelectTrue = Op.getOperand(1);
03537   SDValue SelectFalse = Op.getOperand(2);
03538   SDLoc dl(Op);
03539   unsigned Opc = Cond.getOpcode();
03540 
03541   if (Cond.getResNo() == 1 &&
03542       (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
03543        Opc == ISD::USUBO)) {
03544     if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
03545       return SDValue();
03546 
03547     SDValue Value, OverflowCmp;
03548     SDValue ARMcc;
03549     std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
03550     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03551     EVT VT = Op.getValueType();
03552 
03553     return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
03554                    OverflowCmp, DAG);
03555   }
03556 
03557   // Convert:
03558   //
03559   //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
03560   //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
03561   //
03562   if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
03563     const ConstantSDNode *CMOVTrue =
03564       dyn_cast<ConstantSDNode>(Cond.getOperand(0));
03565     const ConstantSDNode *CMOVFalse =
03566       dyn_cast<ConstantSDNode>(Cond.getOperand(1));
03567 
03568     if (CMOVTrue && CMOVFalse) {
03569       unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
03570       unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
03571 
03572       SDValue True;
03573       SDValue False;
03574       if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
03575         True = SelectTrue;
03576         False = SelectFalse;
03577       } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
03578         True = SelectFalse;
03579         False = SelectTrue;
03580       }
03581 
03582       if (True.getNode() && False.getNode()) {
03583         EVT VT = Op.getValueType();
03584         SDValue ARMcc = Cond.getOperand(2);
03585         SDValue CCR = Cond.getOperand(3);
03586         SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
03587         assert(True.getValueType() == VT);
03588         return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
03589       }
03590     }
03591   }
03592 
03593   // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
03594   // undefined bits before doing a full-word comparison with zero.
03595   Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
03596                      DAG.getConstant(1, dl, Cond.getValueType()));
03597 
03598   return DAG.getSelectCC(dl, Cond,
03599                          DAG.getConstant(0, dl, Cond.getValueType()),
03600                          SelectTrue, SelectFalse, ISD::SETNE);
03601 }
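
// Editor's note (illustrative): because only bit 0 of a boolean is defined
// under UndefinedBooleanContent, the fallback above behaves like:
//
//   result = ((Cond & 1) != 0) ? SelectTrue : SelectFalse;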
03602 
03603 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
03604                                  bool &swpCmpOps, bool &swpVselOps) {
03605   // Start by selecting the GE condition code for opcodes that return true for
03606   // 'equality'
03607   if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
03608       CC == ISD::SETULE)
03609     CondCode = ARMCC::GE;
03610 
03611   // and GT for opcodes that return false for 'equality'.
03612   else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
03613            CC == ISD::SETULT)
03614     CondCode = ARMCC::GT;
03615 
03616   // Since we are constrained to GE/GT, if the opcode contains 'less', we need
03617   // to swap the compare operands.
03618   if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
03619       CC == ISD::SETULT)
03620     swpCmpOps = true;
03621 
03622   // Both GT and GE are ordered comparisons, and return false for 'unordered'.
03623   // If we have an unordered opcode, we need to swap the operands to the VSEL
03624   // instruction (effectively negating the condition).
03625   //
03626   // This also has the effect of swapping which one of 'less' or 'greater'
03627   // returns true, so we also swap the compare operands. It also switches
03628   // whether we return true for 'equality', so we compensate by picking the
03629   // opposite condition code to our original choice.
03630   if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
03631       CC == ISD::SETUGT) {
03632     swpCmpOps = !swpCmpOps;
03633     swpVselOps = !swpVselOps;
03634     CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
03635   }
03636 
03637   // 'ordered' is 'anything but unordered', so use the VS condition code and
03638   // swap the VSEL operands.
03639   if (CC == ISD::SETO) {
03640     CondCode = ARMCC::VS;
03641     swpVselOps = true;
03642   }
03643 
03644   // 'unordered or not equal' is 'anything but equal', so use the EQ condition
03645   // code and swap the VSEL operands.
03646   if (CC == ISD::SETUNE) {
03647     CondCode = ARMCC::EQ;
03648     swpVselOps = true;
03649   }
03650 }
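
// Editor's worked example (illustrative): for CC == ISD::SETULT the passes
// above compose as follows:
//   1. 'ult' is false for equality  -> CondCode = GT
//   2. 'ult' contains 'less'        -> swpCmpOps = true
//   3. 'ult' is unordered           -> swpCmpOps flips back to false,
//                                      swpVselOps = true, CondCode = GE
// Net effect: emit "cmp a, b" unswapped, then a VSELGE with swapped value
// operands, which picks the original 'true' value exactly when NOT
// (a >= b ordered), i.e. when a < b or the operands are unordered, which is
// the definition of SETULT.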
03651 
03652 SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
03653                                    SDValue TrueVal, SDValue ARMcc, SDValue CCR,
03654                                    SDValue Cmp, SelectionDAG &DAG) const {
03655   if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
03656     FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
03657                            DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
03658     TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
03659                           DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
03660 
03661     SDValue TrueLow = TrueVal.getValue(0);
03662     SDValue TrueHigh = TrueVal.getValue(1);
03663     SDValue FalseLow = FalseVal.getValue(0);
03664     SDValue FalseHigh = FalseVal.getValue(1);
03665 
03666     SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
03667                               ARMcc, CCR, Cmp);
03668     SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
03669                                ARMcc, CCR, duplicateCmp(Cmp, DAG));
03670 
03671     return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
03672   } else {
03673     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
03674                        Cmp);
03675   }
03676 }
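
// Editor's note (illustrative): on a single-precision-only FPU the f64 path
// above selects each 32-bit half in core registers and reassembles:
//
//   lo  = cond ? TrueLow  : FalseLow;   // first CMOV, consumes Cmp
//   hi  = cond ? TrueHigh : FalseHigh;  // second CMOV, needs duplicateCmp
//   res = VMOVDRR(lo, hi);              // rebuild the f64 value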
03677 
03678 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
03679   EVT VT = Op.getValueType();
03680   SDValue LHS = Op.getOperand(0);
03681   SDValue RHS = Op.getOperand(1);
03682   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
03683   SDValue TrueVal = Op.getOperand(2);
03684   SDValue FalseVal = Op.getOperand(3);
03685   SDLoc dl(Op);
03686 
03687   if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
03688     DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
03689                                                     dl);
03690 
03691     // If softenSetCCOperands only returned one value, we should compare it to
03692     // zero.
03693     if (!RHS.getNode()) {
03694       RHS = DAG.getConstant(0, dl, LHS.getValueType());
03695       CC = ISD::SETNE;
03696     }
03697   }
03698 
03699   if (LHS.getValueType() == MVT::i32) {
03700     // Try to generate VSEL on ARMv8.
03701     // The VSEL instruction can't use all the usual ARM condition
03702     // codes: it only has two bits to select the condition code, so it's
03703     // constrained to use only GE, GT, VS and EQ.
03704     //
03705     // To implement all the various ISD::SETXXX opcodes, we sometimes need to
03706     // swap the operands of the previous compare instruction (effectively
03707     // inverting the compare condition, swapping 'less' and 'greater') and
03708     // sometimes need to swap the operands to the VSEL (which inverts the
03709     // condition in the sense of firing whenever the previous condition didn't).
03710     if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
03711                                     TrueVal.getValueType() == MVT::f64)) {
03712       ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
03713       if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
03714           CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
03715         CC = ISD::getSetCCInverse(CC, true);
03716         std::swap(TrueVal, FalseVal);
03717       }
03718     }
03719 
03720     SDValue ARMcc;
03721     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03722     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
03723     return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
03724   }
03725 
03726   ARMCC::CondCodes CondCode, CondCode2;
03727   FPCCToARMCC(CC, CondCode, CondCode2);
03728 
03729   // Try to generate VMAXNM/VMINNM on ARMv8.
03730   if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
03731                                   TrueVal.getValueType() == MVT::f64)) {
03732     bool swpCmpOps = false;
03733     bool swpVselOps = false;
03734     checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
03735 
03736     if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
03737         CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
03738       if (swpCmpOps)
03739         std::swap(LHS, RHS);
03740       if (swpVselOps)
03741         std::swap(TrueVal, FalseVal);
03742     }
03743   }
03744 
03745   SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
03746   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
03747   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03748   SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
03749   if (CondCode2 != ARMCC::AL) {
03750     SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
03751     // FIXME: Needs another CMP because a glue result can have only one use.
03752     SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
03753     Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
03754   }
03755   return Result;
03756 }
03757 
03758 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
03759 /// to morph to an integer compare sequence.
03760 static bool canChangeToInt(SDValue Op, bool &SeenZero,
03761                            const ARMSubtarget *Subtarget) {
03762   SDNode *N = Op.getNode();
03763   if (!N->hasOneUse())
03764     // Otherwise it requires moving the value from fp to integer registers.
03765     return false;
03766   if (!N->getNumValues())
03767     return false;
03768   EVT VT = Op.getValueType();
03769   if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
03770     // f32 case is generally profitable. f64 case only makes sense when vcmpe +
03771     // vmrs are very slow, e.g. cortex-a8.
03772     return false;
03773 
03774   if (isFloatingPointZero(Op)) {
03775     SeenZero = true;
03776     return true;
03777   }
03778   return ISD::isNormalLoad(N);
03779 }
03780 
03781 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
03782   if (isFloatingPointZero(Op))
03783     return DAG.getConstant(0, SDLoc(Op), MVT::i32);
03784 
03785   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
03786     return DAG.getLoad(MVT::i32, SDLoc(Op),
03787                        Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
03788                        Ld->isVolatile(), Ld->isNonTemporal(),
03789                        Ld->isInvariant(), Ld->getAlignment());
03790 
03791   llvm_unreachable("Unknown VFP cmp argument!");
03792 }
03793 
03794 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
03795                            SDValue &RetVal1, SDValue &RetVal2) {
03796   SDLoc dl(Op);
03797 
03798   if (isFloatingPointZero(Op)) {
03799     RetVal1 = DAG.getConstant(0, dl, MVT::i32);
03800     RetVal2 = DAG.getConstant(0, dl, MVT::i32);
03801     return;
03802   }
03803 
03804   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
03805     SDValue Ptr = Ld->getBasePtr();
03806     RetVal1 = DAG.getLoad(MVT::i32, dl,
03807                           Ld->getChain(), Ptr,
03808                           Ld->getPointerInfo(),
03809                           Ld->isVolatile(), Ld->isNonTemporal(),
03810                           Ld->isInvariant(), Ld->getAlignment());
03811 
03812     EVT PtrType = Ptr.getValueType();
03813     unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
03814     SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
03815                                  PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
03816     RetVal2 = DAG.getLoad(MVT::i32, dl,
03817                           Ld->getChain(), NewPtr,
03818                           Ld->getPointerInfo().getWithOffset(4),
03819                           Ld->isVolatile(), Ld->isNonTemporal(),
03820                           Ld->isInvariant(), NewAlign);
03821     return;
03822   }
03823 
03824   llvm_unreachable("Unknown VFP cmp argument!");
03825 }
03826 
03827 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
03828 /// f32 and even f64 comparisons to integer ones.
03829 SDValue
03830 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
03831   SDValue Chain = Op.getOperand(0);
03832   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
03833   SDValue LHS = Op.getOperand(2);
03834   SDValue RHS = Op.getOperand(3);
03835   SDValue Dest = Op.getOperand(4);
03836   SDLoc dl(Op);
03837 
03838   bool LHSSeenZero = false;
03839   bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
03840   bool RHSSeenZero = false;
03841   bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
03842   if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
03843     // If unsafe fp math optimization is enabled and there are no other uses of
03844     // the CMP operands, and the condition code is EQ or NE, we can optimize it
03845     // to an integer comparison.
03846     if (CC == ISD::SETOEQ)
03847       CC = ISD::SETEQ;
03848     else if (CC == ISD::SETUNE)
03849       CC = ISD::SETNE;
03850 
03851     SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
03852     SDValue ARMcc;
03853     if (LHS.getValueType() == MVT::f32) {
03854       LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
03855                         bitcastf32Toi32(LHS, DAG), Mask);
03856       RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
03857                         bitcastf32Toi32(RHS, DAG), Mask);
03858       SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
03859       SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03860       return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
03861                          Chain, Dest, ARMcc, CCR, Cmp);
03862     }
03863 
03864     SDValue LHS1, LHS2;
03865     SDValue RHS1, RHS2;
03866     expandf64Toi32(LHS, DAG, LHS1, LHS2);
03867     expandf64Toi32(RHS, DAG, RHS1, RHS2);
03868     LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
03869     RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
03870     ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
03871     ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
03872     SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
03873     SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
03874     return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
03875   }
03876 
03877   return SDValue();
03878 }
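
// Editor's note (illustrative): the 0x7fffffff mask above exists because
// +0.0 and -0.0 compare equal yet differ in the sign bit. With one operand
// known to be a floating-point zero, equality reduces to a bit test (NaNs
// are excluded by the unsafe-fp-math precondition); hypothetical helper:
static bool isFPZeroBits(unsigned Bits) { // bit pattern of an f32
  return (Bits & 0x7fffffffu) == 0;       // true for both +0.0 and -0.0
}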
03879 
03880 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
03881   SDValue Chain = Op.getOperand(0);
03882   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
03883   SDValue LHS = Op.getOperand(2);
03884   SDValue RHS = Op.getOperand(3);
03885   SDValue Dest = Op.getOperand(4);
03886   SDLoc dl(Op);
03887 
03888   if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
03889     DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
03890                                                     dl);
03891 
03892     // If softenSetCCOperands only returned one value, we should compare it to
03893     // zero.
03894     if (!RHS.getNode()) {
03895       RHS = DAG.getConstant(0, dl, LHS.getValueType());
03896       CC = ISD::SETNE;
03897     }
03898   }
03899 
03900   if (LHS.getValueType() == MVT::i32) {
03901     SDValue ARMcc;
03902     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
03903     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03904     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
03905                        Chain, Dest, ARMcc, CCR, Cmp);
03906   }
03907 
03908   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
03909 
03910   if (getTargetMachine().Options.UnsafeFPMath &&
03911       (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
03912        CC == ISD::SETNE || CC == ISD::SETUNE)) {
03913     SDValue Result = OptimizeVFPBrcond(Op, DAG);
03914     if (Result.getNode())
03915       return Result;
03916   }
03917 
03918   ARMCC::CondCodes CondCode, CondCode2;
03919   FPCCToARMCC(CC, CondCode, CondCode2);
03920 
03921   SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
03922   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
03923   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03924   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
03925   SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
03926   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
03927   if (CondCode2 != ARMCC::AL) {
03928     ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
03929     SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
03930     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
03931   }
03932   return Res;
03933 }
03934 
03935 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
03936   SDValue Chain = Op.getOperand(0);
03937   SDValue Table = Op.getOperand(1);
03938   SDValue Index = Op.getOperand(2);
03939   SDLoc dl(Op);
03940 
03941   EVT PTy = getPointerTy(DAG.getDataLayout());
03942   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
03943   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
03944   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
03945   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
03946   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
03947   if (Subtarget->isThumb2()) {
03948     // Thumb2 uses a two-level jump. That is, it jumps into the jump table
03949     // which does another jump to the destination. This also makes it easier
03950     // to translate it to TBB / TBH later.
03951     // FIXME: This might not work if the function is extremely large.
03952     return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
03953                        Addr, Op.getOperand(2), JTI);
03954   }
03955   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
03956     Addr =
03957         DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
03958                     MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
03959                     false, false, false, 0);
03960     Chain = Addr.getValue(1);
03961     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
03962     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
03963   } else {
03964     Addr =
03965         DAG.getLoad(PTy, dl, Chain, Addr,
03966                     MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
03967                     false, false, false, 0);
03968     Chain = Addr.getValue(1);
03969     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
03970   }
03971 }
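
// Editor's note (illustrative): the address arithmetic above is
// "entry = Table + Index * 4", one 4-byte slot per case. In the non-PIC
// path the slot holds the target address itself; in PIC mode it holds a
// table-relative offset, hence the extra ADD after the load:
//
//   target = load32(Table + Index * 4);          // non-PIC
//   target = load32(Table + Index * 4) + Table;  // PIC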
03972 
03973 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
03974   EVT VT = Op.getValueType();
03975   SDLoc dl(Op);
03976 
03977   if (Op.getValueType().getVectorElementType() == MVT::i32) {
03978     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
03979       return Op;
03980     return DAG.UnrollVectorOp(Op.getNode());
03981   }
03982 
03983   assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
03984          "Invalid type for custom lowering!");
03985   if (VT != MVT::v4i16)
03986     return DAG.UnrollVectorOp(Op.getNode());
03987 
03988   Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
03989   return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
03990 }
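
// Editor's note (illustrative): the only custom case above is v4f32 ->
// v4i16, split into two legal steps because NEON has no direct f32 -> i16
// vector convert:
//
//   v4i32 Wide = FP_TO_SINT/FP_TO_UINT(v4f32 X);  // VCVT
//   v4i16 Res  = TRUNCATE(Wide);                  // VMOVN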
03991 
03992 SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
03993   EVT VT = Op.getValueType();
03994   if (VT.isVector())
03995     return LowerVectorFP_TO_INT(Op, DAG);
03996   if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
03997     RTLIB::Libcall LC;
03998     if (Op.getOpcode() == ISD::FP_TO_SINT)
03999       LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
04000                               Op.getValueType());
04001     else
04002       LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
04003                               Op.getValueType());
04004     return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
04005                        /*isSigned*/ false, SDLoc(Op)).first;
04006   }
04007 
04008   return Op;
04009 }
04010 
04011 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
04012   EVT VT = Op.getValueType();
04013   SDLoc dl(Op);
04014 
04015   if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
04016     if (VT.getVectorElementType() == MVT::f32)
04017       return Op;
04018     return DAG.UnrollVectorOp(Op.getNode());
04019   }
04020 
04021   assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
04022          "Invalid type for custom lowering!");
04023   if (VT != MVT::v4f32)
04024     return DAG.UnrollVectorOp(Op.getNode());
04025 
04026   unsigned CastOpc;
04027   unsigned Opc;
04028   switch (Op.getOpcode()) {
04029   default: llvm_unreachable("Invalid opcode!");
04030   case ISD::SINT_TO_FP:
04031     CastOpc = ISD::SIGN_EXTEND;
04032     Opc = ISD::SINT_TO_FP;
04033     break;
04034   case ISD::UINT_TO_FP:
04035     CastOpc = ISD::ZERO_EXTEND;
04036     Opc = ISD::UINT_TO_FP;
04037     break;
04038   }
04039 
04040   Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
04041   return DAG.getNode(Opc, dl, VT, Op);
04042 }
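
// Editor's note (illustrative): the mirror of the FP_TO_INT case above:
// v4i16 -> v4f32 first widens with the extend matching the signedness of
// the convert, then converts:
//
//   v4i32 Wide = SIGN_EXTEND/ZERO_EXTEND(v4i16 X);
//   v4f32 Res  = SINT_TO_FP/UINT_TO_FP(Wide);     // VCVT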
04043 
04044 SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
04045   EVT VT = Op.getValueType();
04046   if (VT.isVector())
04047     return LowerVectorINT_TO_FP(Op, DAG);
04048   if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
04049     RTLIB::Libcall LC;
04050     if (Op.getOpcode() == ISD::SINT_TO_FP)
04051       LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
04052                               Op.getValueType());
04053     else
04054       LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
04055                               Op.getValueType());
04056     return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
04057                        /*isSigned*/ false, SDLoc(Op)).first;
04058   }
04059 
04060   return Op;
04061 }
04062 
04063 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
04064   // Implement fcopysign with a fabs and a conditional fneg.
04065   SDValue Tmp0 = Op.getOperand(0);
04066   SDValue Tmp1 = Op.getOperand(1);
04067   SDLoc dl(Op);
04068   EVT VT = Op.getValueType();
04069   EVT SrcVT = Tmp1.getValueType();
04070   bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
04071     Tmp0.getOpcode() == ARMISD::VMOVDRR;
04072   bool UseNEON = !InGPR && Subtarget->hasNEON();
04073 
04074   if (UseNEON) {
04075     // Use VBSL to copy the sign bit.
04076     unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
04077     SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
04078                                DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
04079     EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
04080     if (VT == MVT::f64)
04081       Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
04082                          DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
04083                          DAG.getConstant(32, dl, MVT::i32));
04084     else /*if (VT == MVT::f32)*/
04085       Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
04086     if (SrcVT == MVT::f32) {
04087       Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
04088       if (VT == MVT::f64)
04089         Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
04090                            DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
04091                            DAG.getConstant(32, dl, MVT::i32));
04092     } else if (VT == MVT::f32)
04093       Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
04094                          DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
04095                          DAG.getConstant(32, dl, MVT::i32));
04096     Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
04097     Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
04098 
04099     SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
04100                                             dl, MVT::i32);
04101     AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
04102     SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
04103                                   DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
04104 
04105     SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
04106                               DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
04107                               DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
04108     if (VT == MVT::f32) {
04109       Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
04110       Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
04111                         DAG.getConstant(0, dl, MVT::i32));
04112     } else {
04113       Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
04114     }
04115 
04116     return Res;
04117   }
04118 
04119   // Bitcast operand 1 to i32.
04120   if (SrcVT == MVT::f64)
04121     Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
04122                        Tmp1).getValue(1);
04123   Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
04124 
04125   // Or in the signbit with integer operations.
04126   SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
04127   SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
04128   Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
04129   if (VT == MVT::f32) {
04130     Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
04131                        DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
04132     return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
04133                        DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
04134   }
04135 
04136   // f64: Or the high part with signbit and then combine two parts.
04137   Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
04138                      Tmp0);
04139   SDValue Lo = Tmp0.getValue(0);
04140   SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
04141   Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
04142   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
04143 }
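
// Editor's note (illustrative): the non-NEON fallback above is the classic
// bit-level copysign. A self-contained f32 sketch (hypothetical helper,
// assuming 32-bit 'unsigned'/'float' and a compiler __builtin_memcpy):
static float copysignBits(float Mag, float Sgn) {
  unsigned M, S;
  __builtin_memcpy(&M, &Mag, 4);
  __builtin_memcpy(&S, &Sgn, 4);
  unsigned R = (M & 0x7fffffffu)    // magnitude with sign cleared (Mask2)
             | (S & 0x80000000u);   // sign bit only (Mask1)
  float Out;
  __builtin_memcpy(&Out, &R, 4);
  return Out;
}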
04144 
04145 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
04146   MachineFunction &MF = DAG.getMachineFunction();
04147   MachineFrameInfo *MFI = MF.getFrameInfo();
04148   MFI->setReturnAddressIsTaken(true);
04149 
04150   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
04151     return SDValue();
04152 
04153   EVT VT = Op.getValueType();
04154   SDLoc dl(Op);
04155   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
04156   if (Depth) {
04157     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
04158     SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
04159     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
04160                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
04161                        MachinePointerInfo(), false, false, false, 0);
04162   }
04163 
04164   // Return LR, which contains the return address. Mark it an implicit live-in.
04165   unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
04166   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
04167 }
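
// Editor's note (illustrative): for Depth > 0 the code above assumes a
// frame layout in which the saved LR sits one word above the saved frame
// pointer (an assumption about the in-memory frame record), roughly:
//
//   retaddr(d) = load32(frameaddr(d) + 4);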
04168 
04169 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
04170   const ARMBaseRegisterInfo &ARI =
04171     *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
04172   MachineFunction &MF = DAG.getMachineFunction();
04173   MachineFrameInfo *MFI = MF.getFrameInfo();
04174   MFI->setFrameAddressIsTaken(true);
04175 
04176   EVT VT = Op.getValueType();
04177   SDLoc dl(Op);  // FIXME probably not meaningful
04178   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
04179   unsigned FrameReg = ARI.getFrameRegister(MF);
04180   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
04181   while (Depth--)
04182     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
04183                             MachinePointerInfo(),
04184                             false, false, false, 0);
04185   return FrameAddr;
04186 }
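
// Editor's note (illustrative): the loop above walks the saved-FP chain,
// one load per frame; each frame record is assumed to begin with the
// caller's frame pointer:
//
//   fp = <frame register>;
//   while (Depth--) fp = load32(fp);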
04187 
04188 // FIXME? Maybe this could be a TableGen attribute on some registers and
04189 // this table could be generated automatically from RegInfo.
04190 unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
04191                                               SelectionDAG &DAG) const {
04192   unsigned Reg = StringSwitch<unsigned>(RegName)
04193                        .Case("sp", ARM::SP)
04194                        .Default(0);
04195   if (Reg)
04196     return Reg;
04197   report_fatal_error(Twine("Invalid register name \""
04198                               + StringRef(RegName) + "\"."));
04199 }
04200 
04201 // The result is a 64-bit value, so split it into two 32-bit values and
04202 // return them as a pair of values.
04203 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
04204                                 SelectionDAG &DAG) {
04205   SDLoc DL(N);
04206 
04207   // This function is only supposed to be called for an i64 destination.
04208   assert(N->getValueType(0) == MVT::i64
04209           && "ExpandREAD_REGISTER called for non-i64 type result.");
04210 
04211   SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
04212                              DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
04213                              N->getOperand(0),
04214                              N->getOperand(1));
04215 
04216   Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
04217                     Read.getValue(1)));
04218   Results.push_back(Read.getOperand(0));
04219 }
04220 
04221 /// \p BC is a bitcast that is about to be turned into a VMOVDRR.
04222 /// When \p DstVT, the destination type of \p BC, is on the vector