LLVM API Documentation

ARMISelLowering.cpp
00001 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines the interfaces that ARM uses to lower LLVM code into a
00011 // selection DAG.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "ARMISelLowering.h"
00016 #include "ARMCallingConv.h"
00017 #include "ARMConstantPoolValue.h"
00018 #include "ARMMachineFunctionInfo.h"
00019 #include "ARMPerfectShuffle.h"
00020 #include "ARMSubtarget.h"
00021 #include "ARMTargetMachine.h"
00022 #include "ARMTargetObjectFile.h"
00023 #include "MCTargetDesc/ARMAddressingModes.h"
00024 #include "llvm/ADT/Statistic.h"
00025 #include "llvm/ADT/StringExtras.h"
00026 #include "llvm/CodeGen/CallingConvLower.h"
00027 #include "llvm/CodeGen/IntrinsicLowering.h"
00028 #include "llvm/CodeGen/MachineBasicBlock.h"
00029 #include "llvm/CodeGen/MachineFrameInfo.h"
00030 #include "llvm/CodeGen/MachineFunction.h"
00031 #include "llvm/CodeGen/MachineInstrBuilder.h"
00032 #include "llvm/CodeGen/MachineModuleInfo.h"
00033 #include "llvm/CodeGen/MachineRegisterInfo.h"
00034 #include "llvm/CodeGen/SelectionDAG.h"
00035 #include "llvm/IR/CallingConv.h"
00036 #include "llvm/IR/Constants.h"
00037 #include "llvm/IR/Function.h"
00038 #include "llvm/IR/GlobalValue.h"
00039 #include "llvm/IR/IRBuilder.h"
00040 #include "llvm/IR/Instruction.h"
00041 #include "llvm/IR/Instructions.h"
00042 #include "llvm/IR/Intrinsics.h"
00043 #include "llvm/IR/Type.h"
00044 #include "llvm/MC/MCSectionMachO.h"
00045 #include "llvm/Support/CommandLine.h"
00046 #include "llvm/Support/Debug.h"
00047 #include "llvm/Support/ErrorHandling.h"
00048 #include "llvm/Support/MathExtras.h"
00049 #include "llvm/Target/TargetOptions.h"
00050 #include <utility>
00051 using namespace llvm;
00052 
00053 #define DEBUG_TYPE "arm-isel"
00054 
00055 STATISTIC(NumTailCalls, "Number of tail calls");
00056 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
00057 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
00058 
00059 cl::opt<bool>
00060 EnableARMLongCalls("arm-long-calls", cl::Hidden,
00061   cl::desc("Generate calls via indirect call instructions"),
00062   cl::init(false));
00063 
00064 static cl::opt<bool>
00065 ARMInterworking("arm-interworking", cl::Hidden,
00066   cl::desc("Enable / disable ARM interworking (for debugging only)"),
00067   cl::init(true));
00068 
00069 namespace {
00070   class ARMCCState : public CCState {
00071   public:
00072     ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
00073                const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
00074                LLVMContext &C, ParmContext PC)
00075         : CCState(CC, isVarArg, MF, TM, locs, C) {
00076       assert(((PC == Call) || (PC == Prologue)) &&
00077              "ARMCCState users must specify whether their context is call"
00078              " or prologue generation.");
00079       CallOrPrologue = PC;
00080     }
00081   };
00082 }
00083 
00084 // The APCS parameter registers.
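      // Under both APCS and AAPCS the first four 32-bit argument words are
      // passed in r0-r3; any remaining arguments are passed on the stack.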
00085 static const MCPhysReg GPRArgRegs[] = {
00086   ARM::R0, ARM::R1, ARM::R2, ARM::R3
00087 };
00088 
00089 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
00090                                        MVT PromotedBitwiseVT) {
00091   if (VT != PromotedLdStVT) {
00092     setOperationAction(ISD::LOAD, VT, Promote);
00093     AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
00094 
00095     setOperationAction(ISD::STORE, VT, Promote);
00096     AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
00097   }
00098 
00099   MVT ElemTy = VT.getVectorElementType();
00100   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
00101     setOperationAction(ISD::SETCC, VT, Custom);
00102   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
00103   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
00104   if (ElemTy == MVT::i32) {
00105     setOperationAction(ISD::SINT_TO_FP, VT, Custom);
00106     setOperationAction(ISD::UINT_TO_FP, VT, Custom);
00107     setOperationAction(ISD::FP_TO_SINT, VT, Custom);
00108     setOperationAction(ISD::FP_TO_UINT, VT, Custom);
00109   } else {
00110     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
00111     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
00112     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
00113     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
00114   }
00115   setOperationAction(ISD::BUILD_VECTOR,      VT, Custom);
00116   setOperationAction(ISD::VECTOR_SHUFFLE,    VT, Custom);
00117   setOperationAction(ISD::CONCAT_VECTORS,    VT, Legal);
00118   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
00119   setOperationAction(ISD::SELECT,            VT, Expand);
00120   setOperationAction(ISD::SELECT_CC,         VT, Expand);
00121   setOperationAction(ISD::VSELECT,           VT, Expand);
00122   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
00123   if (VT.isInteger()) {
00124     setOperationAction(ISD::SHL, VT, Custom);
00125     setOperationAction(ISD::SRA, VT, Custom);
00126     setOperationAction(ISD::SRL, VT, Custom);
00127   }
00128 
00129   // Promote all bit-wise operations.
00130   if (VT.isInteger() && VT != PromotedBitwiseVT) {
00131     setOperationAction(ISD::AND, VT, Promote);
00132     AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
00133     setOperationAction(ISD::OR,  VT, Promote);
00134     AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
00135     setOperationAction(ISD::XOR, VT, Promote);
00136     AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
00137   }
00138 
00139   // Neon does not support vector divide/remainder operations.
00140   setOperationAction(ISD::SDIV, VT, Expand);
00141   setOperationAction(ISD::UDIV, VT, Expand);
00142   setOperationAction(ISD::FDIV, VT, Expand);
00143   setOperationAction(ISD::SREM, VT, Expand);
00144   setOperationAction(ISD::UREM, VT, Expand);
00145   setOperationAction(ISD::FREM, VT, Expand);
00146 }
00147 
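      // addDRTypeForNEON registers a 64-bit vector type in the D-register file
      // and promotes its loads/stores to f64; addQRTypeForNEON registers a
      // 128-bit (Q-sized) vector type in D-register pairs and promotes its
      // loads/stores to v2f64. The constructor uses these helpers for every
      // legal NEON vector type (e.g. v2f32/v8i8 as D types, v4f32/v16i8 as Q
      // types).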
00148 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
00149   addRegisterClass(VT, &ARM::DPRRegClass);
00150   addTypeForNEON(VT, MVT::f64, MVT::v2i32);
00151 }
00152 
00153 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
00154   addRegisterClass(VT, &ARM::DPairRegClass);
00155   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
00156 }
00157 
00158 static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
00159   if (TT.isOSBinFormatMachO())
00160     return new TargetLoweringObjectFileMachO();
00161   if (TT.isOSWindows())
00162     return new TargetLoweringObjectFileCOFF();
00163   return new ARMElfTargetObjectFile();
00164 }
00165 
00166 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
00167     : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
00168   Subtarget = &TM.getSubtarget<ARMSubtarget>();
00169   RegInfo = TM.getRegisterInfo();
00170   Itins = TM.getInstrItineraryData();
00171 
00172   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
00173 
00174   if (Subtarget->isTargetMachO()) {
00175     // Uses VFP for Thumb libfuncs if available.
00176     if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
00177         Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) {
00178       // Single-precision floating-point arithmetic.
00179       setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
00180       setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
00181       setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
00182       setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
00183 
00184       // Double-precision floating-point arithmetic.
00185       setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
00186       setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
00187       setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
00188       setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
00189 
00190       // Single-precision comparisons.
00191       setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
00192       setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
00193       setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
00194       setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
00195       setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
00196       setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
00197       setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
00198       setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
00199 
00200       setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
00201       setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
00202       setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
00203       setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
00204       setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
00205       setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
00206       setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
00207       setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
00208 
00209       // Double-precision comparisons.
00210       setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
00211       setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
00212       setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
00213       setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
00214       setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
00215       setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
00216       setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
00217       setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
00218 
00219       setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
00220       setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
00221       setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
00222       setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
00223       setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
00224       setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
00225       setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
00226       setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
00227 
00228       // Floating-point to integer conversions.
00229       // i64 conversions are done via library routines even when generating VFP
00230       // instructions, so use the same ones.
00231       setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
00232       setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
00233       setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
00234       setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
00235 
00236       // Conversions between floating types.
00237       setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
00238       setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
00239 
00240       // Integer to floating-point conversions.
00241       // i64 conversions are done via library routines even when generating VFP
00242       // instructions, so use the same ones.
00243       // FIXME: There appears to be some naming inconsistency in ARM libgcc:
00244       // e.g., __floatunsidf vs. __floatunssidfvfp.
00245       setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
00246       setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
00247       setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
00248       setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
00249     }
00250   }
00251 
00252   // These libcalls are not available on 32-bit targets.
00253   setLibcallName(RTLIB::SHL_I128, nullptr);
00254   setLibcallName(RTLIB::SRL_I128, nullptr);
00255   setLibcallName(RTLIB::SRA_I128, nullptr);
00256 
00257   if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
00258       !Subtarget->isTargetWindows()) {
00259     static const struct {
00260       const RTLIB::Libcall Op;
00261       const char * const Name;
00262       const CallingConv::ID CC;
00263       const ISD::CondCode Cond;
00264     } LibraryCalls[] = {
00265       // Double-precision floating-point arithmetic helper functions
00266       // RTABI chapter 4.1.2, Table 2
00267       { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00268       { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00269       { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00270       { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00271 
00272       // Double-precision floating-point comparison helper functions
00273       // RTABI chapter 4.1.2, Table 3
00274       { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
00275       { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
00276       { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
00277       { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
00278       { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
00279       { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
00280       { RTLIB::UO_F64,  "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
00281       { RTLIB::O_F64,   "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
00282 
00283       // Single-precision floating-point arithmetic helper functions
00284       // RTABI chapter 4.1.2, Table 4
00285       { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00286       { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00287       { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00288       { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00289 
00290       // Single-precision floating-point comparison helper functions
00291       // RTABI chapter 4.1.2, Table 5
00292       { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
00293       { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
00294       { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
00295       { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
00296       { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
00297       { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
00298       { RTLIB::UO_F32,  "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
00299       { RTLIB::O_F32,   "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
00300 
00301       // Floating-point to integer conversions.
00302       // RTABI chapter 4.1.2, Table 6
00303       { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00304       { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00305       { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00306       { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00307       { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00308       { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00309       { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00310       { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00311 
00312       // Conversions between floating types.
00313       // RTABI chapter 4.1.2, Table 7
00314       { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00315       { RTLIB::FPEXT_F32_F64,   "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00316 
00317       // Integer to floating-point conversions.
00318       // RTABI chapter 4.1.2, Table 8
00319       { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00320       { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00321       { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00322       { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00323       { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00324       { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00325       { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00326       { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00327 
00328       // Long long helper functions
00329       // RTABI chapter 4.2, Table 9
00330       { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00331       { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00332       { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00333       { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00334 
00335       // Integer division functions
00336       // RTABI chapter 4.3.1
00337       { RTLIB::SDIV_I8,  "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00338       { RTLIB::SDIV_I16, "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00339       { RTLIB::SDIV_I32, "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00340       { RTLIB::SDIV_I64, "__aeabi_ldivmod",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00341       { RTLIB::UDIV_I8,  "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00342       { RTLIB::UDIV_I16, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00343       { RTLIB::UDIV_I32, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00344       { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00345 
00346       // Memory operations
00347       // RTABI chapter 4.3.4
00348       { RTLIB::MEMCPY,  "__aeabi_memcpy",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00349       { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00350       { RTLIB::MEMSET,  "__aeabi_memset",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00351     };
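          // Note that the duplicated "__aeabi_dcmpeq" / "__aeabi_fcmpeq" entries are
          // deliberate: the helper returns nonzero when the operands compare equal,
          // so OEQ interprets the result with SETNE while UNE interprets it with
          // SETEQ, letting one routine serve both conditions.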
00352 
00353     for (const auto &LC : LibraryCalls) {
00354       setLibcallName(LC.Op, LC.Name);
00355       setLibcallCallingConv(LC.Op, LC.CC);
00356       if (LC.Cond != ISD::SETCC_INVALID)
00357         setCmpLibcallCC(LC.Op, LC.Cond);
00358     }
00359   }
00360 
00361   if (Subtarget->isTargetWindows()) {
00362     static const struct {
00363       const RTLIB::Libcall Op;
00364       const char * const Name;
00365       const CallingConv::ID CC;
00366     } LibraryCalls[] = {
00367       { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
00368       { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
00369       { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
00370       { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
00371       { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
00372       { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
00373       { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
00374       { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
00375     };
00376 
00377     for (const auto &LC : LibraryCalls) {
00378       setLibcallName(LC.Op, LC.Name);
00379       setLibcallCallingConv(LC.Op, LC.CC);
00380     }
00381   }
00382 
00383   // Use divmod compiler-rt calls for iOS 5.0 and later.
00384   if (Subtarget->getTargetTriple().isiOS() &&
00385       !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
00386     setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
00387     setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
00388   }
00389 
00390   if (Subtarget->isThumb1Only())
00391     addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
00392   else
00393     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
00394   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
00395       !Subtarget->isThumb1Only()) {
00396     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
00397     if (!Subtarget->isFPOnlySP())
00398       addRegisterClass(MVT::f64, &ARM::DPRRegClass);
00399   }
00400 
00401   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
00402        VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
00403     for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
00404          InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
00405       setTruncStoreAction((MVT::SimpleValueType)VT,
00406                           (MVT::SimpleValueType)InnerVT, Expand);
00407     setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
00408     setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
00409     setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
00410 
00411     setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
00412     setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
00413     setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
00414     setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
00415 
00416     setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
00417   }
00418 
00419   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
00420   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
00421 
00422   if (Subtarget->hasNEON()) {
00423     addDRTypeForNEON(MVT::v2f32);
00424     addDRTypeForNEON(MVT::v8i8);
00425     addDRTypeForNEON(MVT::v4i16);
00426     addDRTypeForNEON(MVT::v2i32);
00427     addDRTypeForNEON(MVT::v1i64);
00428 
00429     addQRTypeForNEON(MVT::v4f32);
00430     addQRTypeForNEON(MVT::v2f64);
00431     addQRTypeForNEON(MVT::v16i8);
00432     addQRTypeForNEON(MVT::v8i16);
00433     addQRTypeForNEON(MVT::v4i32);
00434     addQRTypeForNEON(MVT::v2i64);
00435 
00436     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
00437     // neither Neon nor VFP supports any arithmetic operations on it.
00438     // The same mostly holds for v4f32, though vadd, vsub and vmul are natively
00439     // supported for v4f32.
00440     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
00441     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
00442     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
00443     // FIXME: Code duplication: FDIV and FREM are expanded always, see
00444     // ARMTargetLowering::addTypeForNEON method for details.
00445     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
00446     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
00447     // FIXME: Create unittest.
00448     // In other words, find a case where "copysign" appears in the DAG with
00449     // vector operands.
00450     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
00451     // FIXME: Code duplication: SETCC has custom operation action, see
00452     // ARMTargetLowering::addTypeForNEON method for details.
00453     setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
00454     // FIXME: Create unittest for FNEG and for FABS.
00455     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
00456     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
00457     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
00458     setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
00459     setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
00460     setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
00461     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
00462     setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
00463     setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
00464     setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
00465     setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
00466     setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
00467     // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
00468     setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
00469     setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
00470     setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
00471     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
00472     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
00473     setOperationAction(ISD::FMA, MVT::v2f64, Expand);
00474 
00475     setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
00476     setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
00477     setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
00478     setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
00479     setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
00480     setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
00481     setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
00482     setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
00483     setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
00484     setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
00485     setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
00486     setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
00487     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
00488     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
00489     setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
00490 
00491     // Expand the same intrinsics for v2f32 as well.
00492     setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
00493     setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
00494     setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
00495     setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
00496     setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
00497     setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
00498     setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
00499     setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
00500     setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
00501     setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
00502     setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
00503     setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
00504     setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
00505     setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
00506     setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
00507 
00508     // Neon does not support some operations on v1i64 and v2i64 types.
00509     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
00510     // Custom handling for some quad-vector types to detect VMULL.
00511     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
00512     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
00513     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
00514     // Custom handling for some vector types to avoid expensive expansions
00515     setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
00516     setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
00517     setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
00518     setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
00519     setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
00520     setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
00521     // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
00522     // a destination type that is wider than the source, nor does it have a
00523     // FP_TO_[SU]INT instruction with a destination narrower than the
00524     // source.
00525     setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
00526     setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
00527     setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
00528     setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
00529 
00530     setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);
00531     setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);
00532 
00533     // NEON does not have a single-instruction CTPOP for vectors with element
00534     // types wider than 8 bits.  However, custom lowering can leverage the
00535     // v8i8/v16i8 vcnt instruction.
00536     setOperationAction(ISD::CTPOP,      MVT::v2i32, Custom);
00537     setOperationAction(ISD::CTPOP,      MVT::v4i32, Custom);
00538     setOperationAction(ISD::CTPOP,      MVT::v4i16, Custom);
00539     setOperationAction(ISD::CTPOP,      MVT::v8i16, Custom);
00540 
00541     // NEON only has FMA instructions as of VFP4.
00542     if (!Subtarget->hasVFP4()) {
00543       setOperationAction(ISD::FMA, MVT::v2f32, Expand);
00544       setOperationAction(ISD::FMA, MVT::v4f32, Expand);
00545     }
00546 
00547     setTargetDAGCombine(ISD::INTRINSIC_VOID);
00548     setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
00549     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
00550     setTargetDAGCombine(ISD::SHL);
00551     setTargetDAGCombine(ISD::SRL);
00552     setTargetDAGCombine(ISD::SRA);
00553     setTargetDAGCombine(ISD::SIGN_EXTEND);
00554     setTargetDAGCombine(ISD::ZERO_EXTEND);
00555     setTargetDAGCombine(ISD::ANY_EXTEND);
00556     setTargetDAGCombine(ISD::SELECT_CC);
00557     setTargetDAGCombine(ISD::BUILD_VECTOR);
00558     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
00559     setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
00560     setTargetDAGCombine(ISD::STORE);
00561     setTargetDAGCombine(ISD::FP_TO_SINT);
00562     setTargetDAGCombine(ISD::FP_TO_UINT);
00563     setTargetDAGCombine(ISD::FDIV);
00564 
00565     // It is legal to extload from v4i8 to v4i16 or v4i32.
00566     MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
00567                   MVT::v4i16, MVT::v2i16,
00568                   MVT::v2i32};
00569     for (unsigned i = 0; i < 6; ++i) {
00570       setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
00571       setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
00572       setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
00573     }
00574   }
00575 
00576   // ARM and Thumb2 support UMLAL/SMLAL.
00577   if (!Subtarget->isThumb1Only())
00578     setTargetDAGCombine(ISD::ADDC);
00579 
00580 
00581   computeRegisterProperties();
00582 
00583   // ARM does not have floating-point extending loads.
00584   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
00585   setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
00586 
00587   // ... or truncating stores
00588   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
00589   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
00590   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
00591 
00592   // ARM does not have an i1 sign-extending load.
00593   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
00594 
00595   // ARM supports all 4 flavors of integer indexed load / store.
00596   if (!Subtarget->isThumb1Only()) {
00597     for (unsigned im = (unsigned)ISD::PRE_INC;
00598          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
00599       setIndexedLoadAction(im,  MVT::i1,  Legal);
00600       setIndexedLoadAction(im,  MVT::i8,  Legal);
00601       setIndexedLoadAction(im,  MVT::i16, Legal);
00602       setIndexedLoadAction(im,  MVT::i32, Legal);
00603       setIndexedStoreAction(im, MVT::i1,  Legal);
00604       setIndexedStoreAction(im, MVT::i8,  Legal);
00605       setIndexedStoreAction(im, MVT::i16, Legal);
00606       setIndexedStoreAction(im, MVT::i32, Legal);
00607     }
00608   }
00609 
00610   setOperationAction(ISD::SADDO, MVT::i32, Custom);
00611   setOperationAction(ISD::UADDO, MVT::i32, Custom);
00612   setOperationAction(ISD::SSUBO, MVT::i32, Custom);
00613   setOperationAction(ISD::USUBO, MVT::i32, Custom);
00614 
00615   // i64 operation support.
00616   setOperationAction(ISD::MUL,     MVT::i64, Expand);
00617   setOperationAction(ISD::MULHU,   MVT::i32, Expand);
00618   if (Subtarget->isThumb1Only()) {
00619     setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
00620     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
00621   }
00622   if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
00623       || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
00624     setOperationAction(ISD::MULHS, MVT::i32, Expand);
00625 
00626   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
00627   setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
00628   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
00629   setOperationAction(ISD::SRL,       MVT::i64, Custom);
00630   setOperationAction(ISD::SRA,       MVT::i64, Custom);
00631 
00632   if (!Subtarget->isThumb1Only()) {
00633     // FIXME: We should do this for Thumb1 as well.
00634     setOperationAction(ISD::ADDC,    MVT::i32, Custom);
00635     setOperationAction(ISD::ADDE,    MVT::i32, Custom);
00636     setOperationAction(ISD::SUBC,    MVT::i32, Custom);
00637     setOperationAction(ISD::SUBE,    MVT::i32, Custom);
00638   }
00639 
00640   // ARM does not have ROTL.
00641   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
00642   setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
00643   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
00644   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
00645     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
00646 
00647   // These just redirect to CTTZ and CTLZ on ARM.
00648   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
00649   setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);
00650 
00651   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
00652 
00653   // BSWAP (REV) requires ARMv6 or later.
00654   if (!Subtarget->hasV6Ops())
00655     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
00656 
00657   if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
00658       !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
00659     // These are expanded into libcalls if the CPU doesn't have a hardware divider.
00660     setOperationAction(ISD::SDIV,  MVT::i32, Expand);
00661     setOperationAction(ISD::UDIV,  MVT::i32, Expand);
00662   }
00663 
00664   // FIXME: Also set divmod for SREM on EABI
00665   setOperationAction(ISD::SREM,  MVT::i32, Expand);
00666   setOperationAction(ISD::UREM,  MVT::i32, Expand);
00667   // Register based DivRem for AEABI (RTABI 4.2)
00668   if (Subtarget->isTargetAEABI()) {
00669     setLibcallName(RTLIB::SDIVREM_I8,  "__aeabi_idivmod");
00670     setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
00671     setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
00672     setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
00673     setLibcallName(RTLIB::UDIVREM_I8,  "__aeabi_uidivmod");
00674     setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
00675     setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
00676     setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
00677 
00678     setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
00679     setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
00680     setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
00681     setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
00682     setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
00683     setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
00684     setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
00685     setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
00686 
00687     setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
00688     setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
00689   } else {
00690     setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
00691     setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
00692   }
00693 
00694   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
00695   setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
00696   setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
00697   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
00698   setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
00699 
00700   setOperationAction(ISD::TRAP, MVT::Other, Legal);
00701 
00702   // Use the default implementation.
00703   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
00704   setOperationAction(ISD::VAARG,              MVT::Other, Expand);
00705   setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
00706   setOperationAction(ISD::VAEND,              MVT::Other, Expand);
00707   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
00708   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
00709 
00710   if (!Subtarget->isTargetMachO()) {
00711     // Non-MachO platforms may return values in these registers via the
00712     // personality function.
00713     setExceptionPointerRegister(ARM::R0);
00714     setExceptionSelectorRegister(ARM::R1);
00715   }
00716 
00717   if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
00718     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
00719   else
00720     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
00721 
00722   // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
00723   // the default expansion.
00724   if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
00725     // ATOMIC_FENCE needs custom lowering; the others should have been expanded
00726     // to ldrex/strex loops already.
00727     setOperationAction(ISD::ATOMIC_FENCE,     MVT::Other, Custom);
00728 
00729     // On v8, we have particularly efficient implementations of atomic fences
00730     // if they can be combined with nearby atomic loads and stores.
00731     if (!Subtarget->hasV8Ops()) {
00732       // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
00733       setInsertFencesForAtomic(true);
00734     }
00735   } else {
00736     // If there's anything we can use as a barrier, go through custom lowering
00737     // for ATOMIC_FENCE.
00738     setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other,
00739                        Subtarget->hasAnyDataBarrier() ? Custom : Expand);
00740 
00741     // Set them all for expansion, which will force libcalls.
00742     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
00743     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
00744     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
00745     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
00746     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
00747     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
00748     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
00749     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
00750     setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
00751     setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
00752     setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
00753     setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
00754     // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
00755     // Unordered/Monotonic case.
00756     setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
00757     setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
00758   }
00759 
00760   setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);
00761 
00762   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
00763   if (!Subtarget->hasV6Ops()) {
00764     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
00765     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
00766   }
00767   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
00768 
00769   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
00770       !Subtarget->isThumb1Only()) {
00771     // Turn f64->i64 into VMOVRRD and i64->f64 into VMOVDRR,
00772     // iff the target supports VFP2.
00773     setOperationAction(ISD::BITCAST, MVT::i64, Custom);
00774     setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
00775   }
00776 
00777   // We want to custom lower some of our intrinsics.
00778   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
00779   if (Subtarget->isTargetDarwin()) {
00780     setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
00781     setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
00782     setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
00783   }
00784 
00785   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
00786   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
00787   setOperationAction(ISD::SETCC,     MVT::f64, Expand);
00788   setOperationAction(ISD::SELECT,    MVT::i32, Custom);
00789   setOperationAction(ISD::SELECT,    MVT::f32, Custom);
00790   setOperationAction(ISD::SELECT,    MVT::f64, Custom);
00791   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
00792   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
00793   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
00794 
00795   setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
00796   setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
00797   setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
00798   setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
00799   setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
00800 
00801   // We don't support sin/cos/fmod/copysign/pow
00802   setOperationAction(ISD::FSIN,      MVT::f64, Expand);
00803   setOperationAction(ISD::FSIN,      MVT::f32, Expand);
00804   setOperationAction(ISD::FCOS,      MVT::f32, Expand);
00805   setOperationAction(ISD::FCOS,      MVT::f64, Expand);
00806   setOperationAction(ISD::FSINCOS,   MVT::f64, Expand);
00807   setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
00808   setOperationAction(ISD::FREM,      MVT::f64, Expand);
00809   setOperationAction(ISD::FREM,      MVT::f32, Expand);
00810   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
00811       !Subtarget->isThumb1Only()) {
00812     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
00813     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00814   }
00815   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
00816   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
00817 
00818   if (!Subtarget->hasVFP4()) {
00819     setOperationAction(ISD::FMA, MVT::f64, Expand);
00820     setOperationAction(ISD::FMA, MVT::f32, Expand);
00821   }
00822 
00823   // Various VFP goodness
00824   if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
00825     // int <-> fp are custom expanded into bit_convert + ARMISD ops.
00826     if (Subtarget->hasVFP2()) {
00827       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
00828       setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
00829       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
00830       setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
00831     }
00832 
00833     // v8 adds f64 <-> f16 conversion. Before that it should be expanded.
00834     if (!Subtarget->hasV8Ops()) {
00835       setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
00836       setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
00837     }
00838 
00839     // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
00840     if (!Subtarget->hasFP16()) {
00841       setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
00842       setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
00843     }
00844   }
00845 
00846   // Combine sin / cos into one node or libcall if possible.
00847   if (Subtarget->hasSinCos()) {
00848     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
00849     setLibcallName(RTLIB::SINCOS_F64, "sincos");
00850     if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
00851       // For iOS, we don't want the normal expansion of a libcall to
00852       // sincos; we want to issue a libcall to __sincos_stret instead.
00853       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
00854       setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
00855     }
00856   }
00857 
00858   // We have target-specific dag combine patterns for the following nodes:
00859   // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
00860   setTargetDAGCombine(ISD::ADD);
00861   setTargetDAGCombine(ISD::SUB);
00862   setTargetDAGCombine(ISD::MUL);
00863   setTargetDAGCombine(ISD::AND);
00864   setTargetDAGCombine(ISD::OR);
00865   setTargetDAGCombine(ISD::XOR);
00866 
00867   if (Subtarget->hasV6Ops())
00868     setTargetDAGCombine(ISD::SRL);
00869 
00870   setStackPointerRegisterToSaveRestore(ARM::SP);
00871 
00872   if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
00873       !Subtarget->hasVFP2())
00874     setSchedulingPreference(Sched::RegPressure);
00875   else
00876     setSchedulingPreference(Sched::Hybrid);
00877 
00878   //// temporary - rewrite interface to use type
00879   MaxStoresPerMemset = 8;
00880   MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
00881   MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
00882   MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
00883   MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
00884   MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
00885 
00886   // On ARM, arguments smaller than 4 bytes are extended, so all arguments
00887   // are at least 4-byte aligned.
00888   setMinStackArgumentAlignment(4);
00889 
00890   // Prefer likely predicted branches to selects on out-of-order cores.
00891   PredictableSelectIsExpensive = Subtarget->isLikeA9();
00892 
00893   setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
00894 }
00895 
00896 // FIXME: It might make sense to define the representative register class as the
00897 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
00898 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
00899 // SPR's representative would be DPR_VFP2. This should work well if register
00900 // pressure tracking were modified such that a register use would increment the
00901 // pressure of the register class's representative and all of its super
00902 // classes' representatives transitively. We have not implemented this because
00903 // of the difficulty prior to coalescing of modeling operand register classes
00904 // due to the common occurrence of cross class copies and subregister insertions
00905 // and extractions.
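      // For example, v2f64 maps to DPR with a cost of 2 because a 128-bit Q
      // value occupies two D registers, and v8i64 reports a cost of 8 since it
      // spans eight consecutive D registers.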
00906 std::pair<const TargetRegisterClass*, uint8_t>
00907 ARMTargetLowering::findRepresentativeClass(MVT VT) const{
00908   const TargetRegisterClass *RRC = nullptr;
00909   uint8_t Cost = 1;
00910   switch (VT.SimpleTy) {
00911   default:
00912     return TargetLowering::findRepresentativeClass(VT);
00913   // Use DPR as representative register class for all floating point
00914   // and vector types. Since there are 32 SPR registers and 32 DPR registers,
00915   // the cost is 1 for both f32 and f64.
00916   case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
00917   case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
00918     RRC = &ARM::DPRRegClass;
00919     // When NEON is used for SP, only half of the register file is available
00920     // because operations that define both SP and DP results will be constrained
00921     // to the VFP2 class (D0-D15). We currently model this constraint prior to
00922     // coalescing by double-counting the SP regs. See the FIXME above.
00923     if (Subtarget->useNEONForSinglePrecisionFP())
00924       Cost = 2;
00925     break;
00926   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
00927   case MVT::v4f32: case MVT::v2f64:
00928     RRC = &ARM::DPRRegClass;
00929     Cost = 2;
00930     break;
00931   case MVT::v4i64:
00932     RRC = &ARM::DPRRegClass;
00933     Cost = 4;
00934     break;
00935   case MVT::v8i64:
00936     RRC = &ARM::DPRRegClass;
00937     Cost = 8;
00938     break;
00939   }
00940   return std::make_pair(RRC, Cost);
00941 }
00942 
00943 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
00944   switch (Opcode) {
00945   default: return nullptr;
00946   case ARMISD::Wrapper:       return "ARMISD::Wrapper";
00947   case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
00948   case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
00949   case ARMISD::CALL:          return "ARMISD::CALL";
00950   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
00951   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
00952   case ARMISD::tCALL:         return "ARMISD::tCALL";
00953   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
00954   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
00955   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
00956   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
00957   case ARMISD::INTRET_FLAG:   return "ARMISD::INTRET_FLAG";
00958   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
00959   case ARMISD::CMP:           return "ARMISD::CMP";
00960   case ARMISD::CMN:           return "ARMISD::CMN";
00961   case ARMISD::CMPZ:          return "ARMISD::CMPZ";
00962   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
00963   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
00964   case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
00965   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
00966 
00967   case ARMISD::CMOV:          return "ARMISD::CMOV";
00968 
00969   case ARMISD::RBIT:          return "ARMISD::RBIT";
00970 
00971   case ARMISD::FTOSI:         return "ARMISD::FTOSI";
00972   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
00973   case ARMISD::SITOF:         return "ARMISD::SITOF";
00974   case ARMISD::UITOF:         return "ARMISD::UITOF";
00975 
00976   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
00977   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
00978   case ARMISD::RRX:           return "ARMISD::RRX";
00979 
00980   case ARMISD::ADDC:          return "ARMISD::ADDC";
00981   case ARMISD::ADDE:          return "ARMISD::ADDE";
00982   case ARMISD::SUBC:          return "ARMISD::SUBC";
00983   case ARMISD::SUBE:          return "ARMISD::SUBE";
00984 
00985   case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
00986   case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
00987 
00988   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
00989   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
00990 
00991   case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
00992 
00993   case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
00994 
00995   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
00996 
00997   case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
00998 
00999   case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
01000 
01001   case ARMISD::WIN__CHKSTK:   return "ARMISD::WIN__CHKSTK";
01002 
01003   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
01004   case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
01005   case ARMISD::VCGE:          return "ARMISD::VCGE";
01006   case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
01007   case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
01008   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
01009   case ARMISD::VCGT:          return "ARMISD::VCGT";
01010   case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
01011   case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
01012   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
01013   case ARMISD::VTST:          return "ARMISD::VTST";
01014 
01015   case ARMISD::VSHL:          return "ARMISD::VSHL";
01016   case ARMISD::VSHRs:         return "ARMISD::VSHRs";
01017   case ARMISD::VSHRu:         return "ARMISD::VSHRu";
01018   case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
01019   case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
01020   case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
01021   case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
01022   case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
01023   case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
01024   case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
01025   case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
01026   case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
01027   case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
01028   case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
01029   case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
01030   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
01031   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
01032   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
01033   case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
01034   case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
01035   case ARMISD::VDUP:          return "ARMISD::VDUP";
01036   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
01037   case ARMISD::VEXT:          return "ARMISD::VEXT";
01038   case ARMISD::VREV64:        return "ARMISD::VREV64";
01039   case ARMISD::VREV32:        return "ARMISD::VREV32";
01040   case ARMISD::VREV16:        return "ARMISD::VREV16";
01041   case ARMISD::VZIP:          return "ARMISD::VZIP";
01042   case ARMISD::VUZP:          return "ARMISD::VUZP";
01043   case ARMISD::VTRN:          return "ARMISD::VTRN";
01044   case ARMISD::VTBL1:         return "ARMISD::VTBL1";
01045   case ARMISD::VTBL2:         return "ARMISD::VTBL2";
01046   case ARMISD::VMULLs:        return "ARMISD::VMULLs";
01047   case ARMISD::VMULLu:        return "ARMISD::VMULLu";
01048   case ARMISD::UMLAL:         return "ARMISD::UMLAL";
01049   case ARMISD::SMLAL:         return "ARMISD::SMLAL";
01050   case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
01051   case ARMISD::FMAX:          return "ARMISD::FMAX";
01052   case ARMISD::FMIN:          return "ARMISD::FMIN";
01053   case ARMISD::VMAXNM:        return "ARMISD::VMAXNM";
01054   case ARMISD::VMINNM:        return "ARMISD::VMINNM";
01055   case ARMISD::BFI:           return "ARMISD::BFI";
01056   case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
01057   case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
01058   case ARMISD::VBSL:          return "ARMISD::VBSL";
01059   case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
01060   case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
01061   case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
01062   case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
01063   case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
01064   case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
01065   case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
01066   case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
01067   case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
01068   case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
01069   case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
01070   case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
01071   case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
01072   case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
01073   case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
01074   case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
01075   case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
01076   case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
01077   case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
01078   case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
01079   }
01080 }
01081 
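      // SETCC results are i32 for scalars (the pointer-sized type) and, for
      // vectors, a vector with integer elements of the same width as the operands,
      // matching the ZeroOrNegativeOneBooleanContent convention chosen in the
      // constructor.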
01082 EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
01083   if (!VT.isVector()) return getPointerTy();
01084   return VT.changeVectorElementTypeToInteger();
01085 }
01086 
01087 /// getRegClassFor - Return the register class that should be used for the
01088 /// specified value type.
01089 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
01090   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
01091   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
01092   // load / store 4 to 8 consecutive D registers.
01093   if (Subtarget->hasNEON()) {
01094     if (VT == MVT::v4i64)
01095       return &ARM::QQPRRegClass;
01096     if (VT == MVT::v8i64)
01097       return &ARM::QQQQPRRegClass;
01098   }
01099   return TargetLowering::getRegClassFor(VT);
01100 }
01101 
01102 // Create a fast isel object.
01103 FastISel *
01104 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
01105                                   const TargetLibraryInfo *libInfo) const {
01106   return ARM::createFastISel(funcInfo, libInfo);
01107 }
01108 
01109 /// getMaximalGlobalOffset - Returns the maximal possible offset which can
01110 /// be used for loads / stores from the global.
01111 unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
01112   return (Subtarget->isThumb1Only() ? 127 : 4095);
01113 }
01114 
01115 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
01116   unsigned NumVals = N->getNumValues();
01117   if (!NumVals)
01118     return Sched::RegPressure;
01119 
01120   for (unsigned i = 0; i != NumVals; ++i) {
01121     EVT VT = N->getValueType(i);
01122     if (VT == MVT::Glue || VT == MVT::Other)
01123       continue;
01124     if (VT.isFloatingPoint() || VT.isVector())
01125       return Sched::ILP;
01126   }
01127 
01128   if (!N->isMachineOpcode())
01129     return Sched::RegPressure;
01130 
01131   // Loads are scheduled for latency even if the instruction itinerary
01132   // is not available.
01133   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
01134   const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
01135 
01136   if (MCID.getNumDefs() == 0)
01137     return Sched::RegPressure;
01138   if (!Itins->isEmpty() &&
01139       Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
01140     return Sched::ILP;
01141 
01142   return Sched::RegPressure;
01143 }
01144 
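      // Illustrative note (added to this listing, not original source text):
      // getSchedulingPreference above prefers ILP for nodes producing
      // floating-point or vector values, and for machine loads whose first def
      // has an operand latency above 2 in the itinerary; everything else is
      // scheduled for register pressure. A hypothetical trace:
      //
      //   getSchedulingPreference(N_fadd_f64)  == Sched::ILP          // FP result
      //   getSchedulingPreference(N_add_i32)   == Sched::RegPressure  // plain integer op
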
01145 //===----------------------------------------------------------------------===//
01146 // Lowering Code
01147 //===----------------------------------------------------------------------===//
01148 
01149 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
01150 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
01151   switch (CC) {
01152   default: llvm_unreachable("Unknown condition code!");
01153   case ISD::SETNE:  return ARMCC::NE;
01154   case ISD::SETEQ:  return ARMCC::EQ;
01155   case ISD::SETGT:  return ARMCC::GT;
01156   case ISD::SETGE:  return ARMCC::GE;
01157   case ISD::SETLT:  return ARMCC::LT;
01158   case ISD::SETLE:  return ARMCC::LE;
01159   case ISD::SETUGT: return ARMCC::HI;
01160   case ISD::SETUGE: return ARMCC::HS;
01161   case ISD::SETULT: return ARMCC::LO;
01162   case ISD::SETULE: return ARMCC::LS;
01163   }
01164 }
01165 
01166 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
01167 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
01168                         ARMCC::CondCodes &CondCode2) {
01169   CondCode2 = ARMCC::AL;
01170   switch (CC) {
01171   default: llvm_unreachable("Unknown FP condition!");
01172   case ISD::SETEQ:
01173   case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
01174   case ISD::SETGT:
01175   case ISD::SETOGT: CondCode = ARMCC::GT; break;
01176   case ISD::SETGE:
01177   case ISD::SETOGE: CondCode = ARMCC::GE; break;
01178   case ISD::SETOLT: CondCode = ARMCC::MI; break;
01179   case ISD::SETOLE: CondCode = ARMCC::LS; break;
01180   case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
01181   case ISD::SETO:   CondCode = ARMCC::VC; break;
01182   case ISD::SETUO:  CondCode = ARMCC::VS; break;
01183   case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
01184   case ISD::SETUGT: CondCode = ARMCC::HI; break;
01185   case ISD::SETUGE: CondCode = ARMCC::PL; break;
01186   case ISD::SETLT:
01187   case ISD::SETULT: CondCode = ARMCC::LT; break;
01188   case ISD::SETLE:
01189   case ISD::SETULE: CondCode = ARMCC::LE; break;
01190   case ISD::SETNE:
01191   case ISD::SETUNE: CondCode = ARMCC::NE; break;
01192   }
01193 }
01194 
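      // Illustrative example (added note, not original source text): an ordered
      // not-equal FP compare needs two ARM condition codes, so users of this
      // helper emit two predicated operations when CondCode2 != ARMCC::AL:
      //
      //   ARMCC::CondCodes CC1, CC2;
      //   FPCCToARMCC(ISD::SETONE, CC1, CC2);  // CC1 == ARMCC::MI, CC2 == ARMCC::GT
      //   // e.g. a branch on SETONE becomes a branch on MI followed by a branch
      //   // on GT; when CC2 == ARMCC::AL a single predicated instruction suffices.
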
01195 //===----------------------------------------------------------------------===//
01196 //                      Calling Convention Implementation
01197 //===----------------------------------------------------------------------===//
01198 
01199 #include "ARMGenCallingConv.inc"
01200 
01201 /// getEffectiveCallingConv - Get the effective calling convention, taking into
01202 /// account presence of floating point hardware and calling convention
01203 /// limitations, such as support for variadic functions.
01204 CallingConv::ID
01205 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
01206                                            bool isVarArg) const {
01207   switch (CC) {
01208   default:
01209     llvm_unreachable("Unsupported calling convention");
01210   case CallingConv::ARM_AAPCS:
01211   case CallingConv::ARM_APCS:
01212   case CallingConv::GHC:
01213     return CC;
01214   case CallingConv::ARM_AAPCS_VFP:
01215     return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
01216   case CallingConv::C:
01217     if (!Subtarget->isAAPCS_ABI())
01218       return CallingConv::ARM_APCS;
01219     else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
01220              getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
01221              !isVarArg)
01222       return CallingConv::ARM_AAPCS_VFP;
01223     else
01224       return CallingConv::ARM_AAPCS;
01225   case CallingConv::Fast:
01226     if (!Subtarget->isAAPCS_ABI()) {
01227       if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
01228         return CallingConv::Fast;
01229       return CallingConv::ARM_APCS;
01230     } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
01231       return CallingConv::ARM_AAPCS_VFP;
01232     else
01233       return CallingConv::ARM_AAPCS;
01234   }
01235 }
01236 
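      // Illustrative example (added note, not original source text): with the C
      // calling convention on an AAPCS target that has VFP2, is not Thumb1-only,
      // and uses the hard-float ABI, the effective convention depends only on
      // whether the call is variadic:
      //
      //   getEffectiveCallingConv(CallingConv::C, /*isVarArg=*/false)
      //       == CallingConv::ARM_AAPCS_VFP   // FP arguments may use VFP registers
      //   getEffectiveCallingConv(CallingConv::C, /*isVarArg=*/true)
      //       == CallingConv::ARM_AAPCS       // variadic calls fall back to AAPCS
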
01237 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
01238 /// CallingConvention.
01239 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
01240                                                  bool Return,
01241                                                  bool isVarArg) const {
01242   switch (getEffectiveCallingConv(CC, isVarArg)) {
01243   default:
01244     llvm_unreachable("Unsupported calling convention");
01245   case CallingConv::ARM_APCS:
01246     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
01247   case CallingConv::ARM_AAPCS:
01248     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
01249   case CallingConv::ARM_AAPCS_VFP:
01250     return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
01251   case CallingConv::Fast:
01252     return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
01253   case CallingConv::GHC:
01254     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
01255   }
01256 }
01257 
01258 /// LowerCallResult - Lower the result values of a call into the
01259 /// appropriate copies out of appropriate physical registers.
01260 SDValue
01261 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
01262                                    CallingConv::ID CallConv, bool isVarArg,
01263                                    const SmallVectorImpl<ISD::InputArg> &Ins,
01264                                    SDLoc dl, SelectionDAG &DAG,
01265                                    SmallVectorImpl<SDValue> &InVals,
01266                                    bool isThisReturn, SDValue ThisVal) const {
01267 
01268   // Assign locations to each value returned by this call.
01269   SmallVector<CCValAssign, 16> RVLocs;
01270   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
01271                     getTargetMachine(), RVLocs, *DAG.getContext(), Call);
01272   CCInfo.AnalyzeCallResult(Ins,
01273                            CCAssignFnForNode(CallConv, /* Return*/ true,
01274                                              isVarArg));
01275 
01276   // Copy all of the result registers out of their specified physreg.
01277   for (unsigned i = 0; i != RVLocs.size(); ++i) {
01278     CCValAssign VA = RVLocs[i];
01279 
01280     // Pass the 'this' value directly from the argument to the return value, to
01281     // avoid register unit interference.
01282     if (i == 0 && isThisReturn) {
01283       assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
01284              "unexpected return calling convention register assignment");
01285       InVals.push_back(ThisVal);
01286       continue;
01287     }
01288 
01289     SDValue Val;
01290     if (VA.needsCustom()) {
01291       // Handle f64 or half of a v2f64.
01292       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
01293                                       InFlag);
01294       Chain = Lo.getValue(1);
01295       InFlag = Lo.getValue(2);
01296       VA = RVLocs[++i]; // skip ahead to next loc
01297       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
01298                                       InFlag);
01299       Chain = Hi.getValue(1);
01300       InFlag = Hi.getValue(2);
01301       if (!Subtarget->isLittle())
01302         std::swap (Lo, Hi);
01303       Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
01304 
01305       if (VA.getLocVT() == MVT::v2f64) {
01306         SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
01307         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
01308                           DAG.getConstant(0, MVT::i32));
01309 
01310         VA = RVLocs[++i]; // skip ahead to next loc
01311         Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
01312         Chain = Lo.getValue(1);
01313         InFlag = Lo.getValue(2);
01314         VA = RVLocs[++i]; // skip ahead to next loc
01315         Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
01316         Chain = Hi.getValue(1);
01317         InFlag = Hi.getValue(2);
01318         if (!Subtarget->isLittle())
01319           std::swap (Lo, Hi);
01320         Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
01321         Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
01322                           DAG.getConstant(1, MVT::i32));
01323       }
01324     } else {
01325       Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
01326                                InFlag);
01327       Chain = Val.getValue(1);
01328       InFlag = Val.getValue(2);
01329     }
01330 
01331     switch (VA.getLocInfo()) {
01332     default: llvm_unreachable("Unknown loc info!");
01333     case CCValAssign::Full: break;
01334     case CCValAssign::BCvt:
01335       Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
01336       break;
01337     }
01338 
01339     InVals.push_back(Val);
01340   }
01341 
01342   return Chain;
01343 }
01344 
01345 /// LowerMemOpCallTo - Store the argument to the stack.
01346 SDValue
01347 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
01348                                     SDValue StackPtr, SDValue Arg,
01349                                     SDLoc dl, SelectionDAG &DAG,
01350                                     const CCValAssign &VA,
01351                                     ISD::ArgFlagsTy Flags) const {
01352   unsigned LocMemOffset = VA.getLocMemOffset();
01353   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
01354   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
01355   return DAG.getStore(Chain, dl, Arg, PtrOff,
01356                       MachinePointerInfo::getStack(LocMemOffset),
01357                       false, false, 0);
01358 }
01359 
01360 void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
01361                                          SDValue Chain, SDValue &Arg,
01362                                          RegsToPassVector &RegsToPass,
01363                                          CCValAssign &VA, CCValAssign &NextVA,
01364                                          SDValue &StackPtr,
01365                                          SmallVectorImpl<SDValue> &MemOpChains,
01366                                          ISD::ArgFlagsTy Flags) const {
01367 
01368   SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
01369                               DAG.getVTList(MVT::i32, MVT::i32), Arg);
01370   unsigned id = Subtarget->isLittle() ? 0 : 1;
01371   RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
01372 
01373   if (NextVA.isRegLoc())
01374     RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
01375   else {
01376     assert(NextVA.isMemLoc());
01377     if (!StackPtr.getNode())
01378       StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
01379 
01380     MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
01381                                            dl, DAG, NextVA,
01382                                            Flags));
01383   }
01384 }
01385 
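      // Illustrative note (added, not original source text): PassF64ArgInRegs
      // decomposes an f64 argument with ARMISD::VMOVRRD and forwards the two i32
      // halves according to endianness. For a little-endian target where VA is R0
      // and NextVA is R1 (a hypothetical assignment):
      //
      //   fmrrd.getValue(0)  ->  R0   // low 32 bits
      //   fmrrd.getValue(1)  ->  R1   // high 32 bits
      //
      // On big-endian targets the halves are swapped, and if NextVA is a memory
      // location the second half is stored to its stack slot instead.
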
01386 /// LowerCall - Lower a call into a callseq_start <-
01387 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
01388 /// nodes.
01389 SDValue
01390 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
01391                              SmallVectorImpl<SDValue> &InVals) const {
01392   SelectionDAG &DAG                     = CLI.DAG;
01393   SDLoc &dl                          = CLI.DL;
01394   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
01395   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
01396   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
01397   SDValue Chain                         = CLI.Chain;
01398   SDValue Callee                        = CLI.Callee;
01399   bool &isTailCall                      = CLI.IsTailCall;
01400   CallingConv::ID CallConv              = CLI.CallConv;
01401   bool doesNotRet                       = CLI.DoesNotReturn;
01402   bool isVarArg                         = CLI.IsVarArg;
01403 
01404   MachineFunction &MF = DAG.getMachineFunction();
01405   bool isStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
01406   bool isThisReturn   = false;
01407   bool isSibCall      = false;
01408 
01409   // Disable tail calls if they're not supported.
01410   if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls)
01411     isTailCall = false;
01412 
01413   if (isTailCall) {
01414     // Check if it's really possible to do a tail call.
01415     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
01416                     isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
01417                                                    Outs, OutVals, Ins, DAG);
01418     if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
01419       report_fatal_error("failed to perform tail call elimination on a call "
01420                          "site marked musttail");
01421     // We don't support GuaranteedTailCallOpt for ARM, only automatically
01422     // detected sibcalls.
01423     if (isTailCall) {
01424       ++NumTailCalls;
01425       isSibCall = true;
01426     }
01427   }
01428 
01429   // Analyze operands of the call, assigning locations to each operand.
01430   SmallVector<CCValAssign, 16> ArgLocs;
01431   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
01432                  getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
01433   CCInfo.AnalyzeCallOperands(Outs,
01434                              CCAssignFnForNode(CallConv, /* Return*/ false,
01435                                                isVarArg));
01436 
01437   // Get a count of how many bytes are to be pushed on the stack.
01438   unsigned NumBytes = CCInfo.getNextStackOffset();
01439 
01440   // For tail calls, memory operands are available in our caller's stack.
01441   if (isSibCall)
01442     NumBytes = 0;
01443 
01444   // Adjust the stack pointer for the new arguments...
01445   // These operations are automatically eliminated by the prolog/epilog pass
01446   if (!isSibCall)
01447     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
01448                                  dl);
01449 
01450   SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
01451 
01452   RegsToPassVector RegsToPass;
01453   SmallVector<SDValue, 8> MemOpChains;
01454 
01455   // Walk the register/memloc assignments, inserting copies/loads.  In the case
01456   // of tail call optimization, arguments are handled later.
01457   for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
01458        i != e;
01459        ++i, ++realArgIdx) {
01460     CCValAssign &VA = ArgLocs[i];
01461     SDValue Arg = OutVals[realArgIdx];
01462     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
01463     bool isByVal = Flags.isByVal();
01464 
01465     // Promote the value if needed.
01466     switch (VA.getLocInfo()) {
01467     default: llvm_unreachable("Unknown loc info!");
01468     case CCValAssign::Full: break;
01469     case CCValAssign::SExt:
01470       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
01471       break;
01472     case CCValAssign::ZExt:
01473       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
01474       break;
01475     case CCValAssign::AExt:
01476       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
01477       break;
01478     case CCValAssign::BCvt:
01479       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
01480       break;
01481     }
01482 
01483     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
01484     if (VA.needsCustom()) {
01485       if (VA.getLocVT() == MVT::v2f64) {
01486         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
01487                                   DAG.getConstant(0, MVT::i32));
01488         SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
01489                                   DAG.getConstant(1, MVT::i32));
01490 
01491         PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
01492                          VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
01493 
01494         VA = ArgLocs[++i]; // skip ahead to next loc
01495         if (VA.isRegLoc()) {
01496           PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
01497                            VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
01498         } else {
01499           assert(VA.isMemLoc());
01500 
01501           MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
01502                                                  dl, DAG, VA, Flags));
01503         }
01504       } else {
01505         PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
01506                          StackPtr, MemOpChains, Flags);
01507       }
01508     } else if (VA.isRegLoc()) {
01509       if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
01510         assert(VA.getLocVT() == MVT::i32 &&
01511                "unexpected calling convention register assignment");
01512         assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
01513                "unexpected use of 'returned'");
01514         isThisReturn = true;
01515       }
01516       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
01517     } else if (isByVal) {
01518       assert(VA.isMemLoc());
01519       unsigned offset = 0;
01520 
01521       // True if this byval aggregate will be split between registers
01522       // and memory.
01523       unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
01524       unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
01525 
01526       if (CurByValIdx < ByValArgsCount) {
01527 
01528         unsigned RegBegin, RegEnd;
01529         CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
01530 
01531         EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
01532         unsigned int i, j;
01533         for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
01534           SDValue Const = DAG.getConstant(4*i, MVT::i32);
01535           SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
01536           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
01537                                      MachinePointerInfo(),
01538                                      false, false, false,
01539                                      DAG.InferPtrAlignment(AddArg));
01540           MemOpChains.push_back(Load.getValue(1));
01541           RegsToPass.push_back(std::make_pair(j, Load));
01542         }
01543 
01544         // If the parameter size exceeds the register area, the "offset" value
01545         // helps us compute the stack slot for the remaining part properly.
01546         offset = RegEnd - RegBegin;
01547 
01548         CCInfo.nextInRegsParam();
01549       }
01550 
01551       if (Flags.getByValSize() > 4*offset) {
01552         unsigned LocMemOffset = VA.getLocMemOffset();
01553         SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
01554         SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
01555                                   StkPtrOff);
01556         SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
01557         SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
01558         SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
01559                                            MVT::i32);
01560         SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
01561 
01562         SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
01563         SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
01564         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
01565                                           Ops));
01566       }
01567     } else if (!isSibCall) {
01568       assert(VA.isMemLoc());
01569 
01570       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
01571                                              dl, DAG, VA, Flags));
01572     }
01573   }
01574 
01575   if (!MemOpChains.empty())
01576     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
01577 
01578   // Build a sequence of copy-to-reg nodes chained together with token chain
01579   // and flag operands which copy the outgoing args into the appropriate regs.
01580   SDValue InFlag;
01581   // Tail call byval lowering might overwrite argument registers, so in the case
01582   // of tail call optimization the copies to registers are lowered later.
01583   if (!isTailCall)
01584     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
01585       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
01586                                RegsToPass[i].second, InFlag);
01587       InFlag = Chain.getValue(1);
01588     }
01589 
01590   // For tail calls lower the arguments to the 'real' stack slot.
01591   if (isTailCall) {
01592     // Force all the incoming stack arguments to be loaded from the stack
01593     // before any new outgoing arguments are stored to the stack, because the
01594     // outgoing stack slots may alias the incoming argument stack slots, and
01595     // the alias isn't otherwise explicit. This is slightly more conservative
01596     // than necessary, because it means that each store effectively depends
01597     // on every argument instead of just those arguments it would clobber.
01598 
01599     // Do not flag preceding copytoreg stuff together with the following stuff.
01600     InFlag = SDValue();
01601     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
01602       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
01603                                RegsToPass[i].second, InFlag);
01604       InFlag = Chain.getValue(1);
01605     }
01606     InFlag = SDValue();
01607   }
01608 
01609   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
01610   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
01611   // node so that legalize doesn't hack it.
01612   bool isDirect = false;
01613   bool isARMFunc = false;
01614   bool isLocalARMFunc = false;
01615   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
01616 
01617   if (EnableARMLongCalls) {
01618     assert((Subtarget->isTargetWindows() ||
01619             getTargetMachine().getRelocationModel() == Reloc::Static) &&
01620            "long-calls with non-static relocation model!");
01621     // Handle a global address or an external symbol. If it's not one of
01622     // those, the target's already in a register, so we don't need to do
01623     // anything extra.
01624     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
01625       const GlobalValue *GV = G->getGlobal();
01626       // Create a constant pool entry for the callee address
01627       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
01628       ARMConstantPoolValue *CPV =
01629         ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
01630 
01631       // Get the address of the callee into a register
01632       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
01633       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
01634       Callee = DAG.getLoad(getPointerTy(), dl,
01635                            DAG.getEntryNode(), CPAddr,
01636                            MachinePointerInfo::getConstantPool(),
01637                            false, false, false, 0);
01638     } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
01639       const char *Sym = S->getSymbol();
01640 
01641       // Create a constant pool entry for the callee address
01642       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
01643       ARMConstantPoolValue *CPV =
01644         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
01645                                       ARMPCLabelIndex, 0);
01646       // Get the address of the callee into a register
01647       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
01648       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
01649       Callee = DAG.getLoad(getPointerTy(), dl,
01650                            DAG.getEntryNode(), CPAddr,
01651                            MachinePointerInfo::getConstantPool(),
01652                            false, false, false, 0);
01653     }
01654   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
01655     const GlobalValue *GV = G->getGlobal();
01656     isDirect = true;
01657     bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
01658     bool isStub = (isExt && Subtarget->isTargetMachO()) &&
01659                    getTargetMachine().getRelocationModel() != Reloc::Static;
01660     isARMFunc = !Subtarget->isThumb() || isStub;
01661     // ARM call to a local ARM function is predicable.
01662     isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
01663     // tBX takes a register source operand.
01664     if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
01665       assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
01666       Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
01667                            DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
01668     } else if (Subtarget->isTargetCOFF()) {
01669       assert(Subtarget->isTargetWindows() &&
01670              "Windows is the only supported COFF target");
01671       unsigned TargetFlags = GV->hasDLLImportStorageClass()
01672                                  ? ARMII::MO_DLLIMPORT
01673                                  : ARMII::MO_NO_FLAG;
01674       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,
01675                                           TargetFlags);
01676       if (GV->hasDLLImportStorageClass())
01677         Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
01678                              DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),
01679                                          Callee), MachinePointerInfo::getGOT(),
01680                              false, false, false, 0);
01681     } else {
01682       // On ELF targets for PIC code, direct calls should go through the PLT
01683       unsigned OpFlags = 0;
01684       if (Subtarget->isTargetELF() &&
01685           getTargetMachine().getRelocationModel() == Reloc::PIC_)
01686         OpFlags = ARMII::MO_PLT;
01687       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
01688     }
01689   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
01690     isDirect = true;
01691     bool isStub = Subtarget->isTargetMachO() &&
01692                   getTargetMachine().getRelocationModel() != Reloc::Static;
01693     isARMFunc = !Subtarget->isThumb() || isStub;
01694     // tBX takes a register source operand.
01695     const char *Sym = S->getSymbol();
01696     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
01697       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
01698       ARMConstantPoolValue *CPV =
01699         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
01700                                       ARMPCLabelIndex, 4);
01701       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
01702       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
01703       Callee = DAG.getLoad(getPointerTy(), dl,
01704                            DAG.getEntryNode(), CPAddr,
01705                            MachinePointerInfo::getConstantPool(),
01706                            false, false, false, 0);
01707       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
01708       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
01709                            getPointerTy(), Callee, PICLabel);
01710     } else {
01711       unsigned OpFlags = 0;
01712       // On ELF targets for PIC code, direct calls should go through the PLT
01713       if (Subtarget->isTargetELF() &&
01714                   getTargetMachine().getRelocationModel() == Reloc::PIC_)
01715         OpFlags = ARMII::MO_PLT;
01716       Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
01717     }
01718   }
01719 
01720   // FIXME: handle tail calls differently.
01721   unsigned CallOpc;
01722   bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(
01723       AttributeSet::FunctionIndex, Attribute::MinSize);
01724   if (Subtarget->isThumb()) {
01725     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
01726       CallOpc = ARMISD::CALL_NOLINK;
01727     else
01728       CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
01729   } else {
01730     if (!isDirect && !Subtarget->hasV5TOps())
01731       CallOpc = ARMISD::CALL_NOLINK;
01732     else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
01733                // Emit regular call when code size is the priority
01734                !HasMinSizeAttr)
01735       // "mov lr, pc; b _foo" to avoid confusing the RSP
01736       CallOpc = ARMISD::CALL_NOLINK;
01737     else
01738       CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
01739   }
01740 
01741   std::vector<SDValue> Ops;
01742   Ops.push_back(Chain);
01743   Ops.push_back(Callee);
01744 
01745   // Add argument registers to the end of the list so that they are known live
01746   // into the call.
01747   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
01748     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
01749                                   RegsToPass[i].second.getValueType()));
01750 
01751   // Add a register mask operand representing the call-preserved registers.
01752   if (!isTailCall) {
01753     const uint32_t *Mask;
01754     const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
01755     const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
01756     if (isThisReturn) {
01757       // For 'this' returns, use the R0-preserving mask if applicable
01758       Mask = ARI->getThisReturnPreservedMask(CallConv);
01759       if (!Mask) {
01760         // Set isThisReturn to false if the calling convention is not one that
01761         // allows 'returned' to be modeled in this way, so LowerCallResult does
01762         // not try to pass 'this' straight through
01763         isThisReturn = false;
01764         Mask = ARI->getCallPreservedMask(CallConv);
01765       }
01766     } else
01767       Mask = ARI->getCallPreservedMask(CallConv);
01768 
01769     assert(Mask && "Missing call preserved mask for calling convention");
01770     Ops.push_back(DAG.getRegisterMask(Mask));
01771   }
01772 
01773   if (InFlag.getNode())
01774     Ops.push_back(InFlag);
01775 
01776   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
01777   if (isTailCall)
01778     return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
01779 
01780   // Returns a chain and a flag for retval copy to use.
01781   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
01782   InFlag = Chain.getValue(1);
01783 
01784   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
01785                              DAG.getIntPtrConstant(0, true), InFlag, dl);
01786   if (!Ins.empty())
01787     InFlag = Chain.getValue(1);
01788 
01789   // Handle result values, copying them out of physregs into vregs that we
01790   // return.
01791   return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
01792                          InVals, isThisReturn,
01793                          isThisReturn ? OutVals[0] : SDValue());
01794 }
01795 
01796 /// HandleByVal - Every parameter *after* a byval parameter is passed
01797 /// on the stack.  Remember the next parameter register to allocate,
01798 /// and then confiscate the rest of the parameter registers to ensure
01799 /// this.
01800 void
01801 ARMTargetLowering::HandleByVal(
01802     CCState *State, unsigned &size, unsigned Align) const {
01803   unsigned reg = State->AllocateReg(GPRArgRegs, 4);
01804   assert((State->getCallOrPrologue() == Prologue ||
01805           State->getCallOrPrologue() == Call) &&
01806          "unhandled ParmContext");
01807 
01808   if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
01809     if (Subtarget->isAAPCS_ABI() && Align > 4) {
01810       unsigned AlignInRegs = Align / 4;
01811       unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
01812       for (unsigned i = 0; i < Waste; ++i)
01813         reg = State->AllocateReg(GPRArgRegs, 4);
01814     }
01815     if (reg != 0) {
01816       unsigned excess = 4 * (ARM::R4 - reg);
01817 
01818       // Special case when NSAA != SP and the parameter size is greater than
01819       // the size of all remaining GPRs. In that case we cannot split the
01820       // parameter; it must be sent to the stack in its entirety. We also set
01821       // NCRN to R4, wasting all remaining registers.
01822       const unsigned NSAAOffset = State->getNextStackOffset();
01823       if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
01824         while (State->AllocateReg(GPRArgRegs, 4))
01825           ;
01826         return;
01827       }
01828 
01829       // The first register for the byval parameter is the first register that
01830       // wasn't allocated before this method was called, i.e. "reg".
01831       // If the parameter is small enough to fit in the range [reg, r4), the
01832       // end (one past the last) register is reg + param-size-in-regs;
01833       // otherwise the parameter is split between registers and the stack,
01834       // and the end register is r4.
01835       unsigned ByValRegBegin = reg;
01836       unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
01837       State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
01838       // Note: the first register was already allocated at the start of this
01839       // function; allocate the remaining registers we need.
01840       for (unsigned i = reg+1; i != ByValRegEnd; ++i)
01841         State->AllocateReg(GPRArgRegs, 4);
01842       // A byval parameter that is split between registers and memory needs its
01843       // size truncated here.
01844       // In the case where the entire structure fits in registers, we set the
01845       // size in memory to zero.
01846       if (size < excess)
01847         size = 0;
01848       else
01849         size -= excess;
01850     }
01851   }
01852 }
01853 
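      // Illustrative walk-through of HandleByVal (added note, not original source
      // text), using a hypothetical 12-byte byval argument with 8-byte alignment
      // on an AAPCS target, where R0 already holds a preceding argument:
      //
      //   reg = AllocateReg(...)              // returns R1
      //   AlignInRegs = 8 / 4 = 2, Waste = (R4 - R1) % 2 = 1
      //   one register (R1) is wasted         -> reg becomes R2
      //   excess = 4 * (R4 - R2) = 8 bytes
      //   size (12) >= excess, so ByValRegBegin = R2, ByValRegEnd = R4
      //   R3 is additionally allocated, and size is reduced to 12 - 8 = 4,
      //   i.e. the first 8 bytes travel in R2/R3 and 4 bytes go on the stack.
      //
      // If the next stack offset (NSAA) were already non-zero, the whole argument
      // would instead be forced onto the stack and the remaining GPRs wasted.
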
01854 /// MatchingStackOffset - Return true if the given stack call argument is
01855 /// already available in the same position (relatively) of the caller's
01856 /// incoming argument stack.
01857 static
01858 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
01859                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
01860                          const TargetInstrInfo *TII) {
01861   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
01862   int FI = INT_MAX;
01863   if (Arg.getOpcode() == ISD::CopyFromReg) {
01864     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
01865     if (!TargetRegisterInfo::isVirtualRegister(VR))
01866       return false;
01867     MachineInstr *Def = MRI->getVRegDef(VR);
01868     if (!Def)
01869       return false;
01870     if (!Flags.isByVal()) {
01871       if (!TII->isLoadFromStackSlot(Def, FI))
01872         return false;
01873     } else {
01874       return false;
01875     }
01876   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
01877     if (Flags.isByVal())
01878       // ByVal argument is passed in as a pointer but it's now being
01879       // dereferenced. e.g.
01880       // define @foo(%struct.X* %A) {
01881       //   tail call @bar(%struct.X* byval %A)
01882       // }
01883       return false;
01884     SDValue Ptr = Ld->getBasePtr();
01885     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
01886     if (!FINode)
01887       return false;
01888     FI = FINode->getIndex();
01889   } else
01890     return false;
01891 
01892   assert(FI != INT_MAX);
01893   if (!MFI->isFixedObjectIndex(FI))
01894     return false;
01895   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
01896 }
01897 
01898 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
01899 /// for tail call optimization. Targets which want to do tail call
01900 /// optimization should implement this function.
01901 bool
01902 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
01903                                                      CallingConv::ID CalleeCC,
01904                                                      bool isVarArg,
01905                                                      bool isCalleeStructRet,
01906                                                      bool isCallerStructRet,
01907                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
01908                                     const SmallVectorImpl<SDValue> &OutVals,
01909                                     const SmallVectorImpl<ISD::InputArg> &Ins,
01910                                                      SelectionDAG& DAG) const {
01911   const Function *CallerF = DAG.getMachineFunction().getFunction();
01912   CallingConv::ID CallerCC = CallerF->getCallingConv();
01913   bool CCMatch = CallerCC == CalleeCC;
01914 
01915   // Look for obvious safe cases to perform tail call optimization that do not
01916   // require ABI changes. This is what gcc calls sibcall.
01917 
01918   // Do not sibcall optimize vararg calls unless the call site passes no
01919   // arguments.
01920   if (isVarArg && !Outs.empty())
01921     return false;
01922 
01923   // Exception-handling functions need a special set of instructions to indicate
01924   // a return to the hardware. Tail-calling another function would probably
01925   // break this.
01926   if (CallerF->hasFnAttribute("interrupt"))
01927     return false;
01928 
01929   // Also avoid sibcall optimization if either caller or callee uses struct
01930   // return semantics.
01931   if (isCalleeStructRet || isCallerStructRet)
01932     return false;
01933 
01934   // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
01935   // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
01936   // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
01937   // support in the assembler and linker to be used. This would need to be
01938   // fixed to fully support tail calls in Thumb1.
01939   //
01940   // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
01941   // LR.  This means if we need to reload LR, it takes an extra instruction,
01942   // which outweighs the value of the tail call; but here we don't know yet
01943   // whether LR is going to be used.  Probably the right approach is to
01944   // generate the tail call here and turn it back into CALL/RET in
01945   // emitEpilogue if LR is used.
01946 
01947   // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
01948   // but we need to make sure there are enough registers; the only valid
01949   // registers are the 4 used for parameters.  We don't currently do this
01950   // case.
01951   if (Subtarget->isThumb1Only())
01952     return false;
01953 
01954   // If the calling conventions do not match, then we'd better make sure the
01955   // results are returned in the same way as what the caller expects.
01956   if (!CCMatch) {
01957     SmallVector<CCValAssign, 16> RVLocs1;
01958     ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
01959                        getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
01960     CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
01961 
01962     SmallVector<CCValAssign, 16> RVLocs2;
01963     ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
01964                        getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
01965     CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
01966 
01967     if (RVLocs1.size() != RVLocs2.size())
01968       return false;
01969     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
01970       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
01971         return false;
01972       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
01973         return false;
01974       if (RVLocs1[i].isRegLoc()) {
01975         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
01976           return false;
01977       } else {
01978         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
01979           return false;
01980       }
01981     }
01982   }
01983 
01984   // If Caller's vararg or byval argument has been split between registers and
01985   // stack, do not perform tail call, since part of the argument is in caller's
01986   // local frame.
01987   const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
01988                                       getInfo<ARMFunctionInfo>();
01989   if (AFI_Caller->getArgRegsSaveSize())
01990     return false;
01991 
01992   // If the callee takes no arguments then go on to check the results of the
01993   // call.
01994   if (!Outs.empty()) {
01995     // Check if stack adjustment is needed. For now, do not do this if any
01996     // argument is passed on the stack.
01997     SmallVector<CCValAssign, 16> ArgLocs;
01998     ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
01999                       getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
02000     CCInfo.AnalyzeCallOperands(Outs,
02001                                CCAssignFnForNode(CalleeCC, false, isVarArg));
02002     if (CCInfo.getNextStackOffset()) {
02003       MachineFunction &MF = DAG.getMachineFunction();
02004 
02005       // Check if the arguments are already laid out in the right way as
02006       // the caller's fixed stack objects.
02007       MachineFrameInfo *MFI = MF.getFrameInfo();
02008       const MachineRegisterInfo *MRI = &MF.getRegInfo();
02009       const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
02010       for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
02011            i != e;
02012            ++i, ++realArgIdx) {
02013         CCValAssign &VA = ArgLocs[i];
02014         EVT RegVT = VA.getLocVT();
02015         SDValue Arg = OutVals[realArgIdx];
02016         ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
02017         if (VA.getLocInfo() == CCValAssign::Indirect)
02018           return false;
02019         if (VA.needsCustom()) {
02020           // f64 and vector types are split into multiple registers or
02021           // register/stack-slot combinations.  The types will not match
02022           // the registers; give up on memory f64 refs until we figure
02023           // out what to do about this.
02024           if (!VA.isRegLoc())
02025             return false;
02026           if (!ArgLocs[++i].isRegLoc())
02027             return false;
02028           if (RegVT == MVT::v2f64) {
02029             if (!ArgLocs[++i].isRegLoc())
02030               return false;
02031             if (!ArgLocs[++i].isRegLoc())
02032               return false;
02033           }
02034         } else if (!VA.isRegLoc()) {
02035           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
02036                                    MFI, MRI, TII))
02037             return false;
02038         }
02039       }
02040     }
02041   }
02042 
02043   return true;
02044 }
02045 
02046 bool
02047 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
02048                                   MachineFunction &MF, bool isVarArg,
02049                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
02050                                   LLVMContext &Context) const {
02051   SmallVector<CCValAssign, 16> RVLocs;
02052   CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
02053   return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
02054                                                     isVarArg));
02055 }
02056 
02057 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
02058                                     SDLoc DL, SelectionDAG &DAG) {
02059   const MachineFunction &MF = DAG.getMachineFunction();
02060   const Function *F = MF.getFunction();
02061 
02062   StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
02063 
02064   // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
02065   // version of the "preferred return address". These offsets affect the return
02066   // instruction if this is a return from PL1 without hypervisor extensions.
02067   //    IRQ/FIQ: +4     "subs pc, lr, #4"
02068   //    SWI:     0      "subs pc, lr, #0"
02069   //    ABORT:   +4     "subs pc, lr, #4"
02070   //    UNDEF:   +4/+2  "subs pc, lr, #0"
02071   // UNDEF varies depending on whether the exception came from ARM or Thumb
02072   // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
02073 
02074   int64_t LROffset;
02075   if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
02076       IntKind == "ABORT")
02077     LROffset = 4;
02078   else if (IntKind == "SWI" || IntKind == "UNDEF")
02079     LROffset = 0;
02080   else
02081     report_fatal_error("Unsupported interrupt attribute. If present, value "
02082                        "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
02083 
02084   RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
02085 
02086   return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
02087 }
02088 
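      // Illustrative example (added note, not original source text): for a
      // hypothetical handler carrying the "interrupt"="IRQ" attribute on a
      // non-M-class core, LowerInterruptReturn inserts an LR offset of 4 into the
      // return operands, which ultimately yields the exception return
      //
      //   subs pc, lr, #4
      //
      // whereas an "SWI" or "UNDEF" handler uses an offset of 0 ("subs pc, lr, #0").
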
02089 SDValue
02090 ARMTargetLowering::LowerReturn(SDValue Chain,
02091                                CallingConv::ID CallConv, bool isVarArg,
02092                                const SmallVectorImpl<ISD::OutputArg> &Outs,
02093                                const SmallVectorImpl<SDValue> &OutVals,
02094                                SDLoc dl, SelectionDAG &DAG) const {
02095 
02096   // CCValAssign - represent the assignment of the return value to a location.
02097   SmallVector<CCValAssign, 16> RVLocs;
02098 
02099   // CCState - Info about the registers and stack slots.
02100   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
02101                     getTargetMachine(), RVLocs, *DAG.getContext(), Call);
02102 
02103   // Analyze outgoing return values.
02104   CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
02105                                                isVarArg));
02106 
02107   SDValue Flag;
02108   SmallVector<SDValue, 4> RetOps;
02109   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
02110   bool isLittleEndian = Subtarget->isLittle();
02111 
02112   // Copy the result values into the output registers.
02113   for (unsigned i = 0, realRVLocIdx = 0;
02114        i != RVLocs.size();
02115        ++i, ++realRVLocIdx) {
02116     CCValAssign &VA = RVLocs[i];
02117     assert(VA.isRegLoc() && "Can only return in registers!");
02118 
02119     SDValue Arg = OutVals[realRVLocIdx];
02120 
02121     switch (VA.getLocInfo()) {
02122     default: llvm_unreachable("Unknown loc info!");
02123     case CCValAssign::Full: break;
02124     case CCValAssign::BCvt:
02125       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
02126       break;
02127     }
02128 
02129     if (VA.needsCustom()) {
02130       if (VA.getLocVT() == MVT::v2f64) {
02131         // Extract the first half and return it in two registers.
02132         SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
02133                                    DAG.getConstant(0, MVT::i32));
02134         SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
02135                                        DAG.getVTList(MVT::i32, MVT::i32), Half);
02136 
02137         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02138                                  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
02139                                  Flag);
02140         Flag = Chain.getValue(1);
02141         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02142         VA = RVLocs[++i]; // skip ahead to next loc
02143         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02144                                  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
02145                                  Flag);
02146         Flag = Chain.getValue(1);
02147         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02148         VA = RVLocs[++i]; // skip ahead to next loc
02149 
02150         // Extract the 2nd half and fall through to handle it as an f64 value.
02151         Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
02152                           DAG.getConstant(1, MVT::i32));
02153       }
02154       // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
02155       // available.
02156       SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
02157                                   DAG.getVTList(MVT::i32, MVT::i32), Arg);
02158       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02159                                fmrrd.getValue(isLittleEndian ? 0 : 1),
02160                                Flag);
02161       Flag = Chain.getValue(1);
02162       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02163       VA = RVLocs[++i]; // skip ahead to next loc
02164       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02165                                fmrrd.getValue(isLittleEndian ? 1 : 0),
02166                                Flag);
02167     } else
02168       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
02169 
02170     // Guarantee that all emitted copies are
02171     // stuck together so nothing can be scheduled in between them.
02172     Flag = Chain.getValue(1);
02173     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02174   }
02175 
02176   // Update chain and glue.
02177   RetOps[0] = Chain;
02178   if (Flag.getNode())
02179     RetOps.push_back(Flag);
02180 
02181   // CPUs which aren't M-class use a special sequence to return from
02182   // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
02183   // though we use "subs pc, lr, #N").
02184   //
02185   // M-class CPUs actually use a normal return sequence with a special
02186   // (hardware-provided) value in LR, so the normal code path works.
02187   if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
02188       !Subtarget->isMClass()) {
02189     if (Subtarget->isThumb1Only())
02190       report_fatal_error("interrupt attribute is not supported in Thumb1");
02191     return LowerInterruptReturn(RetOps, dl, DAG);
02192   }
02193 
02194   return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
02195 }
02196 
02197 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
02198   if (N->getNumValues() != 1)
02199     return false;
02200   if (!N->hasNUsesOfValue(1, 0))
02201     return false;
02202 
02203   SDValue TCChain = Chain;
02204   SDNode *Copy = *N->use_begin();
02205   if (Copy->getOpcode() == ISD::CopyToReg) {
02206     // If the copy has a glue operand, we conservatively assume it isn't safe to
02207     // perform a tail call.
02208     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
02209       return false;
02210     TCChain = Copy->getOperand(0);
02211   } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
02212     SDNode *VMov = Copy;
02213     // f64 returned in a pair of GPRs.
02214     SmallPtrSet<SDNode*, 2> Copies;
02215     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
02216          UI != UE; ++UI) {
02217       if (UI->getOpcode() != ISD::CopyToReg)
02218         return false;
02219       Copies.insert(*UI);
02220     }
02221     if (Copies.size() > 2)
02222       return false;
02223 
02224     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
02225          UI != UE; ++UI) {
02226       SDValue UseChain = UI->getOperand(0);
02227       if (Copies.count(UseChain.getNode()))
02228         // Second CopyToReg
02229         Copy = *UI;
02230       else
02231         // First CopyToReg
02232         TCChain = UseChain;
02233     }
02234   } else if (Copy->getOpcode() == ISD::BITCAST) {
02235     // f32 returned in a single GPR.
02236     if (!Copy->hasOneUse())
02237       return false;
02238     Copy = *Copy->use_begin();
02239     if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
02240       return false;
02241     TCChain = Copy->getOperand(0);
02242   } else {
02243     return false;
02244   }
02245 
02246   bool HasRet = false;
02247   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
02248        UI != UE; ++UI) {
02249     if (UI->getOpcode() != ARMISD::RET_FLAG &&
02250         UI->getOpcode() != ARMISD::INTRET_FLAG)
02251       return false;
02252     HasRet = true;
02253   }
02254 
02255   if (!HasRet)
02256     return false;
02257 
02258   Chain = TCChain;
02259   return true;
02260 }
02261 
02262 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
02263   if (!Subtarget->supportsTailCall())
02264     return false;
02265 
02266   if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
02267     return false;
02268 
02269   return !Subtarget->isThumb1Only();
02270 }
02271 
02272 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
02273 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
02274 // one of the above mentioned nodes. It has to be wrapped because otherwise
02275 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
02276 // be used to form addressing mode. These wrapped nodes will be selected
02277 // into MOVi.
02278 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
02279   EVT PtrVT = Op.getValueType();
02280   // FIXME there is no actual debug info here
02281   SDLoc dl(Op);
02282   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
02283   SDValue Res;
02284   if (CP->isMachineConstantPoolEntry())
02285     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
02286                                     CP->getAlignment());
02287   else
02288     Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
02289                                     CP->getAlignment());
02290   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
02291 }
02292 
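      // Illustrative example (added note, not original source text): for a plain
      // constant-pool entry, LowerConstantPool simply rewrites
      //
      //   (ConstantPool <constant>)  ->  (ARMISD::Wrapper (TargetConstantPool <constant>))
      //
      // so that instruction selection sees the target form and can use it to form
      // an addressing mode, as described in the comment above.
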
02293 unsigned ARMTargetLowering::getJumpTableEncoding() const {
02294   return MachineJumpTableInfo::EK_Inline;
02295 }
02296 
02297 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
02298                                              SelectionDAG &DAG) const {
02299   MachineFunction &MF = DAG.getMachineFunction();
02300   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02301   unsigned ARMPCLabelIndex = 0;
02302   SDLoc DL(Op);
02303   EVT PtrVT = getPointerTy();
02304   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
02305   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
02306   SDValue CPAddr;
02307   if (RelocM == Reloc::Static) {
02308     CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
02309   } else {
02310     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
02311     ARMPCLabelIndex = AFI->createPICLabelUId();
02312     ARMConstantPoolValue *CPV =
02313       ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
02314                                       ARMCP::CPBlockAddress, PCAdj);
02315     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02316   }
02317   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
02318   SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
02319                                MachinePointerInfo::getConstantPool(),
02320                                false, false, false, 0);
02321   if (RelocM == Reloc::Static)
02322     return Result;
02323   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02324   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
02325 }
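// Rough shape of the PIC case above (ARM mode; Thumb uses a PCAdj of 4):
//   ldr  rN, .LCPIn_m        @ load the blockaddress constant-pool entry
// .LPCn_m:
//   add  rN, pc, rN          @ ARMISD::PIC_ADD; the PCAdj of 8 accounts for
//                            @ the ARM pipeline's pc read-ahead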
02326 
02327 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
02328 SDValue
02329 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
02330                                                  SelectionDAG &DAG) const {
02331   SDLoc dl(GA);
02332   EVT PtrVT = getPointerTy();
02333   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
02334   MachineFunction &MF = DAG.getMachineFunction();
02335   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02336   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02337   ARMConstantPoolValue *CPV =
02338     ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
02339                                     ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
02340   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02341   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
02342   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
02343                          MachinePointerInfo::getConstantPool(),
02344                          false, false, false, 0);
02345   SDValue Chain = Argument.getValue(1);
02346 
02347   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02348   Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
02349 
02350   // call __tls_get_addr.
02351   ArgListTy Args;
02352   ArgListEntry Entry;
02353   Entry.Node = Argument;
02354   Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
02355   Args.push_back(Entry);
02356 
02357   // FIXME: is there useful debug info available here?
02358   TargetLowering::CallLoweringInfo CLI(DAG);
02359   CLI.setDebugLoc(dl).setChain(Chain)
02360     .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
02361                DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
02362                0);
02363 
02364   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
02365   return CallResult.first;
02366 }
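// Sketch of the general-dynamic sequence built above (illustrative only):
//   ldr  r0, .LCPIn_m        @ TLSGD constant-pool entry for the variable
// .LPCn_m:
//   add  r0, pc, r0          @ PIC_ADD forms the argument to the resolver
//   bl   __tls_get_addr      @ returns the variable's address in r0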
02367 
02368 // Lower ISD::GlobalTLSAddress using the "initial exec" or
02369 // "local exec" model.
02370 SDValue
02371 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
02372                                         SelectionDAG &DAG,
02373                                         TLSModel::Model model) const {
02374   const GlobalValue *GV = GA->getGlobal();
02375   SDLoc dl(GA);
02376   SDValue Offset;
02377   SDValue Chain = DAG.getEntryNode();
02378   EVT PtrVT = getPointerTy();
02379   // Get the Thread Pointer
02380   SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
02381 
02382   if (model == TLSModel::InitialExec) {
02383     MachineFunction &MF = DAG.getMachineFunction();
02384     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02385     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02386     // Initial exec model.
02387     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
02388     ARMConstantPoolValue *CPV =
02389       ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
02390                                       ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
02391                                       true);
02392     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02393     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
02394     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
02395                          MachinePointerInfo::getConstantPool(),
02396                          false, false, false, 0);
02397     Chain = Offset.getValue(1);
02398 
02399     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02400     Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
02401 
02402     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
02403                          MachinePointerInfo::getConstantPool(),
02404                          false, false, false, 0);
02405   } else {
02406     // local exec model
02407     assert(model == TLSModel::LocalExec);
02408     ARMConstantPoolValue *CPV =
02409       ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
02410     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02411     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
02412     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
02413                          MachinePointerInfo::getConstantPool(),
02414                          false, false, false, 0);
02415   }
02416 
02417   // The address of the thread-local variable is the sum of the thread
02418   // pointer and the offset of the variable.
02419   return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
02420 }
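// Summary of the two sequences built above (illustrative):
//   initial exec: load the GOTTPOFF constant-pool entry, PIC_ADD it to pc,
//                 load the tp-relative offset through the GOT, then add the
//                 thread pointer.
//   local exec:   load the TPOFF constant-pool entry directly and add the
//                 thread pointer (ARMISD::THREAD_POINTER, typically an mrc of
//                 TPIDRURO or a call to __aeabi_read_tp).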
02421 
02422 SDValue
02423 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
02424   // TODO: implement the "local dynamic" model
02425   assert(Subtarget->isTargetELF() &&
02426          "TLS not implemented for non-ELF targets");
02427   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
02428 
02429   TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
02430 
02431   switch (model) {
02432     case TLSModel::GeneralDynamic:
02433     case TLSModel::LocalDynamic:
02434       return LowerToTLSGeneralDynamicModel(GA, DAG);
02435     case TLSModel::InitialExec:
02436     case TLSModel::LocalExec:
02437       return LowerToTLSExecModels(GA, DAG, model);
02438   }
02439   llvm_unreachable("bogus TLS model");
02440 }
02441 
02442 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
02443                                                  SelectionDAG &DAG) const {
02444   EVT PtrVT = getPointerTy();
02445   SDLoc dl(Op);
02446   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02447   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
02448     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
02449     ARMConstantPoolValue *CPV =
02450       ARMConstantPoolConstant::Create(GV,
02451                                       UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
02452     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02453     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02454     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
02455                                  CPAddr,
02456                                  MachinePointerInfo::getConstantPool(),
02457                                  false, false, false, 0);
02458     SDValue Chain = Result.getValue(1);
02459     SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
02460     Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
02461     if (!UseGOTOFF)
02462       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
02463                            MachinePointerInfo::getGOT(),
02464                            false, false, false, 0);
02465     return Result;
02466   }
02467 
02468   // If we have T2 ops, we can materialize the address directly via a
02469   // movw/movt pair. This is always cheaper.
02470   if (Subtarget->useMovt(DAG.getMachineFunction())) {
02471     ++NumMovwMovt;
02472     // FIXME: Once remat is capable of dealing with instructions with register
02473     // operands, expand this into two nodes.
02474     return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
02475                        DAG.getTargetGlobalAddress(GV, dl, PtrVT));
02476   } else {
02477     SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
02478     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02479     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
02480                        MachinePointerInfo::getConstantPool(),
02481                        false, false, false, 0);
02482   }
02483 }
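// Illustration of the PIC case above: the address is computed as
//   result = GOT_base + load(constant-pool entry)
// where the entry holds either a GOTOFF offset (local/hidden symbols; the sum
// is already the symbol's address) or a GOT offset (preemptible symbols; one
// more load through the GOT yields the final address).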
02484 
02485 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
02486                                                     SelectionDAG &DAG) const {
02487   EVT PtrVT = getPointerTy();
02488   SDLoc dl(Op);
02489   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02490   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
02491 
02492   if (Subtarget->useMovt(DAG.getMachineFunction()))
02493     ++NumMovwMovt;
02494 
02495   // FIXME: Once remat is capable of dealing with instructions with register
02496   // operands, expand this into multiple nodes
02497   unsigned Wrapper =
02498       RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
02499 
02500   SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
02501   SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
02502 
02503   if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
02504     Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
02505                          MachinePointerInfo::getGOT(), false, false, false, 0);
02506   return Result;
02507 }
02508 
02509 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
02510                                                      SelectionDAG &DAG) const {
02511   assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
02512   assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
02513          "Windows on ARM expects to use movw/movt");
02514 
02515   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02516   const ARMII::TOF TargetFlags =
02517     (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
02518   EVT PtrVT = getPointerTy();
02519   SDValue Result;
02520   SDLoc DL(Op);
02521 
02522   ++NumMovwMovt;
02523 
02524   // FIXME: Once remat is capable of dealing with instructions with register
02525   // operands, expand this into two nodes.
02526   Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
02527                        DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
02528                                                   TargetFlags));
02529   if (GV->hasDLLImportStorageClass())
02530     Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
02531                          MachinePointerInfo::getGOT(), false, false, false, 0);
02532   return Result;
02533 }
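// Illustration: for a dllimport'ed global, the movw/movt pair materializes the
// address of the __imp_<name> pointer, and the extra load above fetches the
// real symbol address from the import address table.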
02534 
02535 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
02536                                                     SelectionDAG &DAG) const {
02537   assert(Subtarget->isTargetELF() &&
02538          "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
02539   MachineFunction &MF = DAG.getMachineFunction();
02540   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02541   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02542   EVT PtrVT = getPointerTy();
02543   SDLoc dl(Op);
02544   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
02545   ARMConstantPoolValue *CPV =
02546     ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
02547                                   ARMPCLabelIndex, PCAdj);
02548   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02549   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02550   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
02551                                MachinePointerInfo::getConstantPool(),
02552                                false, false, false, 0);
02553   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02554   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
02555 }
02556 
02557 SDValue
02558 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
02559   SDLoc dl(Op);
02560   SDValue Val = DAG.getConstant(0, MVT::i32);
02561   return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
02562                      DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
02563                      Op.getOperand(1), Val);
02564 }
02565 
02566 SDValue
02567 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
02568   SDLoc dl(Op);
02569   return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
02570                      Op.getOperand(1), DAG.getConstant(0, MVT::i32));
02571 }
02572 
02573 SDValue
02574 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
02575                                           const ARMSubtarget *Subtarget) const {
02576   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
02577   SDLoc dl(Op);
02578   switch (IntNo) {
02579   default: return SDValue();    // Don't custom lower most intrinsics.
02580   case Intrinsic::arm_rbit: {
02581     assert(Op.getOperand(1).getValueType() == MVT::i32 &&
02582            "RBIT intrinsic must have i32 type!");
02583     return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1));
02584   }
02585   case Intrinsic::arm_thread_pointer: {
02586     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02587     return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
02588   }
02589   case Intrinsic::eh_sjlj_lsda: {
02590     MachineFunction &MF = DAG.getMachineFunction();
02591     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02592     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02593     EVT PtrVT = getPointerTy();
02594     Reloc::Model RelocM = getTargetMachine().getRelocationModel();
02595     SDValue CPAddr;
02596     unsigned PCAdj = (RelocM != Reloc::PIC_)
02597       ? 0 : (Subtarget->isThumb() ? 4 : 8);
02598     ARMConstantPoolValue *CPV =
02599       ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
02600                                       ARMCP::CPLSDA, PCAdj);
02601     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02602     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02603     SDValue Result =
02604       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
02605                   MachinePointerInfo::getConstantPool(),
02606                   false, false, false, 0);
02607 
02608     if (RelocM == Reloc::PIC_) {
02609       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02610       Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
02611     }
02612     return Result;
02613   }
02614   case Intrinsic::arm_neon_vmulls:
02615   case Intrinsic::arm_neon_vmullu: {
02616     unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
02617       ? ARMISD::VMULLs : ARMISD::VMULLu;
02618     return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
02619                        Op.getOperand(1), Op.getOperand(2));
02620   }
02621   }
02622 }
02623 
02624 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
02625                                  const ARMSubtarget *Subtarget) {
02626   // FIXME: handle "fence singlethread" more efficiently.
02627   SDLoc dl(Op);
02628   if (!Subtarget->hasDataBarrier()) {
02629     // Some ARMv6 cpus can support data barriers with an mcr instruction.
02630     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
02631     // here.
02632     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
02633            "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
02634     return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
02635                        DAG.getConstant(0, MVT::i32));
02636   }
02637 
02638   ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
02639   AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
02640   unsigned Domain = ARM_MB::ISH;
02641   if (Subtarget->isMClass()) {
02642     // Only a full system barrier exists in the M-class architectures.
02643     Domain = ARM_MB::SY;
02644   } else if (Subtarget->isSwift() && Ord == Release) {
02645     // Swift happens to implement ISHST barriers in a way that's compatible with
02646     // Release semantics but weaker than ISH so we'd be fools not to use
02647     // it. Beware: other processors probably don't!
02648     Domain = ARM_MB::ISHST;
02649   }
02650 
02651   return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
02652                      DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
02653                      DAG.getConstant(Domain, MVT::i32));
02654 }
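// Net effect of the barrier selection above (illustrative):
//   default (A/R-class)      -> dmb ish
//   M-class                  -> dmb sy    (only full-system barriers exist)
//   Swift, release ordering  -> dmb ishst
// Pre-v7 cores without DMB instead use the MEMBARRIER_MCR node, i.e. the
// ARMv6 CP15 barrier encoding "mcr p15, 0, rN, c7, c10, 5".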
02655 
02656 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
02657                              const ARMSubtarget *Subtarget) {
02658   // ARM before v5TE and Thumb1 do not have preload instructions.
02659   if (!(Subtarget->isThumb2() ||
02660         (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
02661     // Just preserve the chain.
02662     return Op.getOperand(0);
02663 
02664   SDLoc dl(Op);
02665   unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
02666   if (!isRead &&
02667       (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
02668     // Only ARMv7 with the MP extension has PLDW; without it, drop the write prefetch.
02669     return Op.getOperand(0);
02670 
02671   unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
02672   if (Subtarget->isThumb()) {
02673     // Invert the bits.
02674     isRead = ~isRead & 1;
02675     isData = ~isData & 1;
02676   }
02677 
02678   return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
02679                      Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
02680                      DAG.getConstant(isData, MVT::i32));
02681 }
02682 
02683 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
02684   MachineFunction &MF = DAG.getMachineFunction();
02685   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
02686 
02687   // vastart just stores the address of the VarArgsFrameIndex slot into the
02688   // memory location argument.
02689   SDLoc dl(Op);
02690   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02691   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
02692   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
02693   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
02694                       MachinePointerInfo(SV), false, false, 0);
02695 }
02696 
02697 SDValue
02698 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
02699                                         SDValue &Root, SelectionDAG &DAG,
02700                                         SDLoc dl) const {
02701   MachineFunction &MF = DAG.getMachineFunction();
02702   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02703 
02704   const TargetRegisterClass *RC;
02705   if (AFI->isThumb1OnlyFunction())
02706     RC = &ARM::tGPRRegClass;
02707   else
02708     RC = &ARM::GPRRegClass;
02709 
02710   // Transform the arguments stored in physical registers into virtual ones.
02711   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02712   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
02713 
02714   SDValue ArgValue2;
02715   if (NextVA.isMemLoc()) {
02716     MachineFrameInfo *MFI = MF.getFrameInfo();
02717     int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
02718 
02719     // Create load node to retrieve arguments from the stack.
02720     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
02721     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
02722                             MachinePointerInfo::getFixedStack(FI),
02723                             false, false, false, 0);
02724   } else {
02725     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
02726     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
02727   }
02728   if (!Subtarget->isLittle())
02729     std::swap (ArgValue, ArgValue2);
02730   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
02731 }
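// For illustration: with an AAPCS (soft-float) calling convention, an f64
// argument arriving in a GPR pair such as r0/r1 is reassembled here as
//   vmov dN, r0, r1           @ ARMISD::VMOVDRR
// with the two GPR operands swapped on big-endian subtargets.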
02732 
02733 void
02734 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
02735                                   unsigned InRegsParamRecordIdx,
02736                                   unsigned ArgSize,
02737                                   unsigned &ArgRegsSize,
02738                                   unsigned &ArgRegsSaveSize)
02739   const {
02740   unsigned NumGPRs;
02741   if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
02742     unsigned RBegin, REnd;
02743     CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
02744     NumGPRs = REnd - RBegin;
02745   } else {
02746     unsigned int firstUnalloced;
02747     firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
02748                                                 sizeof(GPRArgRegs) /
02749                                                 sizeof(GPRArgRegs[0]));
02750     NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
02751   }
02752 
02753   unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
02754   ArgRegsSize = NumGPRs * 4;
02755 
02756   // If parameter is split between stack and GPRs...
02757   if (NumGPRs && Align > 4 &&
02758       (ArgRegsSize < ArgSize ||
02759         InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
02760     // Add padding for the part of the parameter recovered from GPRs.  For
02761     // example, if Align == 8, its last byte must be at address K*8 - 1.
02762     // We need this because the remaining (stack) part of the parameter is
02763     // stack-aligned, and we must "attach" the "GPRs head" to it without
02764     // gaps:
02765     // Stack:
02766     // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
02767     // [ [padding] [GPRs head] ] [        Tail passed via stack       ....
02768     //
02769     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02770     unsigned Padding =
02771         OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
02772     ArgRegsSaveSize = ArgRegsSize + Padding;
02773   } else
02774     // We don't need to extend the register save area for byval parameters
02775     // if they are passed entirely in GPRs.
02776     ArgRegsSaveSize = ArgRegsSize;
02777 }
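// Worked example (assuming no registers have been saved yet): a byval
// parameter split as 3 GPRs plus stack, with an 8-byte stack alignment, gives
//   ArgRegsSize     = 3 * 4 = 12
//   Padding         = OffsetToAlignment(12, 8) = 4
//   ArgRegsSaveSize = 16
// so the "GPRs head" ends on an 8-byte boundary, directly below the
// stack-resident tail of the parameter.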
02778 
02779 // The remaining GPRs hold either the beginning of variable-argument
02780 // data, or the beginning of an aggregate passed by value (usually
02781 // byval).  Either way, we allocate stack slots adjacent to the data
02782 // provided by our caller, and store the unallocated registers there.
02783 // If this is a variadic function, the va_list pointer will begin with
02784 // these values; otherwise, this reassembles a (byval) structure that
02785 // was split between registers and memory.
02786 // Return: the frame index the registers were stored into.
02787 int
02788 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
02789                                   SDLoc dl, SDValue &Chain,
02790                                   const Value *OrigArg,
02791                                   unsigned InRegsParamRecordIdx,
02792                                   unsigned OffsetFromOrigArg,
02793                                   unsigned ArgOffset,
02794                                   unsigned ArgSize,
02795                                   bool ForceMutable,
02796                                   unsigned ByValStoreOffset,
02797                                   unsigned TotalArgRegsSaveSize) const {
02798 
02799   // Currently, two use-cases are possible:
02800   // Case #1. Non-var-args function, and we meet the first byval parameter.
02801   //          Set up the first unallocated register as the first byval
02802   //          register and consume all remaining registers
02803   //          (these two actions are performed by the HandleByVal method).
02804   //          Then, here, we initialize the stack frame with
02805   //          "store-reg" instructions.
02806   // Case #2. Var-args function that doesn't contain byval parameters.
02807   //          The same: consume all remaining unallocated registers and
02808   //          initialize the stack frame.
02809 
02810   MachineFunction &MF = DAG.getMachineFunction();
02811   MachineFrameInfo *MFI = MF.getFrameInfo();
02812   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02813   unsigned firstRegToSaveIndex, lastRegToSaveIndex;
02814   unsigned RBegin, REnd;
02815   if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
02816     CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
02817     firstRegToSaveIndex = RBegin - ARM::R0;
02818     lastRegToSaveIndex = REnd - ARM::R0;
02819   } else {
02820     firstRegToSaveIndex = CCInfo.getFirstUnallocated
02821       (GPRArgRegs, array_lengthof(GPRArgRegs));
02822     lastRegToSaveIndex = 4;
02823   }
02824 
02825   unsigned ArgRegsSize, ArgRegsSaveSize;
02826   computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
02827                  ArgRegsSize, ArgRegsSaveSize);
02828 
02829   // Store any by-val regs to their spots on the stack so that they may be
02830   // loaded by dereferencing the result of the formal parameter pointer or
02831   // va_next. Note: once the stack area for byval/varargs registers has been
02832   // initialized, it can't be initialized again.
02833   if (ArgRegsSaveSize) {
02834     unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
02835 
02836     if (Padding) {
02837       assert(AFI->getStoredByValParamsPadding() == 0 &&
02838              "The only parameter may be padded.");
02839       AFI->setStoredByValParamsPadding(Padding);
02840     }
02841 
02842     int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
02843                                             Padding +
02844                                               ByValStoreOffset -
02845                                               (int64_t)TotalArgRegsSaveSize,
02846                                             false);
02847     SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
02848     if (Padding) {
02849        MFI->CreateFixedObject(Padding,
02850                               ArgOffset + ByValStoreOffset -
02851                                 (int64_t)ArgRegsSaveSize,
02852                               false);
02853     }
02854 
02855     SmallVector<SDValue, 4> MemOps;
02856     for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
02857          ++firstRegToSaveIndex, ++i) {
02858       const TargetRegisterClass *RC;
02859       if (AFI->isThumb1OnlyFunction())
02860         RC = &ARM::tGPRRegClass;
02861       else
02862         RC = &ARM::GPRRegClass;
02863 
02864       unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
02865       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
02866       SDValue Store =
02867         DAG.getStore(Val.getValue(1), dl, Val, FIN,
02868                      MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
02869                      false, false, 0);
02870       MemOps.push_back(Store);
02871       FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
02872                         DAG.getConstant(4, getPointerTy()));
02873     }
02874 
02875     AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
02876 
02877     if (!MemOps.empty())
02878       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
02879     return FrameIndex;
02880   } else {
02881     if (ArgSize == 0) {
02882       // We cannot allocate a zero-byte object for the first variadic argument,
02883       // so just make up a size.
02884       ArgSize = 4;
02885     }
02886     // This will point to the next argument passed via stack.
02887     return MFI->CreateFixedObject(
02888       ArgSize, ArgOffset, !ForceMutable);
02889   }
02890 }
02891 
02892 // Set up the stack frame that the va_list pointer will start from.
02893 void
02894 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
02895                                         SDLoc dl, SDValue &Chain,
02896                                         unsigned ArgOffset,
02897                                         unsigned TotalArgRegsSaveSize,
02898                                         bool ForceMutable) const {
02899   MachineFunction &MF = DAG.getMachineFunction();
02900   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02901 
02902   // Try to store any remaining integer argument regs
02903   // to their spots on the stack so that they may be loaded by dereferencing
02904   // the result of va_next.
02905   // If there are no regs to be stored, just point to the address after the
02906   // last argument passed via the stack.
02907   int FrameIndex =
02908     StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
02909                    CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
02910                    0, TotalArgRegsSaveSize);
02911 
02912   AFI->setVarArgsFrameIndex(FrameIndex);
02913 }
02914 
02915 SDValue
02916 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
02917                                         CallingConv::ID CallConv, bool isVarArg,
02918                                         const SmallVectorImpl<ISD::InputArg>
02919                                           &Ins,
02920                                         SDLoc dl, SelectionDAG &DAG,
02921                                         SmallVectorImpl<SDValue> &InVals)
02922                                           const {
02923   MachineFunction &MF = DAG.getMachineFunction();
02924   MachineFrameInfo *MFI = MF.getFrameInfo();
02925 
02926   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02927 
02928   // Assign locations to all of the incoming arguments.
02929   SmallVector<CCValAssign, 16> ArgLocs;
02930   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
02931                     getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
02932   CCInfo.AnalyzeFormalArguments(Ins,
02933                                 CCAssignFnForNode(CallConv, /* Return*/ false,
02934                                                   isVarArg));
02935 
02936   SmallVector<SDValue, 16> ArgValues;
02937   int lastInsIndex = -1;
02938   SDValue ArgValue;
02939   Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
02940   unsigned CurArgIdx = 0;
02941 
02942   // Initially ArgRegsSaveSize is zero.
02943   // Then we increase this value each time we meet a byval parameter.
02944   // We also increase this value for varargs functions.
02945   AFI->setArgRegsSaveSize(0);
02946 
02947   unsigned ByValStoreOffset = 0;
02948   unsigned TotalArgRegsSaveSize = 0;
02949   unsigned ArgRegsSaveSizeMaxAlign = 4;
02950 
02951   // Calculate the amount of stack space that we need to allocate to store
02952   // byval and variadic arguments that are passed in registers.
02953   // We need to know this before we allocate the first byval or variadic
02954   // argument, as they will be allocated a stack slot below the CFA (Canonical
02955   // Frame Address, the stack pointer at entry to the function).
02956   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02957     CCValAssign &VA = ArgLocs[i];
02958     if (VA.isMemLoc()) {
02959       int index = VA.getValNo();
02960       if (index != lastInsIndex) {
02961         ISD::ArgFlagsTy Flags = Ins[index].Flags;
02962         if (Flags.isByVal()) {
02963           unsigned ExtraArgRegsSize;
02964           unsigned ExtraArgRegsSaveSize;
02965           computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(),
02966                          Flags.getByValSize(),
02967                          ExtraArgRegsSize, ExtraArgRegsSaveSize);
02968 
02969           TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
02970           if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
02971               ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
02972           CCInfo.nextInRegsParam();
02973         }
02974         lastInsIndex = index;
02975       }
02976     }
02977   }
02978   CCInfo.rewindByValRegsInfo();
02979   lastInsIndex = -1;
02980   if (isVarArg) {
02981     unsigned ExtraArgRegsSize;
02982     unsigned ExtraArgRegsSaveSize;
02983     computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
02984                    ExtraArgRegsSize, ExtraArgRegsSaveSize);
02985     TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
02986   }
02987   // If the arg regs save area contains N-byte aligned values, the
02988   // bottom of it must be at least N-byte aligned.
02989   TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
02990   TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
02991 
02992   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02993     CCValAssign &VA = ArgLocs[i];
02994     std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
02995     CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
02996     // Arguments stored in registers.
02997     if (VA.isRegLoc()) {
02998       EVT RegVT = VA.getLocVT();
02999 
03000       if (VA.needsCustom()) {
03001         // f64 and vector types are split up into multiple registers or
03002         // combinations of registers and stack slots.
03003         if (VA.getLocVT() == MVT::v2f64) {
03004           SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
03005                                                    Chain, DAG, dl);
03006           VA = ArgLocs[++i]; // skip ahead to next loc
03007           SDValue ArgValue2;
03008           if (VA.isMemLoc()) {
03009             int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
03010             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
03011             ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
03012                                     MachinePointerInfo::getFixedStack(FI),
03013                                     false, false, false, 0);
03014           } else {
03015             ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
03016                                              Chain, DAG, dl);
03017           }
03018           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
03019           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
03020                                  ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
03021           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
03022                                  ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
03023         } else
03024           ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
03025 
03026       } else {
03027         const TargetRegisterClass *RC;
03028 
03029         if (RegVT == MVT::f32)
03030           RC = &ARM::SPRRegClass;
03031         else if (RegVT == MVT::f64)
03032           RC = &ARM::DPRRegClass;
03033         else if (RegVT == MVT::v2f64)
03034           RC = &ARM::QPRRegClass;
03035         else if (RegVT == MVT::i32)
03036           RC = AFI->isThumb1OnlyFunction() ?
03037             (const TargetRegisterClass*)&ARM::tGPRRegClass :
03038             (const TargetRegisterClass*)&ARM::GPRRegClass;
03039         else
03040           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
03041 
03042         // Transform the arguments in physical registers into virtual ones.
03043         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
03044         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
03045       }
03046 
03047       // If this is an 8 or 16-bit value, it is really passed promoted
03048       // to 32 bits.  Insert an assert[sz]ext to capture this, then
03049       // truncate to the right size.
03050       switch (VA.getLocInfo()) {
03051       default: llvm_unreachable("Unknown loc info!");
03052       case CCValAssign::Full: break;
03053       case CCValAssign::BCvt:
03054         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
03055         break;
03056       case CCValAssign::SExt:
03057         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
03058                                DAG.getValueType(VA.getValVT()));
03059         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
03060         break;
03061       case CCValAssign::ZExt:
03062         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
03063                                DAG.getValueType(VA.getValVT()));
03064         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
03065         break;
03066       }
03067 
03068       InVals.push_back(ArgValue);
03069 
03070     } else { // VA.isRegLoc()
03071 
03072       // sanity check
03073       assert(VA.isMemLoc());
03074       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
03075 
03076       int index = ArgLocs[i].getValNo();
03077 
03078       // Some Ins[] entries become multiple ArgLoc[] entries.
03079       // Process them only once.
03080       if (index != lastInsIndex)
03081         {
03082           ISD::ArgFlagsTy Flags = Ins[index].Flags;
03083           // FIXME: For now, all byval parameter objects are marked mutable.
03084           // This can be changed with more analysis.
03085           // In case of tail call optimization, mark all arguments mutable,
03086           // since they could be overwritten by the lowering of arguments of
03087           // a tail call.
03088           if (Flags.isByVal()) {
03089             unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
03090 
03091             ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
03092             int FrameIndex = StoreByValRegs(
03093                 CCInfo, DAG, dl, Chain, CurOrigArg,
03094                 CurByValIndex,
03095                 Ins[VA.getValNo()].PartOffset,
03096                 VA.getLocMemOffset(),
03097                 Flags.getByValSize(),
03098                 true /*force mutable frames*/,
03099                 ByValStoreOffset,
03100                 TotalArgRegsSaveSize);
03101             ByValStoreOffset += Flags.getByValSize();
03102             ByValStoreOffset = std::min(ByValStoreOffset, 16U);
03103             InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
03104             CCInfo.nextInRegsParam();
03105           } else {
03106             unsigned FIOffset = VA.getLocMemOffset();
03107             int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
03108                                             FIOffset, true);
03109 
03110             // Create load nodes to retrieve arguments from the stack.
03111             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
03112             InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
03113                                          MachinePointerInfo::getFixedStack(FI),
03114                                          false, false, false, 0));
03115           }
03116           lastInsIndex = index;
03117         }
03118     }
03119   }
03120 
03121   // varargs
03122   if (isVarArg)
03123     VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
03124                          CCInfo.getNextStackOffset(),
03125                          TotalArgRegsSaveSize);
03126 
03127   AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
03128 
03129   return Chain;
03130 }
03131 
03132 /// isFloatingPointZero - Return true if this is +0.0.
03133 static bool isFloatingPointZero(SDValue Op) {
03134   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
03135     return CFP->getValueAPF().isPosZero();
03136   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
03137     // Maybe this has already been legalized into the constant pool?
03138     if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
03139       SDValue WrapperOp = Op.getOperand(1).getOperand(0);
03140       if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
03141         if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
03142           return CFP->getValueAPF().isPosZero();
03143     }
03144   }
03145   return false;
03146 }
03147 
03148 /// Returns an appropriate ARM CMP (cmp) and the corresponding condition
03149 /// code for the given operands.
03150 SDValue
03151 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
03152                              SDValue &ARMcc, SelectionDAG &DAG,
03153                              SDLoc dl) const {
03154   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
03155     unsigned C = RHSC->getZExtValue();
03156     if (!isLegalICmpImmediate(C)) {
03157       // The constant does not fit; try adjusting it by one.
03158       switch (CC) {
03159       default: break;
03160       case ISD::SETLT:
03161       case ISD::SETGE:
03162         if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
03163           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
03164           RHS = DAG.getConstant(C-1, MVT::i32);
03165         }
03166         break;
03167       case ISD::SETULT:
03168       case ISD::SETUGE:
03169         if (C != 0 && isLegalICmpImmediate(C-1)) {
03170           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
03171           RHS = DAG.getConstant(C-1, MVT::i32);
03172         }
03173         break;
03174       case ISD::SETLE:
03175       case ISD::SETGT:
03176         if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
03177           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
03178           RHS = DAG.getConstant(C+1, MVT::i32);
03179         }
03180         break;
03181       case ISD::SETULE:
03182       case ISD::SETUGT:
03183         if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
03184           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
03185           RHS = DAG.getConstant(C+1, MVT::i32);
03186         }
03187         break;
03188       }
03189     }
03190   }
03191 
03192   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
03193   ARMISD::NodeType CompareType;
03194   switch (CondCode) {
03195   default:
03196     CompareType = ARMISD::CMP;
03197     break;
03198   case ARMCC::EQ:
03199   case ARMCC::NE:
03200     // Uses only Z Flag
03201     CompareType = ARMISD::CMPZ;
03202     break;
03203   }
03204   ARMcc = DAG.getConstant(CondCode, MVT::i32);
03205   return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
03206 }
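// Worked example of the immediate adjustment above (ARM-mode encodings): for
// (x s< 257) the constant 257 (0x101) is not a legal compare immediate, but
// 256 is, so the compare is rewritten as (x s<= 256), i.e. SETLT becomes
// SETLE with RHS = C - 1.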
03207 
03208 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
03209 SDValue
03210 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
03211                              SDLoc dl) const {
03212   SDValue Cmp;
03213   if (!isFloatingPointZero(RHS))
03214     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
03215   else
03216     Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
03217   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
03218 }
03219 
03220 /// duplicateCmp - Glue values can have only one use, so this function
03221 /// duplicates a comparison node.
03222 SDValue
03223 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
03224   unsigned Opc = Cmp.getOpcode();
03225   SDLoc DL(Cmp);
03226   if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
03227     return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
03228 
03229   assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
03230   Cmp = Cmp.getOperand(0);
03231   Opc = Cmp.getOpcode();
03232   if (Opc == ARMISD::CMPFP)
03233     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
03234   else {
03235     assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
03236     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
03237   }
03238   return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
03239 }
03240 
03241 std::pair<SDValue, SDValue>
03242 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
03243                                  SDValue &ARMcc) const {
03244   assert(Op.getValueType() == MVT::i32 &&  "Unsupported value type");
03245 
03246   SDValue Value, OverflowCmp;
03247   SDValue LHS = Op.getOperand(0);
03248   SDValue RHS = Op.getOperand(1);
03249 
03250 
03251   // FIXME: We are currently always generating CMPs because we don't support
03252   // generating CMN through the backend. This is not as good as the natural
03253   // CMP case because it causes a register dependency and cannot be folded
03254   // later.
03255 
03256   switch (Op.getOpcode()) {
03257   default:
03258     llvm_unreachable("Unknown overflow instruction!");
03259   case ISD::SADDO:
03260     ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
03261     Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
03262     OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
03263     break;
03264   case ISD::UADDO:
03265     ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
03266     Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
03267     OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
03268     break;
03269   case ISD::SSUBO:
03270     ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
03271     Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
03272     OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
03273     break;
03274   case ISD::USUBO:
03275     ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
03276     Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
03277     OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
03278     break;
03279   } // switch (...)
03280 
03281   return std::make_pair(Value, OverflowCmp);
03282 }
03283 
03284 
03285 SDValue
03286 ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
03287   // Let legalize expand this if it isn't a legal type yet.
03288   if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
03289     return SDValue();
03290 
03291   SDValue Value, OverflowCmp;
03292   SDValue ARMcc;
03293   std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
03294   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03295   // We use 0 and 1 as false and true values.
03296   SDValue TVal = DAG.getConstant(1, MVT::i32);
03297   SDValue FVal = DAG.getConstant(0, MVT::i32);
03298   EVT VT = Op.getValueType();
03299 
03300   SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal,
03301                                  ARMcc, CCR, OverflowCmp);
03302 
03303   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
03304   return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
03305 }
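// Illustration for ISD::UADDO: Value = a + b and OverflowCmp = CMP(Value, a),
// so the CMOV above produces 0 under HS (Value >=u a, no wrap) and 1
// otherwise, i.e. Overflow = ((a + b) <u a).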
03306 
03307 
03308 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
03309   SDValue Cond = Op.getOperand(0);
03310   SDValue SelectTrue = Op.getOperand(1);
03311   SDValue SelectFalse = Op.getOperand(2);
03312   SDLoc dl(Op);
03313   unsigned Opc = Cond.getOpcode();
03314 
03315   if (Cond.getResNo() == 1 &&
03316       (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
03317        Opc == ISD::USUBO)) {
03318     if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
03319       return SDValue();
03320 
03321     SDValue Value, OverflowCmp;
03322     SDValue ARMcc;
03323     std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
03324     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03325     EVT VT = Op.getValueType();
03326 
03327     return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse,
03328                        ARMcc, CCR, OverflowCmp);
03329 
03330   }
03331 
03332   // Convert:
03333   //
03334   //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
03335   //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
03336   //
03337   if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
03338     const ConstantSDNode *CMOVTrue =
03339       dyn_cast<ConstantSDNode>(Cond.getOperand(0));
03340     const ConstantSDNode *CMOVFalse =
03341       dyn_cast<ConstantSDNode>(Cond.getOperand(1));
03342 
03343     if (CMOVTrue && CMOVFalse) {
03344       unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
03345       unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
03346 
03347       SDValue True;
03348       SDValue False;
03349       if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
03350         True = SelectTrue;
03351         False = SelectFalse;
03352       } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
03353         True = SelectFalse;
03354         False = SelectTrue;
03355       }
03356 
03357       if (True.getNode() && False.getNode()) {
03358         EVT VT = Op.getValueType();
03359         SDValue ARMcc = Cond.getOperand(2);
03360         SDValue CCR = Cond.getOperand(3);
03361         SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
03362         assert(True.getValueType() == VT);
03363         return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
03364       }
03365     }
03366   }
03367 
03368   // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
03369   // undefined bits before doing a full-word comparison with zero.
03370   Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
03371                      DAG.getConstant(1, Cond.getValueType()));
03372 
03373   return DAG.getSelectCC(dl, Cond,
03374                          DAG.getConstant(0, Cond.getValueType()),
03375                          SelectTrue, SelectFalse, ISD::SETNE);
03376 }
03377 
03378 static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
03379   if (CC == ISD::SETNE)
03380     return ISD::SETEQ;
03381   return ISD::getSetCCInverse(CC, true);
03382 }
03383 
03384 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
03385                                  bool &swpCmpOps, bool &swpVselOps) {
03386   // Start by selecting the GE condition code for opcodes that return true for
03387   // 'equality'
03388   if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
03389       CC == ISD::SETULE)
03390     CondCode = ARMCC::GE;
03391 
03392   // and GT for opcodes that return false for 'equality'.
03393   else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
03394            CC == ISD::SETULT)
03395     CondCode = ARMCC::GT;
03396 
03397   // Since we are constrained to GE/GT, if the opcode contains 'less', we need
03398   // to swap the compare operands.
03399   if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
03400       CC == ISD::SETULT)
03401     swpCmpOps = true;
03402 
03403   // Both GT and GE are ordered comparisons, and return false for 'unordered'.
03404   // If we have an unordered opcode, we need to swap the operands to the VSEL
03405   // instruction (effectively negating the condition).
03406   //
03407   // This also has the effect of swapping which one of 'less' or 'greater'
03408   // returns true, so we also swap the compare operands. It also switches
03409   // whether we return true for 'equality', so we compensate by picking the
03410   // opposite condition code to our original choice.
03411   if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
03412       CC == ISD::SETUGT) {
03413     swpCmpOps = !swpCmpOps;
03414     swpVselOps = !swpVselOps;
03415     CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
03416   }
03417 
03418   // 'ordered' is 'anything but unordered', so use the VS condition code and
03419   // swap the VSEL operands.
03420   if (CC == ISD::SETO) {
03421     CondCode = ARMCC::VS;
03422     swpVselOps = true;
03423   }
03424 
03425   // 'unordered or not equal' is 'anything but equal', so use the EQ condition
03426   // code and swap the VSEL operands.
03427   if (CC == ISD::SETUNE) {
03428     CondCode = ARMCC::EQ;
03429     swpVselOps = true;
03430   }
03431 }
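// Worked example: for "select (setult a, b), t, f" the code above first picks
// GT, requests a compare-operand swap (ULT is a 'less' form), and then, since
// ULT is also unordered, flips both swaps again, ending with
//   CondCode = GE, swpCmpOps = false, swpVselOps = true
// i.e. compare (a, b) and select f under GE, t otherwise; the result is t
// whenever a < b or the operands are unordered, which matches SETULT.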
03432 
03433 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
03434   EVT VT = Op.getValueType();
03435   SDValue LHS = Op.getOperand(0);
03436   SDValue RHS = Op.getOperand(1);
03437   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
03438   SDValue TrueVal = Op.getOperand(2);
03439   SDValue FalseVal = Op.getOperand(3);
03440   SDLoc dl(Op);
03441 
03442   if (LHS.getValueType() == MVT::i32) {
03443     // Try to generate VSEL on ARMv8.
03444     // The VSEL instruction can't use all the usual ARM condition
03445     // codes: it only has two bits to select the condition code, so it's
03446     // constrained to use only GE, GT, VS and EQ.
03447     //
03448     // To implement all the various ISD::SETXXX opcodes, we sometimes need to
03449     // swap the operands of the previous compare instruction (effectively
03450     // inverting the compare condition, swapping 'less' and 'greater') and
03451     // sometimes need to swap the operands to the VSEL (which inverts the
03452     // condition in the sense of firing whenever the previous condition didn't)
03453     if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
03454                                       TrueVal.getValueType() == MVT::f64)) {
03455       ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
03456       if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
03457           CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
03458         CC = getInverseCCForVSEL(CC);
03459         std::swap(TrueVal, FalseVal);
03460       }
03461     }
03462 
03463     SDValue ARMcc;
03464     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03465     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
03466     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
03467                        Cmp);
03468   }
03469 
03470   ARMCC::CondCodes CondCode, CondCode2;
03471   FPCCToARMCC(CC, CondCode, CondCode2);
03472 
03473   // Try to generate VSEL on ARMv8.
03474   if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
03475                                     TrueVal.getValueType() == MVT::f64)) {
03476     // We can select VMAXNM/VMINNM from a compare followed by a select with the
03477     // same operands, as follows:
03478     //   c = fcmp [ogt, olt, ugt, ult] a, b
03479     //   select c, a, b
03480     // We only do this in unsafe-fp-math, because signed zeros and NaNs are
03481     // handled differently than the original code sequence.
03482     if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
03483         RHS == FalseVal) {
03484       if (CC == ISD::SETOGT || CC == ISD::SETUGT)
03485         return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
03486       if (CC == ISD::SETOLT || CC == ISD::SETULT)
03487         return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
03488     }
03489 
03490     bool swpCmpOps = false;
03491     bool swpVselOps = false;
03492     checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
03493 
03494     if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
03495         CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
03496       if (swpCmpOps)
03497         std::swap(LHS, RHS);
03498       if (swpVselOps)
03499         std::swap(TrueVal, FalseVal);
03500     }
03501   }
03502 
03503   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
03504   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
03505   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03506   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
03507                                ARMcc, CCR, Cmp);
03508   if (CondCode2 != ARMCC::AL) {
03509     SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
03510     // FIXME: Needs another CMP because flag can have but one use.
03511     SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
03512     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
03513                          Result, TrueVal, ARMcc2, CCR, Cmp2);
03514   }
03515   return Result;
03516 }
03517 
03518 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
03519 /// to morph to an integer compare sequence.
03520 static bool canChangeToInt(SDValue Op, bool &SeenZero,
03521                            const ARMSubtarget *Subtarget) {
03522   SDNode *N = Op.getNode();
03523   if (!N->hasOneUse())
03524     // Otherwise it requires moving the value from fp to integer registers.
03525     return false;
03526   if (!N->getNumValues())
03527     return false;
03528   EVT VT = Op.getValueType();
03529   if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
03530     // f32 case is generally profitable. f64 case only makes sense when vcmpe +
03531     // vmrs are very slow, e.g. cortex-a8.
03532     return false;
03533 
03534   if (isFloatingPointZero(Op)) {
03535     SeenZero = true;
03536     return true;
03537   }
03538   return ISD::isNormalLoad(N);
03539 }
03540 
03541 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
03542   if (isFloatingPointZero(Op))
03543     return DAG.getConstant(0, MVT::i32);
03544 
03545   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
03546     return DAG.getLoad(MVT::i32, SDLoc(Op),
03547                        Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
03548                        Ld->isVolatile(), Ld->isNonTemporal(),
03549                        Ld->isInvariant(), Ld->getAlignment());
03550 
03551   llvm_unreachable("Unknown VFP cmp argument!");
03552 }
03553 
03554 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
03555                            SDValue &RetVal1, SDValue &RetVal2) {
03556   if (isFloatingPointZero(Op)) {
03557     RetVal1 = DAG.getConstant(0, MVT::i32);
03558     RetVal2 = DAG.getConstant(0, MVT::i32);
03559     return;
03560   }
03561 
03562   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
03563     SDValue Ptr = Ld->getBasePtr();
03564     RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op),
03565                           Ld->getChain(), Ptr,
03566                           Ld->getPointerInfo(),
03567                           Ld->isVolatile(), Ld->isNonTemporal(),
03568                           Ld->isInvariant(), Ld->getAlignment());
03569 
03570     EVT PtrType = Ptr.getValueType();
03571     unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
03572     SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op),
03573                                  PtrType, Ptr, DAG.getConstant(4, PtrType));
03574     RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op),
03575                           Ld->getChain(), NewPtr,
03576                           Ld->getPointerInfo().getWithOffset(4),
03577                           Ld->isVolatile(), Ld->isNonTemporal(),
03578                           Ld->isInvariant(), NewAlign);
03579     return;
03580   }
03581 
03582   llvm_unreachable("Unknown VFP cmp argument!");
03583 }
03584 
03585 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
03586 /// f32 and even f64 comparisons to integer ones.
03587 SDValue
03588 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
03589   SDValue Chain = Op.getOperand(0);
03590   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
03591   SDValue LHS = Op.getOperand(2);
03592   SDValue RHS = Op.getOperand(3);
03593   SDValue Dest = Op.getOperand(4);
03594   SDLoc dl(Op);
03595 
03596   bool LHSSeenZero = false;
03597   bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
03598   bool RHSSeenZero = false;
03599   bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
03600   if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
03601     // If unsafe fp math optimization is enabled and there are no other uses of
03602     // the CMP operands, and the condition code is EQ or NE, we can optimize it
03603     // to an integer comparison.
03604     if (CC == ISD::SETOEQ)
03605       CC = ISD::SETEQ;
03606     else if (CC == ISD::SETUNE)
03607       CC = ISD::SETNE;
03608 
03609     SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
03610     SDValue ARMcc;
03611     if (LHS.getValueType() == MVT::f32) {
03612       LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
03613                         bitcastf32Toi32(LHS, DAG), Mask);
03614       RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
03615                         bitcastf32Toi32(RHS, DAG), Mask);
03616       SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
03617       SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03618       return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
03619                          Chain, Dest, ARMcc, CCR, Cmp);
03620     }
03621 
03622     SDValue LHS1, LHS2;
03623     SDValue RHS1, RHS2;
03624     expandf64Toi32(LHS, DAG, LHS1, LHS2);
03625     expandf64Toi32(RHS, DAG, RHS1, RHS2);
03626     LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
03627     RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
03628     ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
03629     ARMcc = DAG.getConstant(CondCode, MVT::i32);
03630     SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
03631     SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
03632     return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
03633   }
03634 
03635   return SDValue();
03636 }
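
// A rough illustration of the transformation above: for a branch on
//   fcmp oeq float %x, 0.0
// the comparison is done on the integer bit pattern of %x with the sign bit
// masked off, so that -0.0 still compares equal to +0.0.  If %x has bits
// 0x80000000 (-0.0f), then 0x80000000 & 0x7fffffff == 0, which equals the
// masked zero, matching the FP result.  For f64 the two 32-bit halves are
// compared with an ARMISD::BCC_i64 node, again masking off the sign bit
// before the compare.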
03637 
03638 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
03639   SDValue Chain = Op.getOperand(0);
03640   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
03641   SDValue LHS = Op.getOperand(2);
03642   SDValue RHS = Op.getOperand(3);
03643   SDValue Dest = Op.getOperand(4);
03644   SDLoc dl(Op);
03645 
03646   if (LHS.getValueType() == MVT::i32) {
03647     SDValue ARMcc;
03648     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
03649     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03650     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
03651                        Chain, Dest, ARMcc, CCR, Cmp);
03652   }
03653 
03654   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
03655 
03656   if (getTargetMachine().Options.UnsafeFPMath &&
03657       (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
03658        CC == ISD::SETNE || CC == ISD::SETUNE)) {
03659     SDValue Result = OptimizeVFPBrcond(Op, DAG);
03660     if (Result.getNode())
03661       return Result;
03662   }
03663 
03664   ARMCC::CondCodes CondCode, CondCode2;
03665   FPCCToARMCC(CC, CondCode, CondCode2);
03666 
03667   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
03668   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
03669   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03670   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
03671   SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
03672   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
03673   if (CondCode2 != ARMCC::AL) {
03674     ARMcc = DAG.getConstant(CondCode2, MVT::i32);
03675     SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
03676     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
03677   }
03678   return Res;
03679 }
03680 
03681 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
03682   SDValue Chain = Op.getOperand(0);
03683   SDValue Table = Op.getOperand(1);
03684   SDValue Index = Op.getOperand(2);
03685   SDLoc dl(Op);
03686 
03687   EVT PTy = getPointerTy();
03688   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
03689   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
03690   SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
03691   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
03692   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
03693   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
03694   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
03695   if (Subtarget->isThumb2()) {
03696     // Thumb2 uses a two-level jump. That is, it jumps into the jump table
03697     // which does another jump to the destination. This also makes it easier
03698     // to translate it to TBB / TBH later.
03699     // FIXME: This might not work if the function is extremely large.
03700     return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
03701                        Addr, Op.getOperand(2), JTI, UId);
03702   }
03703   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
03704     Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
03705                        MachinePointerInfo::getJumpTable(),
03706                        false, false, false, 0);
03707     Chain = Addr.getValue(1);
03708     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
03709     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
03710   } else {
03711     Addr = DAG.getLoad(PTy, dl, Chain, Addr,
03712                        MachinePointerInfo::getJumpTable(),
03713                        false, false, false, 0);
03714     Chain = Addr.getValue(1);
03715     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
03716   }
03717 }
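
// On the ARM (non-Thumb2) paths above, the entry address is simply
// table base + 4 * case index; for example, case index 3 reads the word at
// Table + 12.  Under PIC the loaded word is an offset relative to the jump
// table, so the branch target becomes Table + entry; otherwise the loaded
// word is the destination address itself.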
03718 
03719 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
03720   EVT VT = Op.getValueType();
03721   SDLoc dl(Op);
03722 
03723   if (Op.getValueType().getVectorElementType() == MVT::i32) {
03724     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
03725       return Op;
03726     return DAG.UnrollVectorOp(Op.getNode());
03727   }
03728 
03729   assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
03730          "Invalid type for custom lowering!");
03731   if (VT != MVT::v4i16)
03732     return DAG.UnrollVectorOp(Op.getNode());
03733 
03734   Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
03735   return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
03736 }
03737 
03738 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
03739   EVT VT = Op.getValueType();
03740   if (VT.isVector())
03741     return LowerVectorFP_TO_INT(Op, DAG);
03742 
03743   SDLoc dl(Op);
03744   unsigned Opc;
03745 
03746   switch (Op.getOpcode()) {
03747   default: llvm_unreachable("Invalid opcode!");
03748   case ISD::FP_TO_SINT:
03749     Opc = ARMISD::FTOSI;
03750     break;
03751   case ISD::FP_TO_UINT:
03752     Opc = ARMISD::FTOUI;
03753     break;
03754   }
03755   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
03756   return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
03757 }
03758 
03759 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
03760   EVT VT = Op.getValueType();
03761   SDLoc dl(Op);
03762 
03763   if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
03764     if (VT.getVectorElementType() == MVT::f32)
03765       return Op;
03766     return DAG.UnrollVectorOp(Op.getNode());
03767   }
03768 
03769   assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
03770          "Invalid type for custom lowering!");
03771   if (VT != MVT::v4f32)
03772     return DAG.UnrollVectorOp(Op.getNode());
03773 
03774   unsigned CastOpc;
03775   unsigned Opc;
03776   switch (Op.getOpcode()) {
03777   default: llvm_unreachable("Invalid opcode!");
03778   case ISD::SINT_TO_FP:
03779     CastOpc = ISD::SIGN_EXTEND;
03780     Opc = ISD::SINT_TO_FP;
03781     break;
03782   case ISD::UINT_TO_FP:
03783     CastOpc = ISD::ZERO_EXTEND;
03784     Opc = ISD::UINT_TO_FP;
03785     break;
03786   }
03787 
03788   Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
03789   return DAG.getNode(Opc, dl, VT, Op);
03790 }
03791 
03792 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
03793   EVT VT = Op.getValueType();
03794   if (VT.isVector())
03795     return LowerVectorINT_TO_FP(Op, DAG);
03796 
03797   SDLoc dl(Op);
03798   unsigned Opc;
03799 
03800   switch (Op.getOpcode()) {
03801   default: llvm_unreachable("Invalid opcode!");
03802   case ISD::SINT_TO_FP:
03803     Opc = ARMISD::SITOF;
03804     break;
03805   case ISD::UINT_TO_FP:
03806     Opc = ARMISD::UITOF;
03807     break;
03808   }
03809 
03810   Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
03811   return DAG.getNode(Opc, dl, VT, Op);
03812 }
03813 
03814 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
03815   // Implement fcopysign with a fabs and a conditional fneg.
03816   SDValue Tmp0 = Op.getOperand(0);
03817   SDValue Tmp1 = Op.getOperand(1);
03818   SDLoc dl(Op);
03819   EVT VT = Op.getValueType();
03820   EVT SrcVT = Tmp1.getValueType();
03821   bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
03822     Tmp0.getOpcode() == ARMISD::VMOVDRR;
03823   bool UseNEON = !InGPR && Subtarget->hasNEON();
03824 
03825   if (UseNEON) {
03826     // Use VBSL to copy the sign bit.
03827     unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
03828     SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
03829                                DAG.getTargetConstant(EncodedVal, MVT::i32));
03830     EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
03831     if (VT == MVT::f64)
03832       Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
03833                          DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
03834                          DAG.getConstant(32, MVT::i32));
03835     else /*if (VT == MVT::f32)*/
03836       Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
03837     if (SrcVT == MVT::f32) {
03838       Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
03839       if (VT == MVT::f64)
03840         Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
03841                            DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
03842                            DAG.getConstant(32, MVT::i32));
03843     } else if (VT == MVT::f32)
03844       Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
03845                          DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
03846                          DAG.getConstant(32, MVT::i32));
03847     Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
03848     Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
03849 
03850     SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
03851                                             MVT::i32);
03852     AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
03853     SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
03854                                   DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
03855 
03856     SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
03857                               DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
03858                               DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
03859     if (VT == MVT::f32) {
03860       Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
03861       Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
03862                         DAG.getConstant(0, MVT::i32));
03863     } else {
03864       Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
03865     }
03866 
03867     return Res;
03868   }
03869 
03870   // Bitcast operand 1 to i32.
03871   if (SrcVT == MVT::f64)
03872     Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
03873                        Tmp1).getValue(1);
03874   Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
03875 
03876   // Or in the signbit with integer operations.
03877   SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
03878   SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
03879   Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
03880   if (VT == MVT::f32) {
03881     Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
03882                        DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
03883     return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
03884                        DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
03885   }
03886 
03887   // f64: Or the high part with signbit and then combine two parts.
03888   Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
03889                      Tmp0);
03890   SDValue Lo = Tmp0.getValue(0);
03891   SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
03892   Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
03893   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
03894 }
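
// The GPR path above boils down to the usual integer formulation of
// copysign:
//   bits(result) = (bits(mag) & 0x7fffffff) | (bits(sign) & 0x80000000)
// e.g. copysign(2.0f, -1.0f): (0x40000000 & 0x7fffffff) |
// (0xbf800000 & 0x80000000) = 0xc0000000, the bit pattern of -2.0f.
// For f64 only the high word is rewritten this way; the low word passes
// through unchanged.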
03895 
03896 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
03897   MachineFunction &MF = DAG.getMachineFunction();
03898   MachineFrameInfo *MFI = MF.getFrameInfo();
03899   MFI->setReturnAddressIsTaken(true);
03900 
03901   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
03902     return SDValue();
03903 
03904   EVT VT = Op.getValueType();
03905   SDLoc dl(Op);
03906   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
03907   if (Depth) {
03908     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
03909     SDValue Offset = DAG.getConstant(4, MVT::i32);
03910     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
03911                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
03912                        MachinePointerInfo(), false, false, false, 0);
03913   }
03914 
03915   // Return LR, which contains the return address. Mark it an implicit live-in.
03916   unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
03917   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
03918 }
03919 
03920 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
03921   const ARMBaseRegisterInfo &ARI =
03922     *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
03923   MachineFunction &MF = DAG.getMachineFunction();
03924   MachineFrameInfo *MFI = MF.getFrameInfo();
03925   MFI->setFrameAddressIsTaken(true);
03926 
03927   EVT VT = Op.getValueType();
03928   SDLoc dl(Op);  // FIXME probably not meaningful
03929   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
03930   unsigned FrameReg = ARI.getFrameRegister(MF);
03931   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
03932   while (Depth--)
03933     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
03934                             MachinePointerInfo(),
03935                             false, false, false, 0);
03936   return FrameAddr;
03937 }
03938 
03939 // FIXME? Maybe this could be a TableGen attribute on some registers and
03940 // this table could be generated automatically from RegInfo.
03941 unsigned ARMTargetLowering::getRegisterByName(const char* RegName,
03942                                               EVT VT) const {
03943   unsigned Reg = StringSwitch<unsigned>(RegName)
03944                        .Case("sp", ARM::SP)
03945                        .Default(0);
03946   if (Reg)
03947     return Reg;
03948   report_fatal_error("Invalid register name global variable");
03949 }
03950 
03951 /// ExpandBITCAST - If the target supports VFP, this function is called to
03952 /// expand a bit convert where either the source or destination type is i64 to
03953 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
03954 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
03955 /// vectors), since the legalizer won't know what to do with that.
03956 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
03957   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
03958   SDLoc dl(N);
03959   SDValue Op = N->getOperand(0);
03960 
03961   // This function is only supposed to be called for i64 types, either as the
03962   // source or destination of the bit convert.
03963   EVT SrcVT = Op.getValueType();
03964   EVT DstVT = N->getValueType(0);
03965   assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
03966          "ExpandBITCAST called for non-i64 type");
03967 
03968   // Turn i64->f64 into VMOVDRR.
03969   if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
03970     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
03971                              DAG.getConstant(0, MVT::i32));
03972     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
03973                              DAG.getConstant(1, MVT::i32));
03974     return DAG.getNode(ISD::BITCAST, dl, DstVT,
03975                        DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
03976   }
03977 
03978   // Turn f64->i64 into VMOVRRD.
03979   if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
03980     SDValue Cvt;
03981     if (TLI.isBigEndian() && SrcVT.isVector() &&
03982         SrcVT.getVectorNumElements() > 1)
03983       Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
03984                         DAG.getVTList(MVT::i32, MVT::i32),
03985                         DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
03986     else
03987       Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
03988                         DAG.getVTList(MVT::i32, MVT::i32), Op);
03989     // Merge the pieces into a single i64 value.
03990     return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
03991   }
03992 
03993   return SDValue();
03994 }
03995 
03996 /// getZeroVector - Returns a vector of specified type with all zero elements.
03997 /// Zero vectors are used to represent vector negation and in those cases
03998 /// will be implemented with the NEON VNEG instruction.  However, VNEG does
03999 /// not support i64 elements, so sometimes the zero vectors will need to be
04000 /// explicitly constructed.  Regardless, use a canonical VMOV to create the
04001 /// zero vector.
04002 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
04003   assert(VT.isVector() && "Expected a vector type");
04004   // The canonical modified immediate encoding of a zero vector is....0!
04005   SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
04006   EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
04007   SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
04008   return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
04009 }
04010 
04011 /// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
04012 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
04013 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
04014                                                 SelectionDAG &DAG) const {
04015   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
04016   EVT VT = Op.getValueType();
04017   unsigned VTBits = VT.getSizeInBits();
04018   SDLoc dl(Op);
04019   SDValue ShOpLo = Op.getOperand(0);
04020   SDValue ShOpHi = Op.getOperand(1);
04021   SDValue ShAmt  = Op.getOperand(2);
04022   SDValue ARMcc;
04023   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
04024 
04025   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
04026 
04027   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
04028                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
04029   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
04030   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
04031                                    DAG.getConstant(VTBits, MVT::i32));
04032   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
04033   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
04034   SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
04035 
04036   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
04037   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
04038                           ARMcc, DAG, dl);
04039   SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
04040   SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
04041                            CCR, Cmp);
04042 
04043   SDValue Ops[2] = { Lo, Hi };
04044   return DAG.getMergeValues(Ops, dl);
04045 }
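
// Roughly, for a 64-bit value split into 32-bit halves (lo, hi) and a shift
// amount amt, the code above computes:
//   Lo = (amt < 32) ? (lo >> amt) | (hi << (32 - amt))   // FalseVal
//                   : hi >> (amt - 32)                    // TrueVal
//   Hi = hi >> amt
// where the shift of hi is arithmetic for SRA_PARTS and logical for
// SRL_PARTS, and the choice between the two Lo expressions is made with a
// CMOV on the sign of amt - 32.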
04046 
04047 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
04048 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
04049 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
04050                                                SelectionDAG &DAG) const {
04051   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
04052   EVT VT = Op.getValueType();
04053   unsigned VTBits = VT.getSizeInBits();
04054   SDLoc dl(Op);
04055   SDValue ShOpLo = Op.getOperand(0);
04056   SDValue ShOpHi = Op.getOperand(1);
04057   SDValue ShAmt  = Op.getOperand(2);
04058   SDValue ARMcc;
04059 
04060   assert(Op.getOpcode() == ISD::SHL_PARTS);
04061   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
04062                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
04063   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
04064   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
04065                                    DAG.getConstant(VTBits, MVT::i32));
04066   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
04067   SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
04068 
04069   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
04070   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
04071   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
04072                           ARMcc, DAG, dl);
04073   SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
04074   SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
04075                            CCR, Cmp);
04076 
04077   SDValue Ops[2] = { Lo, Hi };
04078   return DAG.getMergeValues(Ops, dl);
04079 }
04080 
04081 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
04082                                             SelectionDAG &DAG) const {
04083   // The rounding mode is in bits 23:22 of the FPSCR.
04084   // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
04085   // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
04086   // so that the shift + and get folded into a bitfield extract.
04087   SDLoc dl(Op);
04088   SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
04089                               DAG.getConstant(Intrinsic::arm_get_fpscr,
04090                                               MVT::i32));
04091   SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
04092                                   DAG.getConstant(1U << 22, MVT::i32));
04093   SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
04094                               DAG.getConstant(22, MVT::i32));
04095   return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
04096                      DAG.getConstant(3, MVT::i32));
04097 }
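
// Worked example: if FPSCR[23:22] == 0b11 (round toward zero), then, ignoring
// the other FPSCR bits (which the final 'and' discards),
//   ((3 << 22) + (1 << 22)) >> 22 = 4,  and 4 & 3 = 0,
// which is the FLT_ROUNDS encoding for "toward zero", matching the
// 0->1, 1->2, 2->3, 3->0 mapping described above.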
04098 
04099 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
04100                          const ARMSubtarget *ST) {
04101   EVT VT = N->getValueType(0);
04102   SDLoc dl(N);
04103 
04104   if (!ST->hasV6T2Ops())
04105     return SDValue();
04106 
04107   SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
04108   return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
04109 }
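
// i.e. cttz(x) is computed as ctlz(rbit(x)).  For example, for x = 0x8 the
// RBIT result is 0x10000000, whose leading-zero count is 3, the number of
// trailing zeros in the original value.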
04110 
04111 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
04112 /// for each 16-bit element from the operand, repeated.  The basic idea is to
04113 /// leverage vcnt to get the 8-bit counts, gather and add the results.
04114 ///
04115 /// Trace for v4i16:
04116 /// input    = [v0    v1    v2    v3   ] (vi 16-bit element)
04117 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
04118 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
04119 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
04120 ///            [b0 b1 b2 b3 b4 b5 b6 b7]
04121 ///           +[b1 b0 b3 b2 b5 b4 b7 b6]
04122 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
04123 /// vuzp:    = [k0 k1 k2 k3 k0 k1 k2 k3]  each ki is 8-bits)
04124 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
04125   EVT VT = N->getValueType(0);
04126   SDLoc DL(N);
04127 
04128   EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
04129   SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
04130   SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
04131   SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
04132   SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
04133   return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
04134 }
04135 
04136 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
04137 /// bit-count for each 16-bit element from the operand.  We need slightly
04138 /// different sequencing for v4i16 and v8i16 to stay within NEON's available
04139 /// 64/128-bit registers.
04140 ///
04141 /// Trace for v4i16:
04142 /// input           = [v0    v1    v2    v3    ] (vi 16-bit element)
04143 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
04144 /// v8i16:Extended  = [k0    k1    k2    k3    k0    k1    k2    k3    ]
04145 /// v4i16:Extracted = [k0    k1    k2    k3    ]
04146 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
04147   EVT VT = N->getValueType(0);
04148   SDLoc DL(N);
04149 
04150   SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
04151   if (VT.is64BitVector()) {
04152     SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
04153     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
04154                        DAG.getIntPtrConstant(0));
04155   } else {
04156     SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
04157                                     BitCounts, DAG.getIntPtrConstant(0));
04158     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
04159   }
04160 }
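
// As a concrete example of the two routines above, a v4i16 input of
// <0x0003, 0x00ff, 0x0101, 0x8000> should come out of
// lowerCTPOP16BitElements as the per-element bit-counts <2, 8, 2, 1>.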
04161 
04162 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
04163 /// bit-count for each 32-bit element from the operand.  The idea here is
04164 /// to split the vector into 16-bit elements, leverage the 16-bit count
04165 /// routine, and then combine the results.
04166 ///
04167 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
04168 /// input    = [v0    v1    ] (vi: 32-bit elements)
04169 /// Bitcast  = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
04170 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
04171 /// vrev: N0 = [k1 k0 k3 k2 ]
04172 ///            [k0 k1 k2 k3 ]
04173 ///       N1 =+[k1 k0 k3 k2 ]
04174 ///            [k0 k2 k1 k3 ]
04175 ///       N2 =+[k1 k3 k0 k2 ]
04176 ///            [k0    k2    k1    k3    ]
04177 /// Extended =+[k1    k3    k0    k2    ]
04178 ///            [k0    k2    ]
04179 /// Extracted=+[k1    k3    ]
04180 ///
04181 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
04182   EVT VT = N->getValueType(0);
04183   SDLoc DL(N);
04184 
04185   EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
04186 
04187   SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
04188   SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
04189   SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
04190   SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
04191   SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
04192 
04193   if (VT.is64BitVector()) {
04194     SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
04195     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
04196                        DAG.getIntPtrConstant(0));
04197   } else {
04198     SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
04199                                     DAG.getIntPtrConstant(0));
04200     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
04201   }
04202 }
04203 
04204 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
04205                           const ARMSubtarget *ST) {
04206   EVT VT = N->getValueType(0);
04207 
04208   assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
04209   assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
04210           VT == MVT::v4i16 || VT == MVT::v8i16) &&
04211          "Unexpected type for custom ctpop lowering");
04212 
04213   if (VT.getVectorElementType() == MVT::i32)
04214     return lowerCTPOP32BitElements(N, DAG);
04215   else
04216     return lowerCTPOP16BitElements(N, DAG);
04217 }
04218 
04219 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
04220                           const ARMSubtarget *ST) {
04221   EVT VT = N->getValueType(0);
04222   SDLoc dl(N);
04223 
04224   if (!VT.isVector())
04225     return SDValue();
04226 
04227   // Lower vector shifts on NEON to use VSHL.
04228   assert(ST->hasNEON() && "unexpected vector shift");
04229 
04230   // Left shifts translate directly to the vshiftu intrinsic.
04231   if (N->getOpcode() == ISD::SHL)
04232     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
04233                        DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
04234                        N->getOperand(0), N->getOperand(1));
04235 
04236   assert((N->getOpcode() == ISD::SRA ||
04237           N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
04238 
04239   // NEON uses the same intrinsics for both left and right shifts.  For
04240   // right shifts, the shift amounts are negative, so negate the vector of
04241   // shift amounts.
04242   EVT ShiftVT = N->getOperand(1).getValueType();
04243   SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
04244                                      getZeroVector(ShiftVT, DAG, dl),
04245                                      N->getOperand(1));
04246   Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
04247                              Intrinsic::arm_neon_vshifts :
04248                              Intrinsic::arm_neon_vshiftu);
04249   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
04250                      DAG.getConstant(vshiftInt, MVT::i32),
04251                      N->getOperand(0), NegatedCount);
04252 }
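
// NEON's VSHL shifts each lane left for positive counts and right for
// negative ones, so a right shift such as
//   %r = lshr <4 x i32> %x, %y
// is lowered here as vshiftu(%x, 0 - %y), and an 'ashr' uses vshifts with
// the same negated counts; a plain 'shl' maps to vshiftu(%x, %y) directly.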
04253 
04254 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
04255                                 const ARMSubtarget *ST) {
04256   EVT VT = N->getValueType(0);
04257   SDLoc dl(N);
04258 
04259   // We can get here for a node like i32 = ISD::SHL i32, i64
04260   if (VT != MVT::i64)
04261     return SDValue();
04262 
04263   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
04264          "Unknown shift to lower!");
04265 
04266   // We only lower SRA, SRL of 1 here, all others use generic lowering.
04267   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
04268       cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
04269     return SDValue();
04270 
04271   // If we are in thumb mode, we don't have RRX.
04272   if (ST->isThumb1Only()) return SDValue();
04273 
04274   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
04275   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
04276                            DAG.getConstant(0, MVT::i32));
04277   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
04278                            DAG.getConstant(1, MVT::i32));
04279 
04280   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
04281   // captures the result into a carry flag.
04282   unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
04283   Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
04284 
04285   // The low part is an ARMISD::RRX operand, which shifts the carry in.
04286   Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
04287 
04288   // Merge the pieces into a single i64 value.
04289   return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
04290 }
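
// Worked example: a logical shift right by one of the i64 value
// 0x0000000300000001.  SRL_FLAG turns the high word 0x00000003 into
// 0x00000001 with the carry flag set to the bit shifted out (1), and RRX
// rotates that carry into bit 31 of the low word: (1 >> 1) | (1 << 31) =
// 0x80000000.  The combined result is 0x0000000180000000.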
04291 
04292 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
04293   SDValue TmpOp0, TmpOp1;
04294   bool Invert = false;
04295   bool Swap = false;
04296   unsigned Opc = 0;
04297 
04298   SDValue Op0 = Op.getOperand(0);
04299   SDValue Op1 = Op.getOperand(1);
04300   SDValue CC = Op.getOperand(2);
04301   EVT VT = Op.getValueType();
04302   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
04303   SDLoc dl(Op);
04304 
04305   if (Op.getOperand(1).getValueType().isFloatingPoint()) {
04306     switch (SetCCOpcode) {
04307     default: llvm_unreachable("Illegal FP comparison");
04308     case ISD::SETUNE:
04309     case ISD::SETNE:  Invert = true; // Fallthrough
04310     case ISD::SETOEQ:
04311     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
04312     case ISD::SETOLT:
04313     case ISD::SETLT: Swap = true; // Fallthrough
04314     case ISD::SETOGT:
04315     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
04316     case ISD::SETOLE:
04317     case ISD::SETLE:  Swap = true; // Fallthrough
04318     case ISD::SETOGE:
04319     case ISD::SETGE: Opc = ARMISD::VCGE; break;
04320     case ISD::SETUGE: Swap = true; // Fallthrough
04321     case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
04322     case ISD::SETUGT: Swap = true; // Fallthrough
04323     case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
04324     case ISD::SETUEQ: Invert = true; // Fallthrough
04325     case ISD::SETONE:
04326       // Expand this to (OLT | OGT).
04327       TmpOp0 = Op0;
04328       TmpOp1 = Op1;
04329       Opc = ISD::OR;
04330       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
04331       Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
04332       break;
04333     case ISD::SETUO: Invert = true; // Fallthrough
04334     case ISD::SETO:
04335       // Expand this to (OLT | OGE).
04336       TmpOp0 = Op0;
04337       TmpOp1 = Op1;
04338       Opc = ISD::OR;
04339       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
04340       Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
04341       break;
04342     }
04343   } else {
04344     // Integer comparisons.
04345     switch (SetCCOpcode) {
04346     default: llvm_unreachable("Illegal integer comparison");
04347     case ISD::SETNE:  Invert = true;
04348     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
04349     case ISD::SETLT:  Swap = true;
04350     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
04351     case ISD::SETLE:  Swap = true;
04352     case ISD::SETGE:  Opc = ARMISD::VCGE; break;
04353     case ISD::SETULT: Swap = true;
04354     case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
04355     case ISD::SETULE: Swap = true;
04356     case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
04357     }
04358 
04359     // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
04360     if (Opc == ARMISD::VCEQ) {
04361 
04362       SDValue AndOp;
04363       if (ISD::isBuildVectorAllZeros(Op1.getNode()))
04364         AndOp = Op0;
04365       else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
04366         AndOp = Op1;
04367 
04368       // Ignore bitconvert.
04369       if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
04370         AndOp = AndOp.getOperand(0);
04371 
04372       if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
04373         Opc = ARMISD::VTST;
04374         Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
04375         Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
04376         Invert = !Invert;
04377       }
04378     }
04379   }
04380 
04381   if (Swap)
04382     std::swap(Op0, Op1);
04383 
04384   // If one of the operands is a constant vector zero, attempt to fold the
04385   // comparison to a specialized compare-against-zero form.
04386   SDValue SingleOp;
04387   if (ISD::isBuildVectorAllZeros(Op1.getNode()))
04388     SingleOp = Op0;
04389   else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
04390     if (Opc == ARMISD::VCGE)
04391       Opc = ARMISD::VCLEZ;
04392     else if (Opc == ARMISD::VCGT)
04393       Opc = ARMISD::VCLTZ;
04394     SingleOp = Op1;
04395   }
04396 
04397   SDValue Result;
04398   if (SingleOp.getNode()) {
04399     switch (Opc) {
04400     case ARMISD::VCEQ:
04401       Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
04402     case ARMISD::VCGE:
04403       Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
04404     case ARMISD::VCLEZ:
04405       Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
04406     case ARMISD::VCGT:
04407       Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
04408     case ARMISD::VCLTZ:
04409       Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
04410     default:
04411       Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
04412     }
04413   } else {
04414     Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
04415   }
04416 
04417   if (Invert)
04418     Result = DAG.getNOT(dl, Result, VT);
04419 
04420   return Result;
04421 }
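
// For example, NEON has no single "ordered, not equal" compare, so
//   fcmp one <4 x float> %a, %b
// is expanded above to VCGT(%b, %a) | VCGT(%a, %b), and the unordered
// counterpart (fcmp ueq) is the bitwise NOT of that OR.  On the integer
// side, icmp ne (and %x, %y), 0 is recognized and emitted as VTST(%x, %y).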
04422 
04423 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
04424 /// valid vector constant for a NEON instruction with a "modified immediate"
04425 /// operand (e.g., VMOV).  If so, return the encoded value.
04426 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
04427                                  unsigned SplatBitSize, SelectionDAG &DAG,
04428                                  EVT &VT, bool is128Bits, NEONModImmType type) {
04429   unsigned OpCmode, Imm;
04430 
04431   // SplatBitSize is set to the smallest size that splats the vector, so a
04432   // zero vector will always have SplatBitSize == 8.  However, NEON modified
04433   // immediate instructions other than VMOV do not support the 8-bit encoding
04434   // of a zero vector, and the default encoding of zero is supposed to be the
04435   // 32-bit version.
04436   if (SplatBits == 0)
04437     SplatBitSize = 32;
04438 
04439   switch (SplatBitSize) {
04440   case 8:
04441     if (type != VMOVModImm)
04442       return SDValue();
04443     // Any 1-byte value is OK.  Op=0, Cmode=1110.
04444     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
04445     OpCmode = 0xe;
04446     Imm = SplatBits;
04447     VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
04448     break;
04449 
04450   case 16:
04451     // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
04452     VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
04453     if ((SplatBits & ~0xff) == 0) {
04454       // Value = 0x00nn: Op=x, Cmode=100x.
04455       OpCmode = 0x8;
04456       Imm = SplatBits;
04457       break;
04458     }
04459     if ((SplatBits & ~0xff00) == 0) {
04460       // Value = 0xnn00: Op=x, Cmode=101x.
04461       OpCmode = 0xa;
04462       Imm = SplatBits >> 8;
04463       break;
04464     }
04465     return SDValue();
04466 
04467   case 32:
04468     // NEON's 32-bit VMOV supports splat values where:
04469     // * only one byte is nonzero, or
04470     // * the least significant byte is 0xff and the second byte is nonzero, or
04471     // * the least significant 2 bytes are 0xff and the third is nonzero.
04472     VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
04473     if ((SplatBits & ~0xff) == 0) {
04474       // Value = 0x000000nn: Op=x, Cmode=000x.
04475       OpCmode = 0;
04476       Imm = SplatBits;
04477       break;
04478     }
04479     if ((SplatBits & ~0xff00) == 0) {
04480       // Value = 0x0000nn00: Op=x, Cmode=001x.
04481       OpCmode = 0x2;
04482       Imm = SplatBits >> 8;
04483       break;
04484     }
04485     if ((SplatBits & ~0xff0000) == 0) {
04486       // Value = 0x00nn0000: Op=x, Cmode=010x.
04487       OpCmode = 0x4;
04488       Imm = SplatBits >> 16;
04489       break;
04490     }
04491     if ((SplatBits & ~0xff000000) == 0) {
04492       // Value = 0xnn000000: Op=x, Cmode=011x.
04493       OpCmode = 0x6;
04494       Imm = SplatBits >> 24;
04495       break;
04496     }
04497 
04498     // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
04499     if (type == OtherModImm) return SDValue();
04500 
04501     if ((SplatBits & ~0xffff) == 0 &&
04502         ((SplatBits | SplatUndef) & 0xff) == 0xff) {
04503       // Value = 0x0000nnff: Op=x, Cmode=1100.
04504       OpCmode = 0xc;
04505       Imm = SplatBits >> 8;
04506       break;
04507     }
04508 
04509     if ((SplatBits & ~0xffffff) == 0 &&
04510         ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
04511       // Value = 0x00nnffff: Op=x, Cmode=1101.
04512       OpCmode = 0xd;
04513       Imm = SplatBits >> 16;
04514       break;
04515     }
04516 
04517     // Note: there are a few 32-bit splat values (specifically: 00ffff00,
04518     // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
04519     // VMOV.I32.  A (very) minor optimization would be to replicate the value
04520     // and fall through here to test for a valid 64-bit splat.  But, then the
04521     // caller would also need to check and handle the change in size.
04522     return SDValue();
04523 
04524   case 64: {
04525     if (type != VMOVModImm)
04526       return SDValue();
04527     // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
04528     uint64_t BitMask = 0xff;
04529     uint64_t Val = 0;
04530     unsigned ImmMask = 1;
04531     Imm = 0;
04532     for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
04533       if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
04534         Val |= BitMask;
04535         Imm |= ImmMask;
04536       } else if ((SplatBits & BitMask) != 0) {
04537         return SDValue();
04538       }
04539       BitMask <<= 8;
04540       ImmMask <<= 1;
04541     }
04542 
04543     if (DAG.getTargetLoweringInfo().isBigEndian())
04544       // swap the higher and lower 32-bit words
04545       Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
04546 
04547     // Op=1, Cmode=1110.
04548     OpCmode = 0x1e;
04549     VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
04550     break;
04551   }
04552 
04553   default:
04554     llvm_unreachable("unexpected size for isNEONModifiedImm");
04555   }
04556 
04557   unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
04558   return DAG.getTargetConstant(EncodedVal, MVT::i32);
04559 }
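
// Encoding example: a v8i16 splat of 0x5600 has only its second byte
// nonzero, so it falls into the "Value = 0xnn00" case above and is returned
// as ARM_AM::createNEONModImm(0xa, 0x56), which a VMOV.I16 can materialize
// directly.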
04560 
04561 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
04562                                            const ARMSubtarget *ST) const {
04563   if (!ST->hasVFP3())
04564     return SDValue();
04565 
04566   bool IsDouble = Op.getValueType() == MVT::f64;
04567   ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
04568 
04569   // Try splatting with a VMOV.f32...
04570   APFloat FPVal = CFP->getValueAPF();
04571   int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
04572 
04573   if (ImmVal != -1) {
04574     if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
04575       // We have code in place to select a valid ConstantFP already, no need to
04576       // do any mangling.
04577       return Op;
04578     }
04579 
04580     // It's a float and we are trying to use NEON operations where
04581     // possible. Lower it to a splat followed by an extract.
04582     SDLoc DL(Op);
04583     SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
04584     SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
04585                                       NewVal);
04586     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
04587                        DAG.getConstant(0, MVT::i32));
04588   }
04589 
04590   // The rest of our options are NEON only, make sure that's allowed before
04591   // proceeding..
04592   if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
04593     return SDValue();
04594 
04595   EVT VMovVT;
04596   uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
04597 
04598   // It wouldn't really be worth bothering for doubles except for one very
04599   // important value, which does happen to match: 0.0. So make sure we don't do
04600   // anything stupid.
04601   if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
04602     return SDValue();
04603 
04604   // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
04605   SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
04606                                      false, VMOVModImm);
04607   if (NewVal != SDValue()) {
04608     SDLoc DL(Op);
04609     SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
04610                                       NewVal);
04611     if (IsDouble)
04612       return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
04613 
04614     // It's a float: cast and extract a vector element.
04615     SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
04616                                        VecConstant);
04617     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
04618                        DAG.getConstant(0, MVT::i32));
04619   }
04620 
04621   // Finally, try a VMVN.i32
04622   NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
04623                              false, VMVNModImm);
04624   if (NewVal != SDValue()) {
04625     SDLoc DL(Op);
04626     SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
04627 
04628     if (IsDouble)
04629       return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
04630 
04631     // It's a float: cast and extract a vector element.
04632     SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
04633                                        VecConstant);
04634     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
04635                        DAG.getConstant(0, MVT::i32));
04636   }
04637 
04638   return SDValue();
04639 }
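
// For instance, 1.0f is representable as a VFP immediate
// (ARM_AM::getFP32Imm succeeds), so it is either kept as-is or, when NEON is
// preferred for single precision, rebuilt as a v2f32 splat plus an extract.
// A value like -0.0f (bits 0x80000000) has no VFP immediate encoding, but
// its bit pattern has only the top byte set, so the VMOV.i32 path can build
// it as a v2i32 modified immediate and extract lane 0.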
04640 
04641 // Check if a VEXT instruction can handle the shuffle mask when the
04642 // vector sources of the shuffle are the same.
04643 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
04644   unsigned NumElts = VT.getVectorNumElements();
04645 
04646   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
04647   if (M[0] < 0)
04648     return false;
04649 
04650   Imm = M[0];
04651 
04652   // If this is a VEXT shuffle, the immediate value is the index of the first
04653   // element.  The other shuffle indices must be the successive elements after
04654   // the first one.
04655   unsigned ExpectedElt = Imm;
04656   for (unsigned i = 1; i < NumElts; ++i) {
04657     // Increment the expected index.  If it wraps around, just follow it
04658     // back to index zero and keep going.
04659     ++ExpectedElt;
04660     if (ExpectedElt == NumElts)
04661       ExpectedElt = 0;
04662 
04663     if (M[i] < 0) continue; // ignore UNDEF indices
04664     if (ExpectedElt != static_cast<unsigned>(M[i]))
04665       return false;
04666   }
04667 
04668   return true;
04669 }
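
// Example: with a single v4i32 source, the mask <2, 3, 0, 1> is accepted
// with Imm = 2, a VEXT of the source with itself where the indices wrap
// from the last element back to element 0.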
04670 
04671 
04672 static bool isVEXTMask(ArrayRef<int> M, EVT VT,
04673                        bool &ReverseVEXT, unsigned &Imm) {
04674   unsigned NumElts = VT.getVectorNumElements();
04675   ReverseVEXT = false;
04676 
04677   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
04678   if (M[0] < 0)
04679     return false;
04680 
04681   Imm = M[0];
04682 
04683   // If this is a VEXT shuffle, the immediate value is the index of the first
04684   // element.  The other shuffle indices must be the successive elements after
04685   // the first one.
04686   unsigned ExpectedElt = Imm;
04687   for (unsigned i = 1; i < NumElts; ++i) {
04688     // Increment the expected index.  If it wraps around, it may still be
04689     // a VEXT but the source vectors must be swapped.
04690     ExpectedElt += 1;
04691     if (ExpectedElt == NumElts * 2) {
04692       ExpectedElt = 0;
04693       ReverseVEXT = true;
04694     }
04695 
04696     if (M[i] < 0) continue; // ignore UNDEF indices
04697     if (ExpectedElt != static_cast<unsigned>(M[i]))
04698       return false;
04699   }
04700 
04701   // Adjust the index value if the source operands will be swapped.
04702   if (ReverseVEXT)
04703     Imm -= NumElts;
04704 
04705   return true;
04706 }
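
// Example: for two v4i32 sources, the mask <6, 7, 0, 1> walks off the end of
// the concatenated pair, so ReverseVEXT is set and Imm is adjusted from 6 to
// 2: the shuffle becomes a VEXT of the swapped operands with immediate 2.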
04707 
04708 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
04709 /// instruction with the specified blocksize.  (The order of the elements
04710 /// within each block of the vector is reversed.)
04711 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
04712   assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
04713          "Only possible block sizes for VREV are: 16, 32, 64");
04714 
04715   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04716   if (EltSz == 64)
04717     return false;
04718 
04719   unsigned NumElts = VT.getVectorNumElements();
04720   unsigned BlockElts = M[0] + 1;
04721   // If the first shuffle index is UNDEF, be optimistic.
04722   if (M[0] < 0)
04723     BlockElts = BlockSize / EltSz;
04724 
04725   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
04726     return false;
04727 
04728   for (unsigned i = 0; i < NumElts; ++i) {
04729     if (M[i] < 0) continue; // ignore UNDEF indices
04730     if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
04731       return false;
04732   }
04733 
04734   return true;
04735 }
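
// Example: for v8i16 with BlockSize == 32 (a VREV32.16), the accepted mask
// is <1, 0, 3, 2, 5, 4, 7, 6>: the two 16-bit elements inside each 32-bit
// block are swapped.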
04736 
04737 static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
04738   // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
04739   // range, then 0 is placed into the resulting vector. So pretty much any mask
04740   // of 8 elements can work here.
04741   return VT == MVT::v8i8 && M.size() == 8;
04742 }
04743 
04744 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
04745   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04746   if (EltSz == 64)
04747     return false;
04748 
04749   unsigned NumElts = VT.getVectorNumElements();
04750   WhichResult = (M[0] == 0 ? 0 : 1);
04751   for (unsigned i = 0; i < NumElts; i += 2) {
04752     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
04753         (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
04754       return false;
04755   }
04756   return true;
04757 }
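
// Example: a VTRN.16 of two v4i16 vectors produces two results whose shuffle
// masks are <0, 4, 2, 6> (WhichResult == 0) and <1, 5, 3, 7>
// (WhichResult == 1).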
04758 
04759 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
04760 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
04761 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
04762 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
04763   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04764   if (EltSz == 64)
04765     return false;
04766 
04767   unsigned NumElts = VT.getVectorNumElements();
04768   WhichResult = (M[0] == 0 ? 0 : 1);
04769   for (unsigned i = 0; i < NumElts; i += 2) {
04770     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
04771         (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
04772       return false;
04773   }
04774   return true;
04775 }
04776 
04777 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
04778   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04779   if (EltSz == 64)
04780     return false;
04781 
04782   unsigned NumElts = VT.getVectorNumElements();
04783   WhichResult = (M[0] == 0 ? 0 : 1);
04784   for (unsigned i = 0; i != NumElts; ++i) {
04785     if (M[i] < 0) continue; // ignore UNDEF indices
04786     if ((unsigned) M[i] != 2 * i + WhichResult)
04787       return false;
04788   }
04789 
04790   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
04791   if (VT.is64BitVector() && EltSz == 32)
04792     return false;
04793 
04794   return true;
04795 }
04796 
04797 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
04798 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
04799 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
04800 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
04801   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04802   if (EltSz == 64)
04803     return false;
04804 
04805   unsigned Half = VT.getVectorNumElements() / 2;
04806   WhichResult = (M[0] == 0 ? 0 : 1);
04807   for (unsigned j = 0; j != 2; ++j) {
04808     unsigned Idx = WhichResult;
04809     for (unsigned i = 0; i != Half; ++i) {
04810       int MIdx = M[i + j * Half];
04811       if (MIdx >= 0 && (unsigned) MIdx != Idx)
04812         return false;
04813       Idx += 2;
04814     }
04815   }
04816 
04817   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
04818   if (VT.is64BitVector() && EltSz == 32)
04819     return false;
04820 
04821   return true;
04822 }
04823 
04824 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
04825   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04826   if (EltSz == 64)
04827     return false;
04828 
04829   unsigned NumElts = VT.getVectorNumElements();
04830   WhichResult = (M[0] == 0 ? 0 : 1);
04831   unsigned Idx = WhichResult * NumElts / 2;
04832   for (unsigned i = 0; i != NumElts; i += 2) {
04833     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
04834         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
04835       return false;
04836     Idx += 1;
04837   }
04838 
04839   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
04840   if (VT.is64BitVector() && EltSz == 32)
04841     return false;
04842 
04843   return true;
04844 }
04845 
04846 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
04847 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
04848 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
04849 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
04850   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04851   if (EltSz == 64)
04852     return false;
04853 
04854   unsigned NumElts = VT.getVectorNumElements();
04855   WhichResult = (M[0] == 0 ? 0 : 1);
04856   unsigned Idx = WhichResult * NumElts / 2;
04857   for (unsigned i = 0; i != NumElts; i += 2) {
04858     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
04859         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
04860       return false;
04861     Idx += 1;
04862   }
04863 
04864   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
04865   if (VT.is64BitVector() && EltSz == 32)
04866     return false;
04867 
04868   return true;
04869 }
04870 
04871 /// \return true if this is a reverse operation on a vector.
04872 static bool isReverseMask(ArrayRef<int> M, EVT VT) {
04873   unsigned NumElts = VT.getVectorNumElements();
04874   // Make sure the mask has the right size.
04875   if (NumElts != M.size())
04876     return false;
04877 
04878   // Look for <15, ..., 3, -1, 1, 0>.
04879   for (unsigned i = 0; i != NumElts; ++i)
04880     if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
04881       return false;
04882 
04883   return true;
04884 }
04885 
04886 // If N is an integer constant that can be moved into a register in one
04887 // instruction, return an SDValue of such a constant (will become a MOV
04888 // instruction).  Otherwise return null.
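// For example, in ARM mode 0xFF and 0xFF000000 are both encodable as rotated
// 8-bit immediates (a single MOV), and 0xFFFFFF00 can be materialized with a
// single MVN of 0xFF; in Thumb1 mode the check is against plain 8-bit
// immediates and their bitwise complements.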
04889 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
04890                                      const ARMSubtarget *ST, SDLoc dl) {
04891   uint64_t Val;
04892   if (!isa<ConstantSDNode>(N))
04893     return SDValue();
04894   Val = cast<ConstantSDNode>(N)->getZExtValue();
04895 
04896   if (ST->isThumb1Only()) {
04897     if (Val <= 255 || ~Val <= 255)
04898       return DAG.getConstant(Val, MVT::i32);
04899   } else {
04900     if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
04901       return DAG.getConstant(Val, MVT::i32);
04902   }
04903   return SDValue();
04904 }
04905 
04906 // If this is a case we can't handle, return null and let the default
04907 // expansion code take care of it.
04908 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
04909                                              const ARMSubtarget *ST) const {
04910   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
04911   SDLoc dl(Op);
04912   EVT VT = Op.getValueType();
04913 
04914   APInt SplatBits, SplatUndef;
04915   unsigned SplatBitSize;
04916   bool HasAnyUndefs;
04917   if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
04918     if (SplatBitSize <= 64) {
04919       // Check if an immediate VMOV works.
04920       EVT VmovVT;
04921       SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
04922                                       SplatUndef.getZExtValue(), SplatBitSize,
04923                                       DAG, VmovVT, VT.is128BitVector(),
04924                                       VMOVModImm);
04925       if (Val.getNode()) {
04926         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
04927         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
04928       }
04929 
04930       // Try an immediate VMVN.
04931       uint64_t NegatedImm = (~SplatBits).getZExtValue();
04932       Val = isNEONModifiedImm(NegatedImm,
04933                                       SplatUndef.getZExtValue(), SplatBitSize,
04934                                       DAG, VmovVT, VT.is128BitVector(),
04935                                       VMVNModImm);
04936       if (Val.getNode()) {
04937         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
04938         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
04939       }
04940 
04941       // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
04942       if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
04943         int ImmVal = ARM_AM::getFP32Imm(SplatBits);
04944         if (ImmVal != -1) {
04945           SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
04946           return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
04947         }
04948       }
04949     }
04950   }
04951 
04952   // Scan through the operands to see if only one value is used.
04953   //
04954   // As an optimisation, even if more than one value is used it may be more
04955   // profitable to splat with one value and then change some of the lanes.
04956   //
04957   // Heuristically we decide to do this if the vector has a "dominant" value,
04958   // defined as a value that is splatted to more than half of the lanes.
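  // For example, in a v4i32 BUILD_VECTOR <x, y, x, x> the value x occupies
  // three of the four lanes, so it is treated as dominant: the vector is
  // splatted from x with a VDUP and the remaining lane is then patched in
  // with an INSERT_VECTOR_ELT of y.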
04959   unsigned NumElts = VT.getVectorNumElements();
04960   bool isOnlyLowElement = true;
04961   bool usesOnlyOneValue = true;
04962   bool hasDominantValue = false;
04963   bool isConstant = true;
04964 
04965   // Map of the number of times a particular SDValue appears in the
04966   // element list.
04967   DenseMap<SDValue, unsigned> ValueCounts;
04968   SDValue Value;
04969   for (unsigned i = 0; i < NumElts; ++i) {
04970     SDValue V = Op.getOperand(i);
04971     if (V.getOpcode() == ISD::UNDEF)
04972       continue;
04973     if (i > 0)
04974       isOnlyLowElement = false;
04975     if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
04976       isConstant = false;
04977 
04978     ValueCounts.insert(std::make_pair(V, 0));
04979     unsigned &Count = ValueCounts[V];
04980 
04981     // Is this value dominant? (takes up more than half of the lanes)
04982     if (++Count > (NumElts / 2)) {
04983       hasDominantValue = true;
04984       Value = V;
04985     }
04986   }
04987   if (ValueCounts.size() != 1)
04988     usesOnlyOneValue = false;
04989   if (!Value.getNode() && ValueCounts.size() > 0)
04990     Value = ValueCounts.begin()->first;
04991 
04992   if (ValueCounts.size() == 0)
04993     return DAG.getUNDEF(VT);
04994 
04995   // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR,
04996   // so keep going when the only used element is a load.
04997   if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
04998     return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
04999 
05000   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
05001 
05002   // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
05003   // i32 and try again.
05004   if (hasDominantValue && EltSize <= 32) {
05005     if (!isConstant) {
05006       SDValue N;
05007 
05008       // If we are VDUPing a value that comes directly from a vector, that will
05009       // cause an unnecessary move to and from a GPR, where instead we could
05010       // just use VDUPLANE. We can only do this if the lane being extracted
05011       // is at a constant index, as the VDUP from lane instructions only have
05012       // constant-index forms.
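      // Roughly:  t = extract_vector_elt %src:v4f32, 2
      //           build_vector t, t, t, t  ==>  VDUPLANE %src, 2
      // (where %src is a stand-in name; when the source vector's type differs
      // from VT, the element is first inserted into an undef vector below).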
05013       if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
05014           isa<ConstantSDNode>(Value->getOperand(1))) {
05015         // We need to create a new undef vector to use for the VDUPLANE if the
05016         // size of the vector from which we get the value is different than the
05017         // size of the vector that we need to create. We will insert the element
05018         // such that the register coalescer will remove unnecessary copies.
05019         if (VT != Value->getOperand(0).getValueType()) {
05020           ConstantSDNode *constIndex;
05021           constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
05022           assert(constIndex && "The index is not a constant!");
05023           unsigned index = constIndex->getAPIntValue().getLimitedValue() %
05024                              VT.getVectorNumElements();
05025           N =  DAG.getNode(ARMISD::VDUPLANE, dl, VT,
05026                  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
05027                         Value, DAG.getConstant(index, MVT::i32)),
05028                            DAG.getConstant(index, MVT::i32));
05029         } else
05030           N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
05031                         Value->getOperand(0), Value->getOperand(1));
05032       } else
05033         N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
05034 
05035       if (!usesOnlyOneValue) {
05036         // The dominant value was splatted as 'N', but we now have to insert
05037         // all differing elements.
05038         for (unsigned I = 0; I < NumElts; ++I) {
05039           if (Op.getOperand(I) == Value)
05040             continue;
05041           SmallVector<SDValue, 3> Ops;
05042           Ops.push_back(N);
05043           Ops.push_back(Op.getOperand(I));
05044           Ops.push_back(DAG.getConstant(I, MVT::i32));
05045           N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
05046         }
05047       }
05048       return N;
05049     }
05050     if (VT.getVectorElementType().isFloatingPoint()) {
05051       SmallVector<SDValue, 8> Ops;
05052       for (unsigned i = 0; i < NumElts; ++i)
05053         Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
05054                                   Op.getOperand(i)));
05055       EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
05056       SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
05057       Val = LowerBUILD_VECTOR(Val, DAG, ST);
05058       if (Val.getNode())
05059         return DAG.getNode(ISD::BITCAST, dl, VT, Val);
05060     }
05061     if (usesOnlyOneValue) {
05062       SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
05063       if (isConstant && Val.getNode())
05064         return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
05065     }
05066   }
05067 
05068   // If all elements are constants and the case above didn't get hit, fall back
05069   // to the default expansion, which will generate a load from the constant
05070   // pool.
05071   if (isConstant)
05072     return SDValue();
05073 
05074   // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
05075   if (NumElts >= 4) {
05076     SDValue shuffle = ReconstructShuffle(Op, DAG);
05077     if (shuffle != SDValue())
05078       return shuffle;
05079   }
05080 
05081   // Vectors with 32- or 64-bit elements can be built by directly assigning
05082   // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
05083   // will be legalized.
05084   if (EltSize >= 32) {
05085     // Do the expansion with floating-point types, since that is what the VFP
05086     // registers are defined to use, and since i64 is not legal.
05087     EVT EltVT = EVT::getFloatingPointVT(EltSize);
05088     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
05089     SmallVector<SDValue, 8> Ops;
05090     for (unsigned i = 0; i < NumElts; ++i)
05091       Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
05092     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
05093     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
05094   }
05095 
05096   // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
05097   // know the default expansion would otherwise fall back on something even
05098   // worse. For a vector with one or two non-undef values, that would be
05099   // scalar_to_vector for the elements followed by a shuffle (provided the
05100   // shuffle is valid for the target); for everything else it would be
05101   // element-by-element materialization on the stack followed by a load.
05102   if (!isConstant && !usesOnlyOneValue) {
05103     SDValue Vec = DAG.getUNDEF(VT);
05104     for (unsigned i = 0 ; i < NumElts; ++i) {
05105       SDValue V = Op.getOperand(i);
05106       if (V.getOpcode() == ISD::UNDEF)
05107         continue;
05108       SDValue LaneIdx = DAG.getConstant(i, MVT::i32);
05109       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
05110     }
05111     return Vec;
05112   }
05113 
05114   return SDValue();
05115 }
05116 
05117 // Gather data to see if the operation can be modelled as a
05118 // shuffle in combination with VEXTs.
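// For example, a v4i16 BUILD_VECTOR whose operands are extracts of lanes 2..5
// of a single v8i16 source can be rebuilt as a VEXT #2 of the source's two
// halves followed by an identity shuffle, rather than as four separate
// element extractions and insertions.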
05119 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
05120                                               SelectionDAG &DAG) const {
05121   SDLoc dl(Op);
05122   EVT VT = Op.getValueType();
05123   unsigned NumElts = VT.getVectorNumElements();
05124 
05125   SmallVector<SDValue, 2> SourceVecs;
05126   SmallVector<unsigned, 2> MinElts;
05127   SmallVector<unsigned, 2> MaxElts;
05128 
05129   for (unsigned i = 0; i < NumElts; ++i) {
05130     SDValue V = Op.getOperand(i);
05131     if (V.getOpcode() == ISD::UNDEF)
05132       continue;
05133     else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
05134       // A shuffle can only come from building a vector from various
05135       // elements of other vectors.
05136       return SDValue();
05137     } else if (V.getOperand(0).getValueType().getVectorElementType() !=
05138                VT.getVectorElementType()) {
05139       // This code doesn't know how to handle shuffles where the vector
05140       // element types do not match (this happens because type legalization
05141       // promotes the return type of EXTRACT_VECTOR_ELT).
05142       // FIXME: It might be appropriate to extend this code to handle
05143       // mismatched types.
05144       return SDValue();
05145     }
05146 
05147     // Record this extraction against the appropriate vector if possible...
05148     SDValue SourceVec = V.getOperand(0);
05149     // If the element number isn't a constant, we can't effectively
05150     // analyze what's going on.
05151     if (!isa<ConstantSDNode>(V.getOperand(1)))
05152       return SDValue();
05153     unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
05154     bool FoundSource = false;
05155     for (unsigned j = 0; j < SourceVecs.size(); ++j) {
05156       if (SourceVecs[j] == SourceVec) {
05157         if (MinElts[j] > EltNo)
05158           MinElts[j] = EltNo;
05159         if (MaxElts[j] < EltNo)
05160           MaxElts[j] = EltNo;
05161         FoundSource = true;
05162         break;
05163       }
05164     }
05165 
05166     // Or record a new source if not...
05167     if (!FoundSource) {
05168       SourceVecs.push_back(SourceVec);
05169       MinElts.push_back(EltNo);
05170       MaxElts.push_back(EltNo);
05171     }
05172   }
05173 
05174   // Currently only do something sane when at most two source vectors are
05175   // involved.
05176   if (SourceVecs.size() > 2)
05177     return SDValue();
05178 
05179   SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
05180   int VEXTOffsets[2] = {0, 0};
05181 
05182   // This loop extracts the usage patterns of the source vectors
05183   // and prepares appropriate SDValues for a shuffle if possible.
05184   for (unsigned i = 0; i < SourceVecs.size(); ++i) {
05185     if (SourceVecs[i].getValueType() == VT) {
05186       // No VEXT necessary
05187       ShuffleSrcs[i] = SourceVecs[i];
05188       VEXTOffsets[i] = 0;
05189       continue;
05190     } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
05191       // It probably isn't worth padding out a smaller vector just to
05192       // break it down again in a shuffle.
05193       return SDValue();
05194     }
05195 
05196     // Since only 64-bit and 128-bit vectors are legal on ARM and
05197     // we've eliminated the other cases...
05198     assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
05199            "unexpected vector sizes in ReconstructShuffle");
05200 
05201     if (MaxElts[i] - MinElts[i] >= NumElts) {
05202       // Span too large for a VEXT to cope
05203       return SDValue();
05204     }
05205 
05206     if (MinElts[i] >= NumElts) {
05207       // The extraction can just take the second half
05208       VEXTOffsets[i] = NumElts;
05209       ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
05210                                    SourceVecs[i],
05211                                    DAG.getIntPtrConstant(NumElts));
05212     } else if (MaxElts[i] < NumElts) {
05213       // The extraction can just take the first half
05214       VEXTOffsets[i] = 0;
05215       ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
05216                                    SourceVecs[i],
05217                                    DAG.getIntPtrConstant(0));
05218     } else {
05219       // An actual VEXT is needed
05220       VEXTOffsets[i] = MinElts[i];
05221       SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
05222                                      SourceVecs[i],
05223                                      DAG.getIntPtrConstant(0));
05224       SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
05225                                      SourceVecs[i],
05226                                      DAG.getIntPtrConstant(NumElts));
05227       ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
05228                                    DAG.getConstant(VEXTOffsets[i], MVT::i32));
05229     }
05230   }
05231 
05232   SmallVector<int, 8> Mask;
05233 
05234   for (unsigned i = 0; i < NumElts; ++i) {
05235     SDValue Entry = Op.getOperand(i);
05236     if (Entry.getOpcode() == ISD::UNDEF) {
05237       Mask.push_back(-1);
05238       continue;
05239     }
05240 
05241     SDValue ExtractVec = Entry.getOperand(0);
05242     int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
05243                                           .getOperand(1))->getSExtValue();
05244     if (ExtractVec == SourceVecs[0]) {
05245       Mask.push_back(ExtractElt - VEXTOffsets[0]);
05246     } else {
05247       Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
05248     }
05249   }
05250 
05251   // Final check before we try to produce nonsense...
05252   if (isShuffleMaskLegal(Mask, VT))
05253     return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
05254                                 &Mask[0]);
05255 
05256   return SDValue();
05257 }
05258 
05259 /// isShuffleMaskLegal - Targets can use this to indicate that they only
05260 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
05261 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
05262 /// are assumed to be legal.
05263 bool
05264 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
05265                                       EVT VT) const {
05266   if (VT.getVectorNumElements() == 4 &&
05267       (VT.is128BitVector() || VT.is64BitVector())) {
05268     unsigned PFIndexes[4];
05269     for (unsigned i = 0; i != 4; ++i) {
05270       if (M[i] < 0)
05271         PFIndexes[i] = 8;
05272       else
05273         PFIndexes[i] = M[i];
05274     }
05275 
05276     // Compute the index in the perfect shuffle table.
05277     unsigned PFTableIndex =
05278       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
05279     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
05280     unsigned Cost = (PFEntry >> 30);
05281 
05282     if (Cost <= 4)
05283       return true;
05284   }
05285 
05286   bool ReverseVEXT;
05287   unsigned Imm, WhichResult;
05288 
05289   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
05290   return (EltSize >= 32 ||
05291           ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
05292           isVREVMask(M, VT, 64) ||
05293           isVREVMask(M, VT, 32) ||
05294           isVREVMask(M, VT, 16) ||
05295           isVEXTMask(M, VT, ReverseVEXT, Imm) ||
05296           isVTBLMask(M, VT) ||
05297           isVTRNMask(M, VT, WhichResult) ||
05298           isVUZPMask(M, VT, WhichResult) ||
05299           isVZIPMask(M, VT, WhichResult) ||
05300           isVTRN_v_undef_Mask(M, VT, WhichResult) ||
05301           isVUZP_v_undef_Mask(M, VT, WhichResult) ||
05302           isVZIP_v_undef_Mask(M, VT, WhichResult) ||
05303           ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
05304 }
05305 
05306 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
05307 /// the specified operations to build the shuffle.
05308 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
05309                                       SDValue RHS, SelectionDAG &DAG,
05310                                       SDLoc dl) {
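  // PFEntry layout: bits [31:30] hold the cost, bits [29:26] the opcode from
  // the enum below, and the two 13-bit fields give the LHS and RHS operand
  // IDs. Each ID encodes four lane indices in base 9 (0-7 for real lanes, 8
  // for undef), the same encoding the callers use to index
  // PerfectShuffleTable.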
05311   unsigned OpNum = (PFEntry >> 26) & 0x0F;
05312   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
05313   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
05314 
05315   enum {
05316     OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
05317     OP_VREV,
05318     OP_VDUP0,
05319     OP_VDUP1,
05320     OP_VDUP2,
05321     OP_VDUP3,
05322     OP_VEXT1,
05323     OP_VEXT2,
05324     OP_VEXT3,
05325     OP_VUZPL, // VUZP, left result
05326     OP_VUZPR, // VUZP, right result
05327     OP_VZIPL, // VZIP, left result
05328     OP_VZIPR, // VZIP, right result
05329     OP_VTRNL, // VTRN, left result
05330     OP_VTRNR  // VTRN, right result
05331   };
05332 
05333   if (OpNum == OP_COPY) {
05334     if (LHSID == (1*9+2)*9+3) return LHS;
05335     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
05336     return RHS;
05337   }
05338 
05339   SDValue OpLHS, OpRHS;
05340   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
05341   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
05342   EVT VT = OpLHS.getValueType();
05343 
05344   switch (OpNum) {
05345   default: llvm_unreachable("Unknown shuffle opcode!");
05346   case OP_VREV:
05347     // VREV divides the vector in half and swaps within the half.
05348     if (VT.getVectorElementType() == MVT::i32 ||
05349         VT.getVectorElementType() == MVT::f32)
05350       return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
05351     // vrev <4 x i16> -> VREV32
05352     if (VT.getVectorElementType() == MVT::i16)
05353       return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
05354     // vrev <4 x i8> -> VREV16
05355     assert(VT.getVectorElementType() == MVT::i8);
05356     return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
05357   case OP_VDUP0:
05358   case OP_VDUP1:
05359   case OP_VDUP2:
05360   case OP_VDUP3:
05361     return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
05362                        OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
05363   case OP_VEXT1:
05364   case OP_VEXT2:
05365   case OP_VEXT3:
05366     return DAG.getNode(ARMISD::VEXT, dl, VT,
05367                        OpLHS, OpRHS,
05368                        DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
05369   case OP_VUZPL:
05370   case OP_VUZPR:
05371     return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
05372                        OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
05373   case OP_VZIPL:
05374   case OP_VZIPR:
05375     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
05376                        OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
05377   case OP_VTRNL:
05378   case OP_VTRNR:
05379     return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
05380                        OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
05381   }
05382 }
05383 
05384 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
05385                                        ArrayRef<int> ShuffleMask,
05386                                        SelectionDAG &DAG) {
05387   // Check to see if we can use the VTBL instruction.
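  // VTBL selects each result byte by using the corresponding byte of the
  // index operand to look into a table of one (VTBL1) or two (VTBL2) D
  // registers; out-of-range indices (including -1/undef mask entries, which
  // become large unsigned bytes) produce zero, which is fine since those
  // lanes are undefined anyway.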
05388   SDValue V1 = Op.getOperand(0);
05389   SDValue V2 = Op.getOperand(1);
05390   SDLoc DL(Op);
05391 
05392   SmallVector<SDValue, 8> VTBLMask;
05393   for (ArrayRef<int>::iterator
05394          I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
05395     VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
05396 
05397   if (V2.getNode()->getOpcode() == ISD::UNDEF)
05398     return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
05399                        DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
05400 
05401   return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
05402                      DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
05403 }
05404 
05405 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
05406                                                       SelectionDAG &DAG) {
05407   SDLoc DL(Op);
05408   SDValue OpLHS = Op.getOperand(0);
05409   EVT VT = OpLHS.getValueType();
05410 
05411   assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
05412          "Expect an v8i16/v16i8 type");
05413   OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
05414   // After the VREV64, each double word of the input has been reversed in
05415   // place; the VEXT below then swaps the two double words, so the whole
05416   // vector ends up fully reversed. The v8i16 case works the same way.
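  // For example, for v16i8 <0, ..., 15> the VREV64 produces
  // <7, ..., 0, 15, ..., 8>, and the VEXT by 8 bytes then yields
  // <15, ..., 0>, the fully reversed vector; the v8i16 case uses a VEXT by 4
  // halfwords.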
05417   unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
05418   return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
05419                      DAG.getConstant(ExtractNum, MVT::i32));
05420 }
05421 
05422 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
05423   SDValue V1 = Op.getOperand(0);
05424   SDValue V2 = Op.getOperand(1);
05425   SDLoc dl(Op);
05426   EVT VT = Op.getValueType();
05427   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
05428 
05429   // Convert shuffles that are directly supported on NEON to target-specific
05430   // DAG nodes, instead of keeping them as shuffles and matching them again
05431   // during code selection.  This is more efficient and avoids the possibility
05432   // of inconsistencies between legalization and selection.
05433   // FIXME: floating-point vectors should be canonicalized to integer vectors
05434   // of the same size so that they get CSEd properly.
05435   ArrayRef<int> ShuffleMask = SVN->getMask();
05436 
05437   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
05438   if (EltSize <= 32) {
05439     if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
05440       int Lane = SVN->getSplatIndex();
05441       // If this is an undef splat, generate it via "just" vdup, if possible.
05442       if (Lane == -1) Lane = 0;
05443 
05444       // Test if V1 is a SCALAR_TO_VECTOR.
05445       if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
05446         return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
05447       }
05448       // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
05449       // (and probably will turn into a SCALAR_TO_VECTOR once legalization
05450       // reaches it).
05451       if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
05452           !isa<ConstantSDNode>(V1.getOperand(0))) {
05453         bool IsScalarToVector = true;
05454         for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
05455           if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
05456             IsScalarToVector = false;
05457             break;
05458           }
05459         if (IsScalarToVector)
05460           return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
05461       }
05462       return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
05463                          DAG.getConstant(Lane, MVT::i32));
05464     }
05465 
05466     bool ReverseVEXT;
05467     unsigned Imm;
05468     if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
05469       if (ReverseVEXT)
05470         std::swap(V1, V2);
05471       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
05472                          DAG.getConstant(Imm, MVT::i32));
05473     }
05474 
05475     if (isVREVMask(ShuffleMask, VT, 64))
05476       return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
05477     if (isVREVMask(ShuffleMask, VT, 32))
05478       return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
05479     if (isVREVMask(ShuffleMask, VT, 16))
05480       return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
05481 
05482     if (V2->getOpcode() == ISD::UNDEF &&
05483         isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
05484       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
05485                          DAG.getConstant(Imm, MVT::i32));
05486     }
05487 
05488     // Check for Neon shuffles that modify both input vectors in place.
05489     // If both results are used, i.e., if there are two shuffles with the same
05490     // source operands and with masks corresponding to both results of one of
05491     // these operations, DAG memoization will ensure that a single node is
05492     // used for both shuffles.
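    // For example, two shuffles of the same v4i32 operands with the masks
    // <0, 4, 2, 6> and <1, 5, 3, 7> both map onto a single VTRN node and
    // simply take its first and second result, respectively.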
05493     unsigned WhichResult;
05494     if (isVTRNMask(ShuffleMask, VT, WhichResult))
05495       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
05496                          V1, V2).getValue(WhichResult);
05497     if (isVUZPMask(ShuffleMask, VT, WhichResult))
05498       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
05499                          V1, V2).getValue(WhichResult);
05500     if (isVZIPMask(ShuffleMask, VT, WhichResult))
05501       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
05502                          V1, V2).getValue(WhichResult);
05503 
05504     if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
05505       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
05506                          V1, V1).getValue(WhichResult);
05507     if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
05508       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
05509                          V1, V1).getValue(WhichResult);
05510     if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
05511       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
05512                          V1, V1).getValue(WhichResult);
05513   }
05514 
05515   // If the shuffle is not directly supported and it has 4 elements, use
05516   // the PerfectShuffle-generated table to synthesize it from other shuffles.
05517   unsigned NumElts = VT.getVectorNumElements();
05518   if (NumElts == 4) {
05519     unsigned PFIndexes[4];
05520     for (unsigned i = 0; i != 4; ++i) {
05521       if (ShuffleMask[i] < 0)
05522         PFIndexes[i] = 8;
05523       else
05524         PFIndexes[i] = ShuffleMask[i];
05525     }
05526 
05527     // Compute the index in the perfect shuffle table.
05528     unsigned PFTableIndex =
05529       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
05530     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
05531     unsigned Cost = (PFEntry >> 30);
05532 
05533     if (Cost <= 4)
05534       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
05535   }
05536 
05537   // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
05538   if (EltSize >= 32) {
05539     // Do the expansion with floating-point types, since that is what the VFP
05540     // registers are defined to use, and since i64 is not legal.
05541     EVT EltVT = EVT::getFloatingPointVT(EltSize);
05542     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
05543     V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
05544     V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
05545     SmallVector<SDValue, 8> Ops;
05546     for (unsigned i = 0; i < NumElts; ++i) {
05547       if (ShuffleMask[i] < 0)
05548         Ops.push_back(DAG.getUNDEF(EltVT));
05549       else
05550         Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
05551                                   ShuffleMask[i] < (int)NumElts ? V1 : V2,
05552                                   DAG.getConstant(ShuffleMask[i] & (NumElts-1),
05553                                                   MVT::i32)));
05554     }
05555     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
05556     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
05557   }
05558 
05559   if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
05560     return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
05561 
05562   if (VT == MVT::v8i8) {
05563     SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
05564     if (NewOp.getNode())
05565       return NewOp;
05566   }
05567 
05568   return SDValue();
05569 }
05570 
05571 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {