LLVM API Documentation

ARMISelLowering.cpp
Go to the documentation of this file.
00001 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines the interfaces that ARM uses to lower LLVM code into a
00011 // selection DAG.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "ARMISelLowering.h"
00016 #include "ARMCallingConv.h"
00017 #include "ARMConstantPoolValue.h"
00018 #include "ARMMachineFunctionInfo.h"
00019 #include "ARMPerfectShuffle.h"
00020 #include "ARMSubtarget.h"
00021 #include "ARMTargetMachine.h"
00022 #include "ARMTargetObjectFile.h"
00023 #include "MCTargetDesc/ARMAddressingModes.h"
00024 #include "llvm/ADT/Statistic.h"
00025 #include "llvm/ADT/StringExtras.h"
00026 #include "llvm/CodeGen/CallingConvLower.h"
00027 #include "llvm/CodeGen/IntrinsicLowering.h"
00028 #include "llvm/CodeGen/MachineBasicBlock.h"
00029 #include "llvm/CodeGen/MachineFrameInfo.h"
00030 #include "llvm/CodeGen/MachineFunction.h"
00031 #include "llvm/CodeGen/MachineInstrBuilder.h"
00032 #include "llvm/CodeGen/MachineModuleInfo.h"
00033 #include "llvm/CodeGen/MachineRegisterInfo.h"
00034 #include "llvm/CodeGen/SelectionDAG.h"
00035 #include "llvm/IR/CallingConv.h"
00036 #include "llvm/IR/Constants.h"
00037 #include "llvm/IR/Function.h"
00038 #include "llvm/IR/GlobalValue.h"
00039 #include "llvm/IR/IRBuilder.h"
00040 #include "llvm/IR/Instruction.h"
00041 #include "llvm/IR/Instructions.h"
00042 #include "llvm/IR/Intrinsics.h"
00043 #include "llvm/IR/Type.h"
00044 #include "llvm/MC/MCSectionMachO.h"
00045 #include "llvm/Support/CommandLine.h"
00046 #include "llvm/Support/Debug.h"
00047 #include "llvm/Support/ErrorHandling.h"
00048 #include "llvm/Support/MathExtras.h"
00049 #include "llvm/Target/TargetOptions.h"
00050 #include <utility>
00051 using namespace llvm;
00052 
00053 #define DEBUG_TYPE "arm-isel"
00054 
00055 STATISTIC(NumTailCalls, "Number of tail calls");
00056 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
00057 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
00058 
00059 cl::opt<bool>
00060 EnableARMLongCalls("arm-long-calls", cl::Hidden,
00061   cl::desc("Generate calls via indirect call instructions"),
00062   cl::init(false));
00063 
00064 static cl::opt<bool>
00065 ARMInterworking("arm-interworking", cl::Hidden,
00066   cl::desc("Enable / disable ARM interworking (for debugging only)"),
00067   cl::init(true));
00068 
00069 namespace {
00070   class ARMCCState : public CCState {
00071   public:
00072     ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
00073                const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
00074                LLVMContext &C, ParmContext PC)
00075         : CCState(CC, isVarArg, MF, TM, locs, C) {
00076       assert(((PC == Call) || (PC == Prologue)) &&
00077              "ARMCCState users must specify whether their context is call"
00078              "or prologue generation.");
00079       CallOrPrologue = PC;
00080     }
00081   };
00082 }
00083 
00084 // The APCS parameter registers.
00085 static const MCPhysReg GPRArgRegs[] = {
00086   ARM::R0, ARM::R1, ARM::R2, ARM::R3
00087 };
00088 
00089 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
00090                                        MVT PromotedBitwiseVT) {
00091   if (VT != PromotedLdStVT) {
00092     setOperationAction(ISD::LOAD, VT, Promote);
00093     AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
00094 
00095     setOperationAction(ISD::STORE, VT, Promote);
00096     AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
00097   }
00098 
00099   MVT ElemTy = VT.getVectorElementType();
00100   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
00101     setOperationAction(ISD::SETCC, VT, Custom);
00102   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
00103   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
00104   if (ElemTy == MVT::i32) {
00105     setOperationAction(ISD::SINT_TO_FP, VT, Custom);
00106     setOperationAction(ISD::UINT_TO_FP, VT, Custom);
00107     setOperationAction(ISD::FP_TO_SINT, VT, Custom);
00108     setOperationAction(ISD::FP_TO_UINT, VT, Custom);
00109   } else {
00110     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
00111     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
00112     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
00113     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
00114   }
00115   setOperationAction(ISD::BUILD_VECTOR,      VT, Custom);
00116   setOperationAction(ISD::VECTOR_SHUFFLE,    VT, Custom);
00117   setOperationAction(ISD::CONCAT_VECTORS,    VT, Legal);
00118   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
00119   setOperationAction(ISD::SELECT,            VT, Expand);
00120   setOperationAction(ISD::SELECT_CC,         VT, Expand);
00121   setOperationAction(ISD::VSELECT,           VT, Expand);
00122   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
00123   if (VT.isInteger()) {
00124     setOperationAction(ISD::SHL, VT, Custom);
00125     setOperationAction(ISD::SRA, VT, Custom);
00126     setOperationAction(ISD::SRL, VT, Custom);
00127   }
00128 
00129   // Promote all bit-wise operations.
00130   if (VT.isInteger() && VT != PromotedBitwiseVT) {
00131     setOperationAction(ISD::AND, VT, Promote);
00132     AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
00133     setOperationAction(ISD::OR,  VT, Promote);
00134     AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
00135     setOperationAction(ISD::XOR, VT, Promote);
00136     AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
00137   }
00138 
00139   // Neon does not support vector divide/remainder operations.
00140   setOperationAction(ISD::SDIV, VT, Expand);
00141   setOperationAction(ISD::UDIV, VT, Expand);
00142   setOperationAction(ISD::FDIV, VT, Expand);
00143   setOperationAction(ISD::SREM, VT, Expand);
00144   setOperationAction(ISD::UREM, VT, Expand);
00145   setOperationAction(ISD::FREM, VT, Expand);
00146 }
00147 
00148 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
00149   addRegisterClass(VT, &ARM::DPRRegClass);
00150   addTypeForNEON(VT, MVT::f64, MVT::v2i32);
00151 }
00152 
00153 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
00154   addRegisterClass(VT, &ARM::DPairRegClass);
00155   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
00156 }
00157 
00158 static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
00159   if (TT.isOSBinFormatMachO())
00160     return new TargetLoweringObjectFileMachO();
00161   if (TT.isOSWindows())
00162     return new TargetLoweringObjectFileCOFF();
00163   return new ARMElfTargetObjectFile();
00164 }
00165 
00166 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
00167     : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
00168   Subtarget = &TM.getSubtarget<ARMSubtarget>();
00169   RegInfo = TM.getRegisterInfo();
00170   Itins = TM.getInstrItineraryData();
00171 
00172   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
00173 
00174   if (Subtarget->isTargetMachO()) {
00175     // Uses VFP for Thumb libfuncs if available.
00176     if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
00177         Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) {
00178       // Single-precision floating-point arithmetic.
00179       setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
00180       setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
00181       setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
00182       setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
00183 
00184       // Double-precision floating-point arithmetic.
00185       setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
00186       setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
00187       setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
00188       setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
00189 
00190       // Single-precision comparisons.
00191       setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
00192       setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
00193       setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
00194       setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
00195       setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
00196       setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
00197       setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
00198       setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
00199 
00200       setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
00201       setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
00202       setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
00203       setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
00204       setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
00205       setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
00206       setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
00207       setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
00208 
00209       // Double-precision comparisons.
00210       setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
00211       setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
00212       setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
00213       setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
00214       setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
00215       setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
00216       setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
00217       setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
00218 
00219       setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
00220       setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
00221       setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
00222       setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
00223       setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
00224       setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
00225       setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
00226       setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
00227 
00228       // Floating-point to integer conversions.
00229       // i64 conversions are done via library routines even when generating VFP
00230       // instructions, so use the same ones.
00231       setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
00232       setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
00233       setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
00234       setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
00235 
00236       // Conversions between floating types.
00237       setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
00238       setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
00239 
00240       // Integer to floating-point conversions.
00241       // i64 conversions are done via library routines even when generating VFP
00242       // instructions, so use the same ones.
00243       // FIXME: There appears to be some naming inconsistency in ARM libgcc:
00244       // e.g., __floatunsidf vs. __floatunssidfvfp.
00245       setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
00246       setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
00247       setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
00248       setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
00249     }
00250   }
00251 
00252   // These libcalls are not available in 32-bit.
00253   setLibcallName(RTLIB::SHL_I128, nullptr);
00254   setLibcallName(RTLIB::SRL_I128, nullptr);
00255   setLibcallName(RTLIB::SRA_I128, nullptr);
00256 
00257   if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
00258       !Subtarget->isTargetWindows()) {
00259     static const struct {
00260       const RTLIB::Libcall Op;
00261       const char * const Name;
00262       const CallingConv::ID CC;
00263       const ISD::CondCode Cond;
00264     } LibraryCalls[] = {
00265       // Double-precision floating-point arithmetic helper functions
00266       // RTABI chapter 4.1.2, Table 2
00267       { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00268       { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00269       { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00270       { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00271 
00272       // Double-precision floating-point comparison helper functions
00273       // RTABI chapter 4.1.2, Table 3
00274       { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
00275       { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
00276       { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
00277       { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
00278       { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
00279       { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
00280       { RTLIB::UO_F64,  "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
00281       { RTLIB::O_F64,   "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
00282 
00283       // Single-precision floating-point arithmetic helper functions
00284       // RTABI chapter 4.1.2, Table 4
00285       { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00286       { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00287       { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00288       { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00289 
00290       // Single-precision floating-point comparison helper functions
00291       // RTABI chapter 4.1.2, Table 5
00292       { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
00293       { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
00294       { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
00295       { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
00296       { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
00297       { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
00298       { RTLIB::UO_F32,  "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
00299       { RTLIB::O_F32,   "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
00300 
00301       // Floating-point to integer conversions.
00302       // RTABI chapter 4.1.2, Table 6
00303       { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00304       { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00305       { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00306       { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00307       { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00308       { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00309       { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00310       { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00311 
00312       // Conversions between floating types.
00313       // RTABI chapter 4.1.2, Table 7
00314       { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00315       { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00316       { RTLIB::FPEXT_F32_F64,   "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00317 
00318       // Integer to floating-point conversions.
00319       // RTABI chapter 4.1.2, Table 8
00320       { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00321       { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00322       { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00323       { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00324       { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00325       { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00326       { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00327       { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00328 
00329       // Long long helper functions
00330       // RTABI chapter 4.2, Table 9
00331       { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00332       { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00333       { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00334       { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00335 
00336       // Integer division functions
00337       // RTABI chapter 4.3.1
00338       { RTLIB::SDIV_I8,  "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00339       { RTLIB::SDIV_I16, "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00340       { RTLIB::SDIV_I32, "__aeabi_idiv",     CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00341       { RTLIB::SDIV_I64, "__aeabi_ldivmod",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00342       { RTLIB::UDIV_I8,  "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00343       { RTLIB::UDIV_I16, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00344       { RTLIB::UDIV_I32, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00345       { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00346 
00347       // Memory operations
00348       // RTABI chapter 4.3.4
00349       { RTLIB::MEMCPY,  "__aeabi_memcpy",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00350       { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00351       { RTLIB::MEMSET,  "__aeabi_memset",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
00352     };
00353 
00354     for (const auto &LC : LibraryCalls) {
00355       setLibcallName(LC.Op, LC.Name);
00356       setLibcallCallingConv(LC.Op, LC.CC);
00357       if (LC.Cond != ISD::SETCC_INVALID)
00358         setCmpLibcallCC(LC.Op, LC.Cond);
00359     }
00360   }
00361 
00362   if (Subtarget->isTargetWindows()) {
00363     static const struct {
00364       const RTLIB::Libcall Op;
00365       const char * const Name;
00366       const CallingConv::ID CC;
00367     } LibraryCalls[] = {
00368       { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
00369       { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
00370       { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
00371       { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
00372       { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
00373       { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
00374       { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
00375       { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
00376     };
00377 
00378     for (const auto &LC : LibraryCalls) {
00379       setLibcallName(LC.Op, LC.Name);
00380       setLibcallCallingConv(LC.Op, LC.CC);
00381     }
00382   }
00383 
00384   // Use divmod compiler-rt calls for iOS 5.0 and later.
00385   if (Subtarget->getTargetTriple().isiOS() &&
00386       !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
00387     setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
00388     setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
00389   }
00390 
00391   if (Subtarget->isThumb1Only())
00392     addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
00393   else
00394     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
00395   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
00396       !Subtarget->isThumb1Only()) {
00397     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
00398     if (!Subtarget->isFPOnlySP())
00399       addRegisterClass(MVT::f64, &ARM::DPRRegClass);
00400   }
00401 
00402   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
00403        VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
00404     for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
00405          InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
00406       setTruncStoreAction((MVT::SimpleValueType)VT,
00407                           (MVT::SimpleValueType)InnerVT, Expand);
00408     setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
00409     setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
00410     setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
00411 
00412     setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
00413     setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
00414     setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
00415     setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
00416 
00417     setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
00418   }
00419 
00420   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
00421   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
00422 
00423   if (Subtarget->hasNEON()) {
00424     addDRTypeForNEON(MVT::v2f32);
00425     addDRTypeForNEON(MVT::v8i8);
00426     addDRTypeForNEON(MVT::v4i16);
00427     addDRTypeForNEON(MVT::v2i32);
00428     addDRTypeForNEON(MVT::v1i64);
00429 
00430     addQRTypeForNEON(MVT::v4f32);
00431     addQRTypeForNEON(MVT::v2f64);
00432     addQRTypeForNEON(MVT::v16i8);
00433     addQRTypeForNEON(MVT::v8i16);
00434     addQRTypeForNEON(MVT::v4i32);
00435     addQRTypeForNEON(MVT::v2i64);
00436 
00437     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
00438     // neither Neon nor VFP support any arithmetic operations on it.
00439     // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
00440     // supported for v4f32.
00441     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
00442     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
00443     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
00444     // FIXME: Code duplication: FDIV and FREM are expanded always, see
00445     // ARMTargetLowering::addTypeForNEON method for details.
00446     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
00447     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
00448     // FIXME: Create unittest.
00449     // In another words, find a way when "copysign" appears in DAG with vector
00450     // operands.
00451     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
00452     // FIXME: Code duplication: SETCC has custom operation action, see
00453     // ARMTargetLowering::addTypeForNEON method for details.
00454     setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
00455     // FIXME: Create unittest for FNEG and for FABS.
00456     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
00457     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
00458     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
00459     setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
00460     setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
00461     setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
00462     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
00463     setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
00464     setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
00465     setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
00466     setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
00467     setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
00468     // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
00469     setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
00470     setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
00471     setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
00472     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
00473     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
00474     setOperationAction(ISD::FMA, MVT::v2f64, Expand);
00475 
00476     setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
00477     setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
00478     setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
00479     setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
00480     setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
00481     setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
00482     setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
00483     setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
00484     setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
00485     setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
00486     setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
00487     setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
00488     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
00489     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
00490     setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
00491 
00492     // Mark v2f32 intrinsics.
00493     setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
00494     setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
00495     setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
00496     setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
00497     setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
00498     setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
00499     setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
00500     setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
00501     setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
00502     setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
00503     setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
00504     setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
00505     setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
00506     setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
00507     setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
00508 
00509     // Neon does not support some operations on v1i64 and v2i64 types.
00510     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
00511     // Custom handling for some quad-vector types to detect VMULL.
00512     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
00513     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
00514     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
00515     // Custom handling for some vector types to avoid expensive expansions
00516     setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
00517     setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
00518     setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
00519     setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
00520     setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
00521     setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
00522     // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
00523     // a destination type that is wider than the source, and nor does
00524     // it have a FP_TO_[SU]INT instruction with a narrower destination than
00525     // source.
00526     setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
00527     setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
00528     setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
00529     setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
00530 
00531     setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);
00532     setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);
00533 
00534     // NEON does not have single instruction CTPOP for vectors with element
00535     // types wider than 8-bits.  However, custom lowering can leverage the
00536     // v8i8/v16i8 vcnt instruction.
00537     setOperationAction(ISD::CTPOP,      MVT::v2i32, Custom);
00538     setOperationAction(ISD::CTPOP,      MVT::v4i32, Custom);
00539     setOperationAction(ISD::CTPOP,      MVT::v4i16, Custom);
00540     setOperationAction(ISD::CTPOP,      MVT::v8i16, Custom);
00541 
00542     // NEON only has FMA instructions as of VFP4.
00543     if (!Subtarget->hasVFP4()) {
00544       setOperationAction(ISD::FMA, MVT::v2f32, Expand);
00545       setOperationAction(ISD::FMA, MVT::v4f32, Expand);
00546     }
00547 
00548     setTargetDAGCombine(ISD::INTRINSIC_VOID);
00549     setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
00550     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
00551     setTargetDAGCombine(ISD::SHL);
00552     setTargetDAGCombine(ISD::SRL);
00553     setTargetDAGCombine(ISD::SRA);
00554     setTargetDAGCombine(ISD::SIGN_EXTEND);
00555     setTargetDAGCombine(ISD::ZERO_EXTEND);
00556     setTargetDAGCombine(ISD::ANY_EXTEND);
00557     setTargetDAGCombine(ISD::SELECT_CC);
00558     setTargetDAGCombine(ISD::BUILD_VECTOR);
00559     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
00560     setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
00561     setTargetDAGCombine(ISD::STORE);
00562     setTargetDAGCombine(ISD::FP_TO_SINT);
00563     setTargetDAGCombine(ISD::FP_TO_UINT);
00564     setTargetDAGCombine(ISD::FDIV);
00565 
00566     // It is legal to extload from v4i8 to v4i16 or v4i32.
00567     MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
00568                   MVT::v4i16, MVT::v2i16,
00569                   MVT::v2i32};
00570     for (unsigned i = 0; i < 6; ++i) {
00571       setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
00572       setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
00573       setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
00574     }
00575   }
00576 
00577   // ARM and Thumb2 support UMLAL/SMLAL.
00578   if (!Subtarget->isThumb1Only())
00579     setTargetDAGCombine(ISD::ADDC);
00580 
00581 
00582   computeRegisterProperties();
00583 
00584   // ARM does not have floating-point extending loads.
00585   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
00586   setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
00587 
00588   // ... or truncating stores
00589   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
00590   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
00591   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
00592 
00593   // ARM does not have i1 sign extending load.
00594   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
00595 
00596   // ARM supports all 4 flavors of integer indexed load / store.
00597   if (!Subtarget->isThumb1Only()) {
00598     for (unsigned im = (unsigned)ISD::PRE_INC;
00599          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
00600       setIndexedLoadAction(im,  MVT::i1,  Legal);
00601       setIndexedLoadAction(im,  MVT::i8,  Legal);
00602       setIndexedLoadAction(im,  MVT::i16, Legal);
00603       setIndexedLoadAction(im,  MVT::i32, Legal);
00604       setIndexedStoreAction(im, MVT::i1,  Legal);
00605       setIndexedStoreAction(im, MVT::i8,  Legal);
00606       setIndexedStoreAction(im, MVT::i16, Legal);
00607       setIndexedStoreAction(im, MVT::i32, Legal);
00608     }
00609   }
00610 
00611   setOperationAction(ISD::SADDO, MVT::i32, Custom);
00612   setOperationAction(ISD::UADDO, MVT::i32, Custom);
00613   setOperationAction(ISD::SSUBO, MVT::i32, Custom);
00614   setOperationAction(ISD::USUBO, MVT::i32, Custom);
00615 
00616   // i64 operation support.
00617   setOperationAction(ISD::MUL,     MVT::i64, Expand);
00618   setOperationAction(ISD::MULHU,   MVT::i32, Expand);
00619   if (Subtarget->isThumb1Only()) {
00620     setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
00621     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
00622   }
00623   if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
00624       || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
00625     setOperationAction(ISD::MULHS, MVT::i32, Expand);
00626 
00627   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
00628   setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
00629   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
00630   setOperationAction(ISD::SRL,       MVT::i64, Custom);
00631   setOperationAction(ISD::SRA,       MVT::i64, Custom);
00632 
00633   if (!Subtarget->isThumb1Only()) {
00634     // FIXME: We should do this for Thumb1 as well.
00635     setOperationAction(ISD::ADDC,    MVT::i32, Custom);
00636     setOperationAction(ISD::ADDE,    MVT::i32, Custom);
00637     setOperationAction(ISD::SUBC,    MVT::i32, Custom);
00638     setOperationAction(ISD::SUBE,    MVT::i32, Custom);
00639   }
00640 
00641   // ARM does not have ROTL.
00642   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
00643   setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
00644   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
00645   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
00646     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
00647 
00648   // These just redirect to CTTZ and CTLZ on ARM.
00649   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
00650   setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);
00651 
00652   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
00653 
00654   // Only ARMv6 has BSWAP.
00655   if (!Subtarget->hasV6Ops())
00656     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
00657 
00658   if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
00659       !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
00660     // These are expanded into libcalls if the cpu doesn't have HW divider.
00661     setOperationAction(ISD::SDIV,  MVT::i32, Expand);
00662     setOperationAction(ISD::UDIV,  MVT::i32, Expand);
00663   }
00664 
00665   // FIXME: Also set divmod for SREM on EABI
00666   setOperationAction(ISD::SREM,  MVT::i32, Expand);
00667   setOperationAction(ISD::UREM,  MVT::i32, Expand);
00668   // Register based DivRem for AEABI (RTABI 4.2)
00669   if (Subtarget->isTargetAEABI()) {
00670     setLibcallName(RTLIB::SDIVREM_I8,  "__aeabi_idivmod");
00671     setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
00672     setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
00673     setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
00674     setLibcallName(RTLIB::UDIVREM_I8,  "__aeabi_uidivmod");
00675     setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
00676     setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
00677     setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
00678 
00679     setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
00680     setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
00681     setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
00682     setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
00683     setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
00684     setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
00685     setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
00686     setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
00687 
00688     setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
00689     setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
00690   } else {
00691     setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
00692     setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
00693   }
00694 
00695   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
00696   setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
00697   setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
00698   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
00699   setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
00700 
00701   setOperationAction(ISD::TRAP, MVT::Other, Legal);
00702 
00703   // Use the default implementation.
00704   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
00705   setOperationAction(ISD::VAARG,              MVT::Other, Expand);
00706   setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
00707   setOperationAction(ISD::VAEND,              MVT::Other, Expand);
00708   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
00709   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
00710 
00711   if (!Subtarget->isTargetMachO()) {
00712     // Non-MachO platforms may return values in these registers via the
00713     // personality function.
00714     setExceptionPointerRegister(ARM::R0);
00715     setExceptionSelectorRegister(ARM::R1);
00716   }
00717 
00718   if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
00719     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
00720   else
00721     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
00722 
00723   // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
00724   // the default expansion.
00725   if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
00726     // ATOMIC_FENCE needs custom lowering; the others should have been expanded
00727     // to ldrex/strex loops already.
00728     setOperationAction(ISD::ATOMIC_FENCE,     MVT::Other, Custom);
00729 
00730     // On v8, we have particularly efficient implementations of atomic fences
00731     // if they can be combined with nearby atomic loads and stores.
00732     if (!Subtarget->hasV8Ops()) {
00733       // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
00734       setInsertFencesForAtomic(true);
00735     }
00736   } else {
00737     // If there's anything we can use as a barrier, go through custom lowering
00738     // for ATOMIC_FENCE.
00739     setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other,
00740                        Subtarget->hasAnyDataBarrier() ? Custom : Expand);
00741 
00742     // Set them all for expansion, which will force libcalls.
00743     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
00744     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
00745     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
00746     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
00747     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
00748     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
00749     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
00750     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
00751     setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
00752     setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
00753     setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
00754     setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
00755     // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
00756     // Unordered/Monotonic case.
00757     setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
00758     setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
00759   }
00760 
00761   setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);
00762 
00763   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
00764   if (!Subtarget->hasV6Ops()) {
00765     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
00766     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
00767   }
00768   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
00769 
00770   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
00771       !Subtarget->isThumb1Only()) {
00772     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
00773     // iff target supports vfp2.
00774     setOperationAction(ISD::BITCAST, MVT::i64, Custom);
00775     setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
00776   }
00777 
00778   // We want to custom lower some of our intrinsics.
00779   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
00780   if (Subtarget->isTargetDarwin()) {
00781     setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
00782     setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
00783     setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
00784   }
00785 
00786   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
00787   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
00788   setOperationAction(ISD::SETCC,     MVT::f64, Expand);
00789   setOperationAction(ISD::SELECT,    MVT::i32, Custom);
00790   setOperationAction(ISD::SELECT,    MVT::f32, Custom);
00791   setOperationAction(ISD::SELECT,    MVT::f64, Custom);
00792   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
00793   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
00794   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
00795 
00796   setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
00797   setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
00798   setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
00799   setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
00800   setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
00801 
00802   // We don't support sin/cos/fmod/copysign/pow
00803   setOperationAction(ISD::FSIN,      MVT::f64, Expand);
00804   setOperationAction(ISD::FSIN,      MVT::f32, Expand);
00805   setOperationAction(ISD::FCOS,      MVT::f32, Expand);
00806   setOperationAction(ISD::FCOS,      MVT::f64, Expand);
00807   setOperationAction(ISD::FSINCOS,   MVT::f64, Expand);
00808   setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
00809   setOperationAction(ISD::FREM,      MVT::f64, Expand);
00810   setOperationAction(ISD::FREM,      MVT::f32, Expand);
00811   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
00812       !Subtarget->isThumb1Only()) {
00813     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
00814     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00815   }
00816   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
00817   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
00818 
00819   if (!Subtarget->hasVFP4()) {
00820     setOperationAction(ISD::FMA, MVT::f64, Expand);
00821     setOperationAction(ISD::FMA, MVT::f32, Expand);
00822   }
00823 
00824   // Various VFP goodness
00825   if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
00826     // int <-> fp are custom expanded into bit_convert + ARMISD ops.
00827     if (Subtarget->hasVFP2()) {
00828       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
00829       setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
00830       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
00831       setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
00832     }
00833 
00834     // v8 adds f64 <-> f16 conversion. Before that it should be expanded.
00835     if (!Subtarget->hasV8Ops()) {
00836       setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
00837       setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
00838     }
00839 
00840     // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
00841     if (!Subtarget->hasFP16()) {
00842       setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
00843       setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
00844     }
00845   }
00846 
00847   // Combine sin / cos into one node or libcall if possible.
00848   if (Subtarget->hasSinCos()) {
00849     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
00850     setLibcallName(RTLIB::SINCOS_F64, "sincos");
00851     if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
00852       // For iOS, we don't want to the normal expansion of a libcall to
00853       // sincos. We want to issue a libcall to __sincos_stret.
00854       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
00855       setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
00856     }
00857   }
00858 
00859   // We have target-specific dag combine patterns for the following nodes:
00860   // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
00861   setTargetDAGCombine(ISD::ADD);
00862   setTargetDAGCombine(ISD::SUB);
00863   setTargetDAGCombine(ISD::MUL);
00864   setTargetDAGCombine(ISD::AND);
00865   setTargetDAGCombine(ISD::OR);
00866   setTargetDAGCombine(ISD::XOR);
00867 
00868   if (Subtarget->hasV6Ops())
00869     setTargetDAGCombine(ISD::SRL);
00870 
00871   setStackPointerRegisterToSaveRestore(ARM::SP);
00872 
00873   if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
00874       !Subtarget->hasVFP2())
00875     setSchedulingPreference(Sched::RegPressure);
00876   else
00877     setSchedulingPreference(Sched::Hybrid);
00878 
00879   //// temporary - rewrite interface to use type
00880   MaxStoresPerMemset = 8;
00881   MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
00882   MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
00883   MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
00884   MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
00885   MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
00886 
00887   // On ARM arguments smaller than 4 bytes are extended, so all arguments
00888   // are at least 4 bytes aligned.
00889   setMinStackArgumentAlignment(4);
00890 
00891   // Prefer likely predicted branches to selects on out-of-order cores.
00892   PredictableSelectIsExpensive = Subtarget->isLikeA9();
00893 
00894   setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
00895 }
00896 
00897 // FIXME: It might make sense to define the representative register class as the
00898 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
00899 // a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
00900 // SPR's representative would be DPR_VFP2. This should work well if register
00901 // pressure tracking were modified such that a register use would increment the
00902 // pressure of the register class's representative and all of it's super
00903 // classes' representatives transitively. We have not implemented this because
00904 // of the difficulty prior to coalescing of modeling operand register classes
00905 // due to the common occurrence of cross class copies and subregister insertions
00906 // and extractions.
00907 std::pair<const TargetRegisterClass*, uint8_t>
00908 ARMTargetLowering::findRepresentativeClass(MVT VT) const{
00909   const TargetRegisterClass *RRC = nullptr;
00910   uint8_t Cost = 1;
00911   switch (VT.SimpleTy) {
00912   default:
00913     return TargetLowering::findRepresentativeClass(VT);
00914   // Use DPR as representative register class for all floating point
00915   // and vector types. Since there are 32 SPR registers and 32 DPR registers so
00916   // the cost is 1 for both f32 and f64.
00917   case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
00918   case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
00919     RRC = &ARM::DPRRegClass;
00920     // When NEON is used for SP, only half of the register file is available
00921     // because operations that define both SP and DP results will be constrained
00922     // to the VFP2 class (D0-D15). We currently model this constraint prior to
00923     // coalescing by double-counting the SP regs. See the FIXME above.
00924     if (Subtarget->useNEONForSinglePrecisionFP())
00925       Cost = 2;
00926     break;
00927   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
00928   case MVT::v4f32: case MVT::v2f64:
00929     RRC = &ARM::DPRRegClass;
00930     Cost = 2;
00931     break;
00932   case MVT::v4i64:
00933     RRC = &ARM::DPRRegClass;
00934     Cost = 4;
00935     break;
00936   case MVT::v8i64:
00937     RRC = &ARM::DPRRegClass;
00938     Cost = 8;
00939     break;
00940   }
00941   return std::make_pair(RRC, Cost);
00942 }
00943 
00944 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
00945   switch (Opcode) {
00946   default: return nullptr;
00947   case ARMISD::Wrapper:       return "ARMISD::Wrapper";
00948   case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
00949   case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
00950   case ARMISD::CALL:          return "ARMISD::CALL";
00951   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
00952   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
00953   case ARMISD::tCALL:         return "ARMISD::tCALL";
00954   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
00955   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
00956   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
00957   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
00958   case ARMISD::INTRET_FLAG:   return "ARMISD::INTRET_FLAG";
00959   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
00960   case ARMISD::CMP:           return "ARMISD::CMP";
00961   case ARMISD::CMN:           return "ARMISD::CMN";
00962   case ARMISD::CMPZ:          return "ARMISD::CMPZ";
00963   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
00964   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
00965   case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
00966   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
00967 
00968   case ARMISD::CMOV:          return "ARMISD::CMOV";
00969 
00970   case ARMISD::RBIT:          return "ARMISD::RBIT";
00971 
00972   case ARMISD::FTOSI:         return "ARMISD::FTOSI";
00973   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
00974   case ARMISD::SITOF:         return "ARMISD::SITOF";
00975   case ARMISD::UITOF:         return "ARMISD::UITOF";
00976 
00977   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
00978   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
00979   case ARMISD::RRX:           return "ARMISD::RRX";
00980 
00981   case ARMISD::ADDC:          return "ARMISD::ADDC";
00982   case ARMISD::ADDE:          return "ARMISD::ADDE";
00983   case ARMISD::SUBC:          return "ARMISD::SUBC";
00984   case ARMISD::SUBE:          return "ARMISD::SUBE";
00985 
00986   case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
00987   case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
00988 
00989   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
00990   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
00991 
00992   case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
00993 
00994   case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
00995 
00996   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
00997 
00998   case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
00999 
01000   case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
01001 
01002   case ARMISD::WIN__CHKSTK:   return "ARMISD:::WIN__CHKSTK";
01003 
01004   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
01005   case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
01006   case ARMISD::VCGE:          return "ARMISD::VCGE";
01007   case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
01008   case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
01009   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
01010   case ARMISD::VCGT:          return "ARMISD::VCGT";
01011   case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
01012   case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
01013   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
01014   case ARMISD::VTST:          return "ARMISD::VTST";
01015 
01016   case ARMISD::VSHL:          return "ARMISD::VSHL";
01017   case ARMISD::VSHRs:         return "ARMISD::VSHRs";
01018   case ARMISD::VSHRu:         return "ARMISD::VSHRu";
01019   case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
01020   case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
01021   case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
01022   case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
01023   case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
01024   case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
01025   case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
01026   case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
01027   case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
01028   case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
01029   case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
01030   case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
01031   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
01032   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
01033   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
01034   case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
01035   case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
01036   case ARMISD::VDUP:          return "ARMISD::VDUP";
01037   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
01038   case ARMISD::VEXT:          return "ARMISD::VEXT";
01039   case ARMISD::VREV64:        return "ARMISD::VREV64";
01040   case ARMISD::VREV32:        return "ARMISD::VREV32";
01041   case ARMISD::VREV16:        return "ARMISD::VREV16";
01042   case ARMISD::VZIP:          return "ARMISD::VZIP";
01043   case ARMISD::VUZP:          return "ARMISD::VUZP";
01044   case ARMISD::VTRN:          return "ARMISD::VTRN";
01045   case ARMISD::VTBL1:         return "ARMISD::VTBL1";
01046   case ARMISD::VTBL2:         return "ARMISD::VTBL2";
01047   case ARMISD::VMULLs:        return "ARMISD::VMULLs";
01048   case ARMISD::VMULLu:        return "ARMISD::VMULLu";
01049   case ARMISD::UMLAL:         return "ARMISD::UMLAL";
01050   case ARMISD::SMLAL:         return "ARMISD::SMLAL";
01051   case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
01052   case ARMISD::FMAX:          return "ARMISD::FMAX";
01053   case ARMISD::FMIN:          return "ARMISD::FMIN";
01054   case ARMISD::VMAXNM:        return "ARMISD::VMAX";
01055   case ARMISD::VMINNM:        return "ARMISD::VMIN";
01056   case ARMISD::BFI:           return "ARMISD::BFI";
01057   case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
01058   case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
01059   case ARMISD::VBSL:          return "ARMISD::VBSL";
01060   case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
01061   case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
01062   case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
01063   case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
01064   case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
01065   case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
01066   case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
01067   case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
01068   case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
01069   case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
01070   case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
01071   case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
01072   case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
01073   case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
01074   case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
01075   case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
01076   case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
01077   case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
01078   case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
01079   case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
01080   }
01081 }
01082 
01083 EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
01084   if (!VT.isVector()) return getPointerTy();
01085   return VT.changeVectorElementTypeToInteger();
01086 }
01087 
01088 /// getRegClassFor - Return the register class that should be used for the
01089 /// specified value type.
01090 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
01091   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
01092   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
01093   // load / store 4 to 8 consecutive D registers.
01094   if (Subtarget->hasNEON()) {
01095     if (VT == MVT::v4i64)
01096       return &ARM::QQPRRegClass;
01097     if (VT == MVT::v8i64)
01098       return &ARM::QQQQPRRegClass;
01099   }
01100   return TargetLowering::getRegClassFor(VT);
01101 }
01102 
01103 // Create a fast isel object.
01104 FastISel *
01105 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
01106                                   const TargetLibraryInfo *libInfo) const {
01107   return ARM::createFastISel(funcInfo, libInfo);
01108 }
01109 
01110 /// getMaximalGlobalOffset - Returns the maximal possible offset which can
01111 /// be used for loads / stores from the global.
01112 unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
01113   return (Subtarget->isThumb1Only() ? 127 : 4095);
01114 }
01115 
01116 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
01117   unsigned NumVals = N->getNumValues();
01118   if (!NumVals)
01119     return Sched::RegPressure;
01120 
01121   for (unsigned i = 0; i != NumVals; ++i) {
01122     EVT VT = N->getValueType(i);
01123     if (VT == MVT::Glue || VT == MVT::Other)
01124       continue;
01125     if (VT.isFloatingPoint() || VT.isVector())
01126       return Sched::ILP;
01127   }
01128 
01129   if (!N->isMachineOpcode())
01130     return Sched::RegPressure;
01131 
01132   // Load are scheduled for latency even if there instruction itinerary
01133   // is not available.
01134   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
01135   const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
01136 
01137   if (MCID.getNumDefs() == 0)
01138     return Sched::RegPressure;
01139   if (!Itins->isEmpty() &&
01140       Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
01141     return Sched::ILP;
01142 
01143   return Sched::RegPressure;
01144 }
01145 
01146 //===----------------------------------------------------------------------===//
01147 // Lowering Code
01148 //===----------------------------------------------------------------------===//
01149 
01150 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
01151 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
01152   switch (CC) {
01153   default: llvm_unreachable("Unknown condition code!");
01154   case ISD::SETNE:  return ARMCC::NE;
01155   case ISD::SETEQ:  return ARMCC::EQ;
01156   case ISD::SETGT:  return ARMCC::GT;
01157   case ISD::SETGE:  return ARMCC::GE;
01158   case ISD::SETLT:  return ARMCC::LT;
01159   case ISD::SETLE:  return ARMCC::LE;
01160   case ISD::SETUGT: return ARMCC::HI;
01161   case ISD::SETUGE: return ARMCC::HS;
01162   case ISD::SETULT: return ARMCC::LO;
01163   case ISD::SETULE: return ARMCC::LS;
01164   }
01165 }
01166 
01167 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
01168 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
01169                         ARMCC::CondCodes &CondCode2) {
01170   CondCode2 = ARMCC::AL;
01171   switch (CC) {
01172   default: llvm_unreachable("Unknown FP condition!");
01173   case ISD::SETEQ:
01174   case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
01175   case ISD::SETGT:
01176   case ISD::SETOGT: CondCode = ARMCC::GT; break;
01177   case ISD::SETGE:
01178   case ISD::SETOGE: CondCode = ARMCC::GE; break;
01179   case ISD::SETOLT: CondCode = ARMCC::MI; break;
01180   case ISD::SETOLE: CondCode = ARMCC::LS; break;
01181   case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
01182   case ISD::SETO:   CondCode = ARMCC::VC; break;
01183   case ISD::SETUO:  CondCode = ARMCC::VS; break;
01184   case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
01185   case ISD::SETUGT: CondCode = ARMCC::HI; break;
01186   case ISD::SETUGE: CondCode = ARMCC::PL; break;
01187   case ISD::SETLT:
01188   case ISD::SETULT: CondCode = ARMCC::LT; break;
01189   case ISD::SETLE:
01190   case ISD::SETULE: CondCode = ARMCC::LE; break;
01191   case ISD::SETNE:
01192   case ISD::SETUNE: CondCode = ARMCC::NE; break;
01193   }
01194 }
01195 
01196 //===----------------------------------------------------------------------===//
01197 //                      Calling Convention Implementation
01198 //===----------------------------------------------------------------------===//
01199 
01200 #include "ARMGenCallingConv.inc"
01201 
01202 /// getEffectiveCallingConv - Get the effective calling convention, taking into
01203 /// account presence of floating point hardware and calling convention
01204 /// limitations, such as support for variadic functions.
01205 CallingConv::ID
01206 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
01207                                            bool isVarArg) const {
01208   switch (CC) {
01209   default:
01210     llvm_unreachable("Unsupported calling convention");
01211   case CallingConv::ARM_AAPCS:
01212   case CallingConv::ARM_APCS:
01213   case CallingConv::GHC:
01214     return CC;
01215   case CallingConv::ARM_AAPCS_VFP:
01216     return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
01217   case CallingConv::C:
01218     if (!Subtarget->isAAPCS_ABI())
01219       return CallingConv::ARM_APCS;
01220     else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
01221              getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
01222              !isVarArg)
01223       return CallingConv::ARM_AAPCS_VFP;
01224     else
01225       return CallingConv::ARM_AAPCS;
01226   case CallingConv::Fast:
01227     if (!Subtarget->isAAPCS_ABI()) {
01228       if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
01229         return CallingConv::Fast;
01230       return CallingConv::ARM_APCS;
01231     } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
01232       return CallingConv::ARM_AAPCS_VFP;
01233     else
01234       return CallingConv::ARM_AAPCS;
01235   }
01236 }
01237 
01238 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
01239 /// CallingConvention.
01240 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
01241                                                  bool Return,
01242                                                  bool isVarArg) const {
01243   switch (getEffectiveCallingConv(CC, isVarArg)) {
01244   default:
01245     llvm_unreachable("Unsupported calling convention");
01246   case CallingConv::ARM_APCS:
01247     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
01248   case CallingConv::ARM_AAPCS:
01249     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
01250   case CallingConv::ARM_AAPCS_VFP:
01251     return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
01252   case CallingConv::Fast:
01253     return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
01254   case CallingConv::GHC:
01255     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
01256   }
01257 }
01258 
01259 /// LowerCallResult - Lower the result values of a call into the
01260 /// appropriate copies out of appropriate physical registers.
01261 SDValue
01262 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
01263                                    CallingConv::ID CallConv, bool isVarArg,
01264                                    const SmallVectorImpl<ISD::InputArg> &Ins,
01265                                    SDLoc dl, SelectionDAG &DAG,
01266                                    SmallVectorImpl<SDValue> &InVals,
01267                                    bool isThisReturn, SDValue ThisVal) const {
01268 
01269   // Assign locations to each value returned by this call.
01270   SmallVector<CCValAssign, 16> RVLocs;
01271   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
01272                     getTargetMachine(), RVLocs, *DAG.getContext(), Call);
01273   CCInfo.AnalyzeCallResult(Ins,
01274                            CCAssignFnForNode(CallConv, /* Return*/ true,
01275                                              isVarArg));
01276 
01277   // Copy all of the result registers out of their specified physreg.
01278   for (unsigned i = 0; i != RVLocs.size(); ++i) {
01279     CCValAssign VA = RVLocs[i];
01280 
01281     // Pass 'this' value directly from the argument to return value, to avoid
01282     // reg unit interference
01283     if (i == 0 && isThisReturn) {
01284       assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
01285              "unexpected return calling convention register assignment");
01286       InVals.push_back(ThisVal);
01287       continue;
01288     }
01289 
01290     SDValue Val;
01291     if (VA.needsCustom()) {
01292       // Handle f64 or half of a v2f64.
01293       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
01294                                       InFlag);
01295       Chain = Lo.getValue(1);
01296       InFlag = Lo.getValue(2);
01297       VA = RVLocs[++i]; // skip ahead to next loc
01298       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
01299                                       InFlag);
01300       Chain = Hi.getValue(1);
01301       InFlag = Hi.getValue(2);
01302       if (!Subtarget->isLittle())
01303         std::swap (Lo, Hi);
01304       Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
01305 
01306       if (VA.getLocVT() == MVT::v2f64) {
01307         SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
01308         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
01309                           DAG.getConstant(0, MVT::i32));
01310 
01311         VA = RVLocs[++i]; // skip ahead to next loc
01312         Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
01313         Chain = Lo.getValue(1);
01314         InFlag = Lo.getValue(2);
01315         VA = RVLocs[++i]; // skip ahead to next loc
01316         Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
01317         Chain = Hi.getValue(1);
01318         InFlag = Hi.getValue(2);
01319         if (!Subtarget->isLittle())
01320           std::swap (Lo, Hi);
01321         Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
01322         Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
01323                           DAG.getConstant(1, MVT::i32));
01324       }
01325     } else {
01326       Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
01327                                InFlag);
01328       Chain = Val.getValue(1);
01329       InFlag = Val.getValue(2);
01330     }
01331 
01332     switch (VA.getLocInfo()) {
01333     default: llvm_unreachable("Unknown loc info!");
01334     case CCValAssign::Full: break;
01335     case CCValAssign::BCvt:
01336       Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
01337       break;
01338     }
01339 
01340     InVals.push_back(Val);
01341   }
01342 
01343   return Chain;
01344 }
01345 
01346 /// LowerMemOpCallTo - Store the argument to the stack.
01347 SDValue
01348 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
01349                                     SDValue StackPtr, SDValue Arg,
01350                                     SDLoc dl, SelectionDAG &DAG,
01351                                     const CCValAssign &VA,
01352                                     ISD::ArgFlagsTy Flags) const {
01353   unsigned LocMemOffset = VA.getLocMemOffset();
01354   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
01355   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
01356   return DAG.getStore(Chain, dl, Arg, PtrOff,
01357                       MachinePointerInfo::getStack(LocMemOffset),
01358                       false, false, 0);
01359 }
01360 
01361 void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
01362                                          SDValue Chain, SDValue &Arg,
01363                                          RegsToPassVector &RegsToPass,
01364                                          CCValAssign &VA, CCValAssign &NextVA,
01365                                          SDValue &StackPtr,
01366                                          SmallVectorImpl<SDValue> &MemOpChains,
01367                                          ISD::ArgFlagsTy Flags) const {
01368 
01369   SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
01370                               DAG.getVTList(MVT::i32, MVT::i32), Arg);
01371   unsigned id = Subtarget->isLittle() ? 0 : 1;
01372   RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
01373 
01374   if (NextVA.isRegLoc())
01375     RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
01376   else {
01377     assert(NextVA.isMemLoc());
01378     if (!StackPtr.getNode())
01379       StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
01380 
01381     MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
01382                                            dl, DAG, NextVA,
01383                                            Flags));
01384   }
01385 }
01386 
01387 /// LowerCall - Lowering a call into a callseq_start <-
01388 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
01389 /// nodes.
01390 SDValue
01391 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
01392                              SmallVectorImpl<SDValue> &InVals) const {
01393   SelectionDAG &DAG                     = CLI.DAG;
01394   SDLoc &dl                          = CLI.DL;
01395   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
01396   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
01397   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
01398   SDValue Chain                         = CLI.Chain;
01399   SDValue Callee                        = CLI.Callee;
01400   bool &isTailCall                      = CLI.IsTailCall;
01401   CallingConv::ID CallConv              = CLI.CallConv;
01402   bool doesNotRet                       = CLI.DoesNotReturn;
01403   bool isVarArg                         = CLI.IsVarArg;
01404 
01405   MachineFunction &MF = DAG.getMachineFunction();
01406   bool isStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
01407   bool isThisReturn   = false;
01408   bool isSibCall      = false;
01409 
01410   // Disable tail calls if they're not supported.
01411   if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls)
01412     isTailCall = false;
01413 
01414   if (isTailCall) {
01415     // Check if it's really possible to do a tail call.
01416     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
01417                     isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
01418                                                    Outs, OutVals, Ins, DAG);
01419     if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
01420       report_fatal_error("failed to perform tail call elimination on a call "
01421                          "site marked musttail");
01422     // We don't support GuaranteedTailCallOpt for ARM, only automatically
01423     // detected sibcalls.
01424     if (isTailCall) {
01425       ++NumTailCalls;
01426       isSibCall = true;
01427     }
01428   }
01429 
01430   // Analyze operands of the call, assigning locations to each operand.
01431   SmallVector<CCValAssign, 16> ArgLocs;
01432   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
01433                  getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
01434   CCInfo.AnalyzeCallOperands(Outs,
01435                              CCAssignFnForNode(CallConv, /* Return*/ false,
01436                                                isVarArg));
01437 
01438   // Get a count of how many bytes are to be pushed on the stack.
01439   unsigned NumBytes = CCInfo.getNextStackOffset();
01440 
01441   // For tail calls, memory operands are available in our caller's stack.
01442   if (isSibCall)
01443     NumBytes = 0;
01444 
01445   // Adjust the stack pointer for the new arguments...
01446   // These operations are automatically eliminated by the prolog/epilog pass
01447   if (!isSibCall)
01448     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
01449                                  dl);
01450 
01451   SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
01452 
01453   RegsToPassVector RegsToPass;
01454   SmallVector<SDValue, 8> MemOpChains;
01455 
01456   // Walk the register/memloc assignments, inserting copies/loads.  In the case
01457   // of tail call optimization, arguments are handled later.
01458   for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
01459        i != e;
01460        ++i, ++realArgIdx) {
01461     CCValAssign &VA = ArgLocs[i];
01462     SDValue Arg = OutVals[realArgIdx];
01463     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
01464     bool isByVal = Flags.isByVal();
01465 
01466     // Promote the value if needed.
01467     switch (VA.getLocInfo()) {
01468     default: llvm_unreachable("Unknown loc info!");
01469     case CCValAssign::Full: break;
01470     case CCValAssign::SExt:
01471       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
01472       break;
01473     case CCValAssign::ZExt:
01474       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
01475       break;
01476     case CCValAssign::AExt:
01477       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
01478       break;
01479     case CCValAssign::BCvt:
01480       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
01481       break;
01482     }
01483 
01484     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
01485     if (VA.needsCustom()) {
01486       if (VA.getLocVT() == MVT::v2f64) {
01487         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
01488                                   DAG.getConstant(0, MVT::i32));
01489         SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
01490                                   DAG.getConstant(1, MVT::i32));
01491 
01492         PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
01493                          VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
01494 
01495         VA = ArgLocs[++i]; // skip ahead to next loc
01496         if (VA.isRegLoc()) {
01497           PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
01498                            VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
01499         } else {
01500           assert(VA.isMemLoc());
01501 
01502           MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
01503                                                  dl, DAG, VA, Flags));
01504         }
01505       } else {
01506         PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
01507                          StackPtr, MemOpChains, Flags);
01508       }
01509     } else if (VA.isRegLoc()) {
01510       if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
01511         assert(VA.getLocVT() == MVT::i32 &&
01512                "unexpected calling convention register assignment");
01513         assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
01514                "unexpected use of 'returned'");
01515         isThisReturn = true;
01516       }
01517       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
01518     } else if (isByVal) {
01519       assert(VA.isMemLoc());
01520       unsigned offset = 0;
01521 
01522       // True if this byval aggregate will be split between registers
01523       // and memory.
01524       unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
01525       unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
01526 
01527       if (CurByValIdx < ByValArgsCount) {
01528 
01529         unsigned RegBegin, RegEnd;
01530         CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
01531 
01532         EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
01533         unsigned int i, j;
01534         for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
01535           SDValue Const = DAG.getConstant(4*i, MVT::i32);
01536           SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
01537           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
01538                                      MachinePointerInfo(),
01539                                      false, false, false,
01540                                      DAG.InferPtrAlignment(AddArg));
01541           MemOpChains.push_back(Load.getValue(1));
01542           RegsToPass.push_back(std::make_pair(j, Load));
01543         }
01544 
01545         // If parameter size outsides register area, "offset" value
01546         // helps us to calculate stack slot for remained part properly.
01547         offset = RegEnd - RegBegin;
01548 
01549         CCInfo.nextInRegsParam();
01550       }
01551 
01552       if (Flags.getByValSize() > 4*offset) {
01553         unsigned LocMemOffset = VA.getLocMemOffset();
01554         SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
01555         SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
01556                                   StkPtrOff);
01557         SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
01558         SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
01559         SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
01560                                            MVT::i32);
01561         SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
01562 
01563         SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
01564         SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
01565         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
01566                                           Ops));
01567       }
01568     } else if (!isSibCall) {
01569       assert(VA.isMemLoc());
01570 
01571       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
01572                                              dl, DAG, VA, Flags));
01573     }
01574   }
01575 
01576   if (!MemOpChains.empty())
01577     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
01578 
01579   // Build a sequence of copy-to-reg nodes chained together with token chain
01580   // and flag operands which copy the outgoing args into the appropriate regs.
01581   SDValue InFlag;
01582   // Tail call byval lowering might overwrite argument registers so in case of
01583   // tail call optimization the copies to registers are lowered later.
01584   if (!isTailCall)
01585     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
01586       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
01587                                RegsToPass[i].second, InFlag);
01588       InFlag = Chain.getValue(1);
01589     }
01590 
01591   // For tail calls lower the arguments to the 'real' stack slot.
01592   if (isTailCall) {
01593     // Force all the incoming stack arguments to be loaded from the stack
01594     // before any new outgoing arguments are stored to the stack, because the
01595     // outgoing stack slots may alias the incoming argument stack slots, and
01596     // the alias isn't otherwise explicit. This is slightly more conservative
01597     // than necessary, because it means that each store effectively depends
01598     // on every argument instead of just those arguments it would clobber.
01599 
01600     // Do not flag preceding copytoreg stuff together with the following stuff.
01601     InFlag = SDValue();
01602     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
01603       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
01604                                RegsToPass[i].second, InFlag);
01605       InFlag = Chain.getValue(1);
01606     }
01607     InFlag = SDValue();
01608   }
01609 
01610   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
01611   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
01612   // node so that legalize doesn't hack it.
01613   bool isDirect = false;
01614   bool isARMFunc = false;
01615   bool isLocalARMFunc = false;
01616   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
01617 
01618   if (EnableARMLongCalls) {
01619     assert((Subtarget->isTargetWindows() ||
01620             getTargetMachine().getRelocationModel() == Reloc::Static) &&
01621            "long-calls with non-static relocation model!");
01622     // Handle a global address or an external symbol. If it's not one of
01623     // those, the target's already in a register, so we don't need to do
01624     // anything extra.
01625     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
01626       const GlobalValue *GV = G->getGlobal();
01627       // Create a constant pool entry for the callee address
01628       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
01629       ARMConstantPoolValue *CPV =
01630         ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
01631 
01632       // Get the address of the callee into a register
01633       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
01634       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
01635       Callee = DAG.getLoad(getPointerTy(), dl,
01636                            DAG.getEntryNode(), CPAddr,
01637                            MachinePointerInfo::getConstantPool(),
01638                            false, false, false, 0);
01639     } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
01640       const char *Sym = S->getSymbol();
01641 
01642       // Create a constant pool entry for the callee address
01643       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
01644       ARMConstantPoolValue *CPV =
01645         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
01646                                       ARMPCLabelIndex, 0);
01647       // Get the address of the callee into a register
01648       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
01649       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
01650       Callee = DAG.getLoad(getPointerTy(), dl,
01651                            DAG.getEntryNode(), CPAddr,
01652                            MachinePointerInfo::getConstantPool(),
01653                            false, false, false, 0);
01654     }
01655   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
01656     const GlobalValue *GV = G->getGlobal();
01657     isDirect = true;
01658     bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
01659     bool isStub = (isExt && Subtarget->isTargetMachO()) &&
01660                    getTargetMachine().getRelocationModel() != Reloc::Static;
01661     isARMFunc = !Subtarget->isThumb() || isStub;
01662     // ARM call to a local ARM function is predicable.
01663     isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
01664     // tBX takes a register source operand.
01665     if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
01666       assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
01667       Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
01668                            DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
01669     } else if (Subtarget->isTargetCOFF()) {
01670       assert(Subtarget->isTargetWindows() &&
01671              "Windows is the only supported COFF target");
01672       unsigned TargetFlags = GV->hasDLLImportStorageClass()
01673                                  ? ARMII::MO_DLLIMPORT
01674                                  : ARMII::MO_NO_FLAG;
01675       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,
01676                                           TargetFlags);
01677       if (GV->hasDLLImportStorageClass())
01678         Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
01679                              DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),
01680                                          Callee), MachinePointerInfo::getGOT(),
01681                              false, false, false, 0);
01682     } else {
01683       // On ELF targets for PIC code, direct calls should go through the PLT
01684       unsigned OpFlags = 0;
01685       if (Subtarget->isTargetELF() &&
01686           getTargetMachine().getRelocationModel() == Reloc::PIC_)
01687         OpFlags = ARMII::MO_PLT;
01688       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
01689     }
01690   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
01691     isDirect = true;
01692     bool isStub = Subtarget->isTargetMachO() &&
01693                   getTargetMachine().getRelocationModel() != Reloc::Static;
01694     isARMFunc = !Subtarget->isThumb() || isStub;
01695     // tBX takes a register source operand.
01696     const char *Sym = S->getSymbol();
01697     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
01698       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
01699       ARMConstantPoolValue *CPV =
01700         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
01701                                       ARMPCLabelIndex, 4);
01702       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
01703       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
01704       Callee = DAG.getLoad(getPointerTy(), dl,
01705                            DAG.getEntryNode(), CPAddr,
01706                            MachinePointerInfo::getConstantPool(),
01707                            false, false, false, 0);
01708       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
01709       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
01710                            getPointerTy(), Callee, PICLabel);
01711     } else {
01712       unsigned OpFlags = 0;
01713       // On ELF targets for PIC code, direct calls should go through the PLT
01714       if (Subtarget->isTargetELF() &&
01715                   getTargetMachine().getRelocationModel() == Reloc::PIC_)
01716         OpFlags = ARMII::MO_PLT;
01717       Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
01718     }
01719   }
01720 
01721   // FIXME: handle tail calls differently.
01722   unsigned CallOpc;
01723   bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(
01724       AttributeSet::FunctionIndex, Attribute::MinSize);
01725   if (Subtarget->isThumb()) {
01726     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
01727       CallOpc = ARMISD::CALL_NOLINK;
01728     else
01729       CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
01730   } else {
01731     if (!isDirect && !Subtarget->hasV5TOps())
01732       CallOpc = ARMISD::CALL_NOLINK;
01733     else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
01734                // Emit regular call when code size is the priority
01735                !HasMinSizeAttr)
01736       // "mov lr, pc; b _foo" to avoid confusing the RSP
01737       CallOpc = ARMISD::CALL_NOLINK;
01738     else
01739       CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
01740   }
01741 
01742   std::vector<SDValue> Ops;
01743   Ops.push_back(Chain);
01744   Ops.push_back(Callee);
01745 
01746   // Add argument registers to the end of the list so that they are known live
01747   // into the call.
01748   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
01749     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
01750                                   RegsToPass[i].second.getValueType()));
01751 
01752   // Add a register mask operand representing the call-preserved registers.
01753   if (!isTailCall) {
01754     const uint32_t *Mask;
01755     const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
01756     const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
01757     if (isThisReturn) {
01758       // For 'this' returns, use the R0-preserving mask if applicable
01759       Mask = ARI->getThisReturnPreservedMask(CallConv);
01760       if (!Mask) {
01761         // Set isThisReturn to false if the calling convention is not one that
01762         // allows 'returned' to be modeled in this way, so LowerCallResult does
01763         // not try to pass 'this' straight through
01764         isThisReturn = false;
01765         Mask = ARI->getCallPreservedMask(CallConv);
01766       }
01767     } else
01768       Mask = ARI->getCallPreservedMask(CallConv);
01769 
01770     assert(Mask && "Missing call preserved mask for calling convention");
01771     Ops.push_back(DAG.getRegisterMask(Mask));
01772   }
01773 
01774   if (InFlag.getNode())
01775     Ops.push_back(InFlag);
01776 
01777   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
01778   if (isTailCall)
01779     return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
01780 
01781   // Returns a chain and a flag for retval copy to use.
01782   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
01783   InFlag = Chain.getValue(1);
01784 
01785   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
01786                              DAG.getIntPtrConstant(0, true), InFlag, dl);
01787   if (!Ins.empty())
01788     InFlag = Chain.getValue(1);
01789 
01790   // Handle result values, copying them out of physregs into vregs that we
01791   // return.
01792   return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
01793                          InVals, isThisReturn,
01794                          isThisReturn ? OutVals[0] : SDValue());
01795 }
01796 
01797 /// HandleByVal - Every parameter *after* a byval parameter is passed
01798 /// on the stack.  Remember the next parameter register to allocate,
01799 /// and then confiscate the rest of the parameter registers to insure
01800 /// this.
01801 void
01802 ARMTargetLowering::HandleByVal(
01803     CCState *State, unsigned &size, unsigned Align) const {
01804   unsigned reg = State->AllocateReg(GPRArgRegs, 4);
01805   assert((State->getCallOrPrologue() == Prologue ||
01806           State->getCallOrPrologue() == Call) &&
01807          "unhandled ParmContext");
01808 
01809   if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
01810     if (Subtarget->isAAPCS_ABI() && Align > 4) {
01811       unsigned AlignInRegs = Align / 4;
01812       unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
01813       for (unsigned i = 0; i < Waste; ++i)
01814         reg = State->AllocateReg(GPRArgRegs, 4);
01815     }
01816     if (reg != 0) {
01817       unsigned excess = 4 * (ARM::R4 - reg);
01818 
01819       // Special case when NSAA != SP and parameter size greater than size of
01820       // all remained GPR regs. In that case we can't split parameter, we must
01821       // send it to stack. We also must set NCRN to R4, so waste all
01822       // remained registers.
01823       const unsigned NSAAOffset = State->getNextStackOffset();
01824       if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
01825         while (State->AllocateReg(GPRArgRegs, 4))
01826           ;
01827         return;
01828       }
01829 
01830       // First register for byval parameter is the first register that wasn't
01831       // allocated before this method call, so it would be "reg".
01832       // If parameter is small enough to be saved in range [reg, r4), then
01833       // the end (first after last) register would be reg + param-size-in-regs,
01834       // else parameter would be splitted between registers and stack,
01835       // end register would be r4 in this case.
01836       unsigned ByValRegBegin = reg;
01837       unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
01838       State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
01839       // Note, first register is allocated in the beginning of function already,
01840       // allocate remained amount of registers we need.
01841       for (unsigned i = reg+1; i != ByValRegEnd; ++i)
01842         State->AllocateReg(GPRArgRegs, 4);
01843       // A byval parameter that is split between registers and memory needs its
01844       // size truncated here.
01845       // In the case where the entire structure fits in registers, we set the
01846       // size in memory to zero.
01847       if (size < excess)
01848         size = 0;
01849       else
01850         size -= excess;
01851     }
01852   }
01853 }
01854 
01855 /// MatchingStackOffset - Return true if the given stack call argument is
01856 /// already available in the same position (relatively) of the caller's
01857 /// incoming argument stack.
01858 static
01859 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
01860                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
01861                          const TargetInstrInfo *TII) {
01862   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
01863   int FI = INT_MAX;
01864   if (Arg.getOpcode() == ISD::CopyFromReg) {
01865     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
01866     if (!TargetRegisterInfo::isVirtualRegister(VR))
01867       return false;
01868     MachineInstr *Def = MRI->getVRegDef(VR);
01869     if (!Def)
01870       return false;
01871     if (!Flags.isByVal()) {
01872       if (!TII->isLoadFromStackSlot(Def, FI))
01873         return false;
01874     } else {
01875       return false;
01876     }
01877   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
01878     if (Flags.isByVal())
01879       // ByVal argument is passed in as a pointer but it's now being
01880       // dereferenced. e.g.
01881       // define @foo(%struct.X* %A) {
01882       //   tail call @bar(%struct.X* byval %A)
01883       // }
01884       return false;
01885     SDValue Ptr = Ld->getBasePtr();
01886     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
01887     if (!FINode)
01888       return false;
01889     FI = FINode->getIndex();
01890   } else
01891     return false;
01892 
01893   assert(FI != INT_MAX);
01894   if (!MFI->isFixedObjectIndex(FI))
01895     return false;
01896   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
01897 }
01898 
01899 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
01900 /// for tail call optimization. Targets which want to do tail call
01901 /// optimization should implement this function.
01902 bool
01903 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
01904                                                      CallingConv::ID CalleeCC,
01905                                                      bool isVarArg,
01906                                                      bool isCalleeStructRet,
01907                                                      bool isCallerStructRet,
01908                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
01909                                     const SmallVectorImpl<SDValue> &OutVals,
01910                                     const SmallVectorImpl<ISD::InputArg> &Ins,
01911                                                      SelectionDAG& DAG) const {
01912   const Function *CallerF = DAG.getMachineFunction().getFunction();
01913   CallingConv::ID CallerCC = CallerF->getCallingConv();
01914   bool CCMatch = CallerCC == CalleeCC;
01915 
01916   // Look for obvious safe cases to perform tail call optimization that do not
01917   // require ABI changes. This is what gcc calls sibcall.
01918 
01919   // Do not sibcall optimize vararg calls unless the call site is not passing
01920   // any arguments.
01921   if (isVarArg && !Outs.empty())
01922     return false;
01923 
01924   // Exception-handling functions need a special set of instructions to indicate
01925   // a return to the hardware. Tail-calling another function would probably
01926   // break this.
01927   if (CallerF->hasFnAttribute("interrupt"))
01928     return false;
01929 
01930   // Also avoid sibcall optimization if either caller or callee uses struct
01931   // return semantics.
01932   if (isCalleeStructRet || isCallerStructRet)
01933     return false;
01934 
01935   // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
01936   // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
01937   // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
01938   // support in the assembler and linker to be used. This would need to be
01939   // fixed to fully support tail calls in Thumb1.
01940   //
01941   // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
01942   // LR.  This means if we need to reload LR, it takes an extra instructions,
01943   // which outweighs the value of the tail call; but here we don't know yet
01944   // whether LR is going to be used.  Probably the right approach is to
01945   // generate the tail call here and turn it back into CALL/RET in
01946   // emitEpilogue if LR is used.
01947 
01948   // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
01949   // but we need to make sure there are enough registers; the only valid
01950   // registers are the 4 used for parameters.  We don't currently do this
01951   // case.
01952   if (Subtarget->isThumb1Only())
01953     return false;
01954 
01955   // If the calling conventions do not match, then we'd better make sure the
01956   // results are returned in the same way as what the caller expects.
01957   if (!CCMatch) {
01958     SmallVector<CCValAssign, 16> RVLocs1;
01959     ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
01960                        getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
01961     CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
01962 
01963     SmallVector<CCValAssign, 16> RVLocs2;
01964     ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
01965                        getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
01966     CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
01967 
01968     if (RVLocs1.size() != RVLocs2.size())
01969       return false;
01970     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
01971       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
01972         return false;
01973       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
01974         return false;
01975       if (RVLocs1[i].isRegLoc()) {
01976         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
01977           return false;
01978       } else {
01979         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
01980           return false;
01981       }
01982     }
01983   }
01984 
01985   // If Caller's vararg or byval argument has been split between registers and
01986   // stack, do not perform tail call, since part of the argument is in caller's
01987   // local frame.
01988   const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
01989                                       getInfo<ARMFunctionInfo>();
01990   if (AFI_Caller->getArgRegsSaveSize())
01991     return false;
01992 
01993   // If the callee takes no arguments then go on to check the results of the
01994   // call.
01995   if (!Outs.empty()) {
01996     // Check if stack adjustment is needed. For now, do not do this if any
01997     // argument is passed on the stack.
01998     SmallVector<CCValAssign, 16> ArgLocs;
01999     ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
02000                       getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
02001     CCInfo.AnalyzeCallOperands(Outs,
02002                                CCAssignFnForNode(CalleeCC, false, isVarArg));
02003     if (CCInfo.getNextStackOffset()) {
02004       MachineFunction &MF = DAG.getMachineFunction();
02005 
02006       // Check if the arguments are already laid out in the right way as
02007       // the caller's fixed stack objects.
02008       MachineFrameInfo *MFI = MF.getFrameInfo();
02009       const MachineRegisterInfo *MRI = &MF.getRegInfo();
02010       const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
02011       for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
02012            i != e;
02013            ++i, ++realArgIdx) {
02014         CCValAssign &VA = ArgLocs[i];
02015         EVT RegVT = VA.getLocVT();
02016         SDValue Arg = OutVals[realArgIdx];
02017         ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
02018         if (VA.getLocInfo() == CCValAssign::Indirect)
02019           return false;
02020         if (VA.needsCustom()) {
02021           // f64 and vector types are split into multiple registers or
02022           // register/stack-slot combinations.  The types will not match
02023           // the registers; give up on memory f64 refs until we figure
02024           // out what to do about this.
02025           if (!VA.isRegLoc())
02026             return false;
02027           if (!ArgLocs[++i].isRegLoc())
02028             return false;
02029           if (RegVT == MVT::v2f64) {
02030             if (!ArgLocs[++i].isRegLoc())
02031               return false;
02032             if (!ArgLocs[++i].isRegLoc())
02033               return false;
02034           }
02035         } else if (!VA.isRegLoc()) {
02036           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
02037                                    MFI, MRI, TII))
02038             return false;
02039         }
02040       }
02041     }
02042   }
02043 
02044   return true;
02045 }
02046 
02047 bool
02048 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
02049                                   MachineFunction &MF, bool isVarArg,
02050                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
02051                                   LLVMContext &Context) const {
02052   SmallVector<CCValAssign, 16> RVLocs;
02053   CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
02054   return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
02055                                                     isVarArg));
02056 }
02057 
02058 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
02059                                     SDLoc DL, SelectionDAG &DAG) {
02060   const MachineFunction &MF = DAG.getMachineFunction();
02061   const Function *F = MF.getFunction();
02062 
02063   StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
02064 
02065   // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
02066   // version of the "preferred return address". These offsets affect the return
02067   // instruction if this is a return from PL1 without hypervisor extensions.
02068   //    IRQ/FIQ: +4     "subs pc, lr, #4"
02069   //    SWI:     0      "subs pc, lr, #0"
02070   //    ABORT:   +4     "subs pc, lr, #4"
02071   //    UNDEF:   +4/+2  "subs pc, lr, #0"
02072   // UNDEF varies depending on where the exception came from ARM or Thumb
02073   // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
02074 
02075   int64_t LROffset;
02076   if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
02077       IntKind == "ABORT")
02078     LROffset = 4;
02079   else if (IntKind == "SWI" || IntKind == "UNDEF")
02080     LROffset = 0;
02081   else
02082     report_fatal_error("Unsupported interrupt attribute. If present, value "
02083                        "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
02084 
02085   RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
02086 
02087   return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
02088 }
02089 
02090 SDValue
02091 ARMTargetLowering::LowerReturn(SDValue Chain,
02092                                CallingConv::ID CallConv, bool isVarArg,
02093                                const SmallVectorImpl<ISD::OutputArg> &Outs,
02094                                const SmallVectorImpl<SDValue> &OutVals,
02095                                SDLoc dl, SelectionDAG &DAG) const {
02096 
02097   // CCValAssign - represent the assignment of the return value to a location.
02098   SmallVector<CCValAssign, 16> RVLocs;
02099 
02100   // CCState - Info about the registers and stack slots.
02101   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
02102                     getTargetMachine(), RVLocs, *DAG.getContext(), Call);
02103 
02104   // Analyze outgoing return values.
02105   CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
02106                                                isVarArg));
02107 
02108   SDValue Flag;
02109   SmallVector<SDValue, 4> RetOps;
02110   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
02111   bool isLittleEndian = Subtarget->isLittle();
02112 
02113   // Copy the result values into the output registers.
02114   for (unsigned i = 0, realRVLocIdx = 0;
02115        i != RVLocs.size();
02116        ++i, ++realRVLocIdx) {
02117     CCValAssign &VA = RVLocs[i];
02118     assert(VA.isRegLoc() && "Can only return in registers!");
02119 
02120     SDValue Arg = OutVals[realRVLocIdx];
02121 
02122     switch (VA.getLocInfo()) {
02123     default: llvm_unreachable("Unknown loc info!");
02124     case CCValAssign::Full: break;
02125     case CCValAssign::BCvt:
02126       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
02127       break;
02128     }
02129 
02130     if (VA.needsCustom()) {
02131       if (VA.getLocVT() == MVT::v2f64) {
02132         // Extract the first half and return it in two registers.
02133         SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
02134                                    DAG.getConstant(0, MVT::i32));
02135         SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
02136                                        DAG.getVTList(MVT::i32, MVT::i32), Half);
02137 
02138         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02139                                  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
02140                                  Flag);
02141         Flag = Chain.getValue(1);
02142         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02143         VA = RVLocs[++i]; // skip ahead to next loc
02144         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02145                                  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
02146                                  Flag);
02147         Flag = Chain.getValue(1);
02148         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02149         VA = RVLocs[++i]; // skip ahead to next loc
02150 
02151         // Extract the 2nd half and fall through to handle it as an f64 value.
02152         Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
02153                           DAG.getConstant(1, MVT::i32));
02154       }
02155       // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
02156       // available.
02157       SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
02158                                   DAG.getVTList(MVT::i32, MVT::i32), Arg);
02159       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02160                                fmrrd.getValue(isLittleEndian ? 0 : 1),
02161                                Flag);
02162       Flag = Chain.getValue(1);
02163       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02164       VA = RVLocs[++i]; // skip ahead to next loc
02165       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02166                                fmrrd.getValue(isLittleEndian ? 1 : 0),
02167                                Flag);
02168     } else
02169       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
02170 
02171     // Guarantee that all emitted copies are
02172     // stuck together, avoiding something bad.
02173     Flag = Chain.getValue(1);
02174     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02175   }
02176 
02177   // Update chain and glue.
02178   RetOps[0] = Chain;
02179   if (Flag.getNode())
02180     RetOps.push_back(Flag);
02181 
02182   // CPUs which aren't M-class use a special sequence to return from
02183   // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
02184   // though we use "subs pc, lr, #N").
02185   //
02186   // M-class CPUs actually use a normal return sequence with a special
02187   // (hardware-provided) value in LR, so the normal code path works.
02188   if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
02189       !Subtarget->isMClass()) {
02190     if (Subtarget->isThumb1Only())
02191       report_fatal_error("interrupt attribute is not supported in Thumb1");
02192     return LowerInterruptReturn(RetOps, dl, DAG);
02193   }
02194 
02195   return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
02196 }
02197 
02198 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
02199   if (N->getNumValues() != 1)
02200     return false;
02201   if (!N->hasNUsesOfValue(1, 0))
02202     return false;
02203 
02204   SDValue TCChain = Chain;
02205   SDNode *Copy = *N->use_begin();
02206   if (Copy->getOpcode() == ISD::CopyToReg) {
02207     // If the copy has a glue operand, we conservatively assume it isn't safe to
02208     // perform a tail call.
02209     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
02210       return false;
02211     TCChain = Copy->getOperand(0);
02212   } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
02213     SDNode *VMov = Copy;
02214     // f64 returned in a pair of GPRs.
02215     SmallPtrSet<SDNode*, 2> Copies;
02216     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
02217          UI != UE; ++UI) {
02218       if (UI->getOpcode() != ISD::CopyToReg)
02219         return false;
02220       Copies.insert(*UI);
02221     }
02222     if (Copies.size() > 2)
02223       return false;
02224 
02225     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
02226          UI != UE; ++UI) {
02227       SDValue UseChain = UI->getOperand(0);
02228       if (Copies.count(UseChain.getNode()))
02229         // Second CopyToReg
02230         Copy = *UI;
02231       else
02232         // First CopyToReg
02233         TCChain = UseChain;
02234     }
02235   } else if (Copy->getOpcode() == ISD::BITCAST) {
02236     // f32 returned in a single GPR.
02237     if (!Copy->hasOneUse())
02238       return false;
02239     Copy = *Copy->use_begin();
02240     if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
02241       return false;
02242     TCChain = Copy->getOperand(0);
02243   } else {
02244     return false;
02245   }
02246 
02247   bool HasRet = false;
02248   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
02249        UI != UE; ++UI) {
02250     if (UI->getOpcode() != ARMISD::RET_FLAG &&
02251         UI->getOpcode() != ARMISD::INTRET_FLAG)
02252       return false;
02253     HasRet = true;
02254   }
02255 
02256   if (!HasRet)
02257     return false;
02258 
02259   Chain = TCChain;
02260   return true;
02261 }
02262 
02263 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
02264   if (!Subtarget->supportsTailCall())
02265     return false;
02266 
02267   if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
02268     return false;
02269 
02270   return !Subtarget->isThumb1Only();
02271 }
02272 
02273 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
02274 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
02275 // one of the above mentioned nodes. It has to be wrapped because otherwise
02276 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
02277 // be used to form addressing mode. These wrapped nodes will be selected
02278 // into MOVi.
02279 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
02280   EVT PtrVT = Op.getValueType();
02281   // FIXME there is no actual debug info here
02282   SDLoc dl(Op);
02283   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
02284   SDValue Res;
02285   if (CP->isMachineConstantPoolEntry())
02286     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
02287                                     CP->getAlignment());
02288   else
02289     Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
02290                                     CP->getAlignment());
02291   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
02292 }
02293 
02294 unsigned ARMTargetLowering::getJumpTableEncoding() const {
02295   return MachineJumpTableInfo::EK_Inline;
02296 }
02297 
02298 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
02299                                              SelectionDAG &DAG) const {
02300   MachineFunction &MF = DAG.getMachineFunction();
02301   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02302   unsigned ARMPCLabelIndex = 0;
02303   SDLoc DL(Op);
02304   EVT PtrVT = getPointerTy();
02305   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
02306   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
02307   SDValue CPAddr;
02308   if (RelocM == Reloc::Static) {
02309     CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
02310   } else {
02311     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
02312     ARMPCLabelIndex = AFI->createPICLabelUId();
02313     ARMConstantPoolValue *CPV =
02314       ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
02315                                       ARMCP::CPBlockAddress, PCAdj);
02316     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02317   }
02318   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
02319   SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
02320                                MachinePointerInfo::getConstantPool(),
02321                                false, false, false, 0);
02322   if (RelocM == Reloc::Static)
02323     return Result;
02324   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02325   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
02326 }
02327 
02328 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
02329 SDValue
02330 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
02331                                                  SelectionDAG &DAG) const {
02332   SDLoc dl(GA);
02333   EVT PtrVT = getPointerTy();
02334   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
02335   MachineFunction &MF = DAG.getMachineFunction();
02336   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02337   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02338   ARMConstantPoolValue *CPV =
02339     ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
02340                                     ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
02341   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02342   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
02343   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
02344                          MachinePointerInfo::getConstantPool(),
02345                          false, false, false, 0);
02346   SDValue Chain = Argument.getValue(1);
02347 
02348   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02349   Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
02350 
02351   // call __tls_get_addr.
02352   ArgListTy Args;
02353   ArgListEntry Entry;
02354   Entry.Node = Argument;
02355   Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
02356   Args.push_back(Entry);
02357 
02358   // FIXME: is there useful debug info available here?
02359   TargetLowering::CallLoweringInfo CLI(DAG);
02360   CLI.setDebugLoc(dl).setChain(Chain)
02361     .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
02362                DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
02363                0);
02364 
02365   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
02366   return CallResult.first;
02367 }
02368 
02369 // Lower ISD::GlobalTLSAddress using the "initial exec" or
02370 // "local exec" model.
02371 SDValue
02372 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
02373                                         SelectionDAG &DAG,
02374                                         TLSModel::Model model) const {
02375   const GlobalValue *GV = GA->getGlobal();
02376   SDLoc dl(GA);
02377   SDValue Offset;
02378   SDValue Chain = DAG.getEntryNode();
02379   EVT PtrVT = getPointerTy();
02380   // Get the Thread Pointer
02381   SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
02382 
02383   if (model == TLSModel::InitialExec) {
02384     MachineFunction &MF = DAG.getMachineFunction();
02385     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02386     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02387     // Initial exec model.
02388     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
02389     ARMConstantPoolValue *CPV =
02390       ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
02391                                       ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
02392                                       true);
02393     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02394     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
02395     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
02396                          MachinePointerInfo::getConstantPool(),
02397                          false, false, false, 0);
02398     Chain = Offset.getValue(1);
02399 
02400     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02401     Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
02402 
02403     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
02404                          MachinePointerInfo::getConstantPool(),
02405                          false, false, false, 0);
02406   } else {
02407     // local exec model
02408     assert(model == TLSModel::LocalExec);
02409     ARMConstantPoolValue *CPV =
02410       ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
02411     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02412     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
02413     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
02414                          MachinePointerInfo::getConstantPool(),
02415                          false, false, false, 0);
02416   }
02417 
02418   // The address of the thread local variable is the add of the thread
02419   // pointer with the offset of the variable.
02420   return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
02421 }
02422 
02423 SDValue
02424 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
02425   // TODO: implement the "local dynamic" model
02426   assert(Subtarget->isTargetELF() &&
02427          "TLS not implemented for non-ELF targets");
02428   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
02429 
02430   TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
02431 
02432   switch (model) {
02433     case TLSModel::GeneralDynamic:
02434     case TLSModel::LocalDynamic:
02435       return LowerToTLSGeneralDynamicModel(GA, DAG);
02436     case TLSModel::InitialExec:
02437     case TLSModel::LocalExec:
02438       return LowerToTLSExecModels(GA, DAG, model);
02439   }
02440   llvm_unreachable("bogus TLS model");
02441 }
02442 
02443 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
02444                                                  SelectionDAG &DAG) const {
02445   EVT PtrVT = getPointerTy();
02446   SDLoc dl(Op);
02447   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02448   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
02449     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
02450     ARMConstantPoolValue *CPV =
02451       ARMConstantPoolConstant::Create(GV,
02452                                       UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
02453     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02454     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02455     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
02456                                  CPAddr,
02457                                  MachinePointerInfo::getConstantPool(),
02458                                  false, false, false, 0);
02459     SDValue Chain = Result.getValue(1);
02460     SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
02461     Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
02462     if (!UseGOTOFF)
02463       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
02464                            MachinePointerInfo::getGOT(),
02465                            false, false, false, 0);
02466     return Result;
02467   }
02468 
02469   // If we have T2 ops, we can materialize the address directly via movt/movw
02470   // pair. This is always cheaper.
02471   if (Subtarget->useMovt(DAG.getMachineFunction())) {
02472     ++NumMovwMovt;
02473     // FIXME: Once remat is capable of dealing with instructions with register
02474     // operands, expand this into two nodes.
02475     return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
02476                        DAG.getTargetGlobalAddress(GV, dl, PtrVT));
02477   } else {
02478     SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
02479     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02480     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
02481                        MachinePointerInfo::getConstantPool(),
02482                        false, false, false, 0);
02483   }
02484 }
02485 
02486 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
02487                                                     SelectionDAG &DAG) const {
02488   EVT PtrVT = getPointerTy();
02489   SDLoc dl(Op);
02490   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02491   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
02492 
02493   if (Subtarget->useMovt(DAG.getMachineFunction()))
02494     ++NumMovwMovt;
02495 
02496   // FIXME: Once remat is capable of dealing with instructions with register
02497   // operands, expand this into multiple nodes
02498   unsigned Wrapper =
02499       RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
02500 
02501   SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
02502   SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
02503 
02504   if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
02505     Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
02506                          MachinePointerInfo::getGOT(), false, false, false, 0);
02507   return Result;
02508 }
02509 
02510 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
02511                                                      SelectionDAG &DAG) const {
02512   assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
02513   assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
02514          "Windows on ARM expects to use movw/movt");
02515 
02516   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02517   const ARMII::TOF TargetFlags =
02518     (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
02519   EVT PtrVT = getPointerTy();
02520   SDValue Result;
02521   SDLoc DL(Op);
02522 
02523   ++NumMovwMovt;
02524 
02525   // FIXME: Once remat is capable of dealing with instructions with register
02526   // operands, expand this into two nodes.
02527   Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
02528                        DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
02529                                                   TargetFlags));
02530   if (GV->hasDLLImportStorageClass())
02531     Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
02532                          MachinePointerInfo::getGOT(), false, false, false, 0);
02533   return Result;
02534 }
02535 
02536 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
02537                                                     SelectionDAG &DAG) const {
02538   assert(Subtarget->isTargetELF() &&
02539          "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
02540   MachineFunction &MF = DAG.getMachineFunction();
02541   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02542   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02543   EVT PtrVT = getPointerTy();
02544   SDLoc dl(Op);
02545   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
02546   ARMConstantPoolValue *CPV =
02547     ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
02548                                   ARMPCLabelIndex, PCAdj);
02549   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02550   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02551   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
02552                                MachinePointerInfo::getConstantPool(),
02553                                false, false, false, 0);
02554   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02555   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
02556 }
02557 
02558 SDValue
02559 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
02560   SDLoc dl(Op);
02561   SDValue Val = DAG.getConstant(0, MVT::i32);
02562   return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
02563                      DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
02564                      Op.getOperand(1), Val);
02565 }
02566 
02567 SDValue
02568 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
02569   SDLoc dl(Op);
02570   return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
02571                      Op.getOperand(1), DAG.getConstant(0, MVT::i32));
02572 }
02573 
02574 SDValue
02575 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
02576                                           const ARMSubtarget *Subtarget) const {
02577   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
02578   SDLoc dl(Op);
02579   switch (IntNo) {
02580   default: return SDValue();    // Don't custom lower most intrinsics.
02581   case Intrinsic::arm_rbit: {
02582     assert(Op.getOperand(0).getValueType() == MVT::i32 &&
02583            "RBIT intrinsic must have i32 type!");
02584     return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0));
02585   }
02586   case Intrinsic::arm_thread_pointer: {
02587     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02588     return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
02589   }
02590   case Intrinsic::eh_sjlj_lsda: {
02591     MachineFunction &MF = DAG.getMachineFunction();
02592     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02593     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02594     EVT PtrVT = getPointerTy();
02595     Reloc::Model RelocM = getTargetMachine().getRelocationModel();
02596     SDValue CPAddr;
02597     unsigned PCAdj = (RelocM != Reloc::PIC_)
02598       ? 0 : (Subtarget->isThumb() ? 4 : 8);
02599     ARMConstantPoolValue *CPV =
02600       ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
02601                                       ARMCP::CPLSDA, PCAdj);
02602     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02603     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02604     SDValue Result =
02605       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
02606                   MachinePointerInfo::getConstantPool(),
02607                   false, false, false, 0);
02608 
02609     if (RelocM == Reloc::PIC_) {
02610       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02611       Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
02612     }
02613     return Result;
02614   }
02615   case Intrinsic::arm_neon_vmulls:
02616   case Intrinsic::arm_neon_vmullu: {
02617     unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
02618       ? ARMISD::VMULLs : ARMISD::VMULLu;
02619     return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
02620                        Op.getOperand(1), Op.getOperand(2));
02621   }
02622   }
02623 }
02624 
02625 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
02626                                  const ARMSubtarget *Subtarget) {
02627   // FIXME: handle "fence singlethread" more efficiently.
02628   SDLoc dl(Op);
02629   if (!Subtarget->hasDataBarrier()) {
02630     // Some ARMv6 cpus can support data barriers with an mcr instruction.
02631     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
02632     // here.
02633     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
02634            "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
02635     return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
02636                        DAG.getConstant(0, MVT::i32));
02637   }
02638 
02639   ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
02640   AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
02641   unsigned Domain = ARM_MB::ISH;
02642   if (Subtarget->isMClass()) {
02643     // Only a full system barrier exists in the M-class architectures.
02644     Domain = ARM_MB::SY;
02645   } else if (Subtarget->isSwift() && Ord == Release) {
02646     // Swift happens to implement ISHST barriers in a way that's compatible with
02647     // Release semantics but weaker than ISH so we'd be fools not to use
02648     // it. Beware: other processors probably don't!
02649     Domain = ARM_MB::ISHST;
02650   }
02651 
02652   return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
02653                      DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
02654                      DAG.getConstant(Domain, MVT::i32));
02655 }
02656 
02657 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
02658                              const ARMSubtarget *Subtarget) {
02659   // ARM pre v5TE and Thumb1 does not have preload instructions.
02660   if (!(Subtarget->isThumb2() ||
02661         (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
02662     // Just preserve the chain.
02663     return Op.getOperand(0);
02664 
02665   SDLoc dl(Op);
02666   unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
02667   if (!isRead &&
02668       (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
02669     // ARMv7 with MP extension has PLDW.
02670     return Op.getOperand(0);
02671 
02672   unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
02673   if (Subtarget->isThumb()) {
02674     // Invert the bits.
02675     isRead = ~isRead & 1;
02676     isData = ~isData & 1;
02677   }
02678 
02679   return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
02680                      Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
02681                      DAG.getConstant(isData, MVT::i32));
02682 }
02683 
02684 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
02685   MachineFunction &MF = DAG.getMachineFunction();
02686   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
02687 
02688   // vastart just stores the address of the VarArgsFrameIndex slot into the
02689   // memory location argument.
02690   SDLoc dl(Op);
02691   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02692   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
02693   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
02694   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
02695                       MachinePointerInfo(SV), false, false, 0);
02696 }
02697 
02698 SDValue
02699 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
02700                                         SDValue &Root, SelectionDAG &DAG,
02701                                         SDLoc dl) const {
02702   MachineFunction &MF = DAG.getMachineFunction();
02703   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02704 
02705   const TargetRegisterClass *RC;
02706   if (AFI->isThumb1OnlyFunction())
02707     RC = &ARM::tGPRRegClass;
02708   else
02709     RC = &ARM::GPRRegClass;
02710 
02711   // Transform the arguments stored in physical registers into virtual ones.
02712   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02713   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
02714 
02715   SDValue ArgValue2;
02716   if (NextVA.isMemLoc()) {
02717     MachineFrameInfo *MFI = MF.getFrameInfo();
02718     int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
02719 
02720     // Create load node to retrieve arguments from the stack.
02721     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
02722     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
02723                             MachinePointerInfo::getFixedStack(FI),
02724                             false, false, false, 0);
02725   } else {
02726     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
02727     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
02728   }
02729   if (!Subtarget->isLittle())
02730     std::swap (ArgValue, ArgValue2);
02731   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
02732 }
02733 
02734 void
02735 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
02736                                   unsigned InRegsParamRecordIdx,
02737                                   unsigned ArgSize,
02738                                   unsigned &ArgRegsSize,
02739                                   unsigned &ArgRegsSaveSize)
02740   const {
02741   unsigned NumGPRs;
02742   if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
02743     unsigned RBegin, REnd;
02744     CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
02745     NumGPRs = REnd - RBegin;
02746   } else {
02747     unsigned int firstUnalloced;
02748     firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
02749                                                 sizeof(GPRArgRegs) /
02750                                                 sizeof(GPRArgRegs[0]));
02751     NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
02752   }
02753 
02754   unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
02755   ArgRegsSize = NumGPRs * 4;
02756 
02757   // If parameter is split between stack and GPRs...
02758   if (NumGPRs && Align > 4 &&
02759       (ArgRegsSize < ArgSize ||
02760         InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
02761     // Add padding for part of param recovered from GPRs.  For example,
02762     // if Align == 8, its last byte must be at address K*8 - 1.
02763     // We need to do it, since remained (stack) part of parameter has
02764     // stack alignment, and we need to "attach" "GPRs head" without gaps
02765     // to it:
02766     // Stack:
02767     // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
02768     // [ [padding] [GPRs head] ] [        Tail passed via stack       ....
02769     //
02770     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02771     unsigned Padding =
02772         OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
02773     ArgRegsSaveSize = ArgRegsSize + Padding;
02774   } else
02775     // We don't need to extend regs save size for byval parameters if they
02776     // are passed via GPRs only.
02777     ArgRegsSaveSize = ArgRegsSize;
02778 }
02779 
02780 // The remaining GPRs hold either the beginning of variable-argument
02781 // data, or the beginning of an aggregate passed by value (usually
02782 // byval).  Either way, we allocate stack slots adjacent to the data
02783 // provided by our caller, and store the unallocated registers there.
02784 // If this is a variadic function, the va_list pointer will begin with
02785 // these values; otherwise, this reassembles a (byval) structure that
02786 // was split between registers and memory.
02787 // Return: The frame index registers were stored into.
02788 int
02789 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
02790                                   SDLoc dl, SDValue &Chain,
02791                                   const Value *OrigArg,
02792                                   unsigned InRegsParamRecordIdx,
02793                                   unsigned OffsetFromOrigArg,
02794                                   unsigned ArgOffset,
02795                                   unsigned ArgSize,
02796                                   bool ForceMutable,
02797                                   unsigned ByValStoreOffset,
02798                                   unsigned TotalArgRegsSaveSize) const {
02799 
02800   // Currently, two use-cases possible:
02801   // Case #1. Non-var-args function, and we meet first byval parameter.
02802   //          Setup first unallocated register as first byval register;
02803   //          eat all remained registers
02804   //          (these two actions are performed by HandleByVal method).
02805   //          Then, here, we initialize stack frame with
02806   //          "store-reg" instructions.
02807   // Case #2. Var-args function, that doesn't contain byval parameters.
02808   //          The same: eat all remained unallocated registers,
02809   //          initialize stack frame.
02810 
02811   MachineFunction &MF = DAG.getMachineFunction();
02812   MachineFrameInfo *MFI = MF.getFrameInfo();
02813   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02814   unsigned firstRegToSaveIndex, lastRegToSaveIndex;
02815   unsigned RBegin, REnd;
02816   if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
02817     CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
02818     firstRegToSaveIndex = RBegin - ARM::R0;
02819     lastRegToSaveIndex = REnd - ARM::R0;
02820   } else {
02821     firstRegToSaveIndex = CCInfo.getFirstUnallocated
02822       (GPRArgRegs, array_lengthof(GPRArgRegs));
02823     lastRegToSaveIndex = 4;
02824   }
02825 
02826   unsigned ArgRegsSize, ArgRegsSaveSize;
02827   computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
02828                  ArgRegsSize, ArgRegsSaveSize);
02829 
02830   // Store any by-val regs to their spots on the stack so that they may be
02831   // loaded by deferencing the result of formal parameter pointer or va_next.
02832   // Note: once stack area for byval/varargs registers
02833   // was initialized, it can't be initialized again.
02834   if (ArgRegsSaveSize) {
02835     unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
02836 
02837     if (Padding) {
02838       assert(AFI->getStoredByValParamsPadding() == 0 &&
02839              "The only parameter may be padded.");
02840       AFI->setStoredByValParamsPadding(Padding);
02841     }
02842 
02843     int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
02844                                             Padding +
02845                                               ByValStoreOffset -
02846                                               (int64_t)TotalArgRegsSaveSize,
02847                                             false);
02848     SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
02849     if (Padding) {
02850        MFI->CreateFixedObject(Padding,
02851                               ArgOffset + ByValStoreOffset -
02852                                 (int64_t)ArgRegsSaveSize,
02853                               false);
02854     }
02855 
02856     SmallVector<SDValue, 4> MemOps;
02857     for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
02858          ++firstRegToSaveIndex, ++i) {
02859       const TargetRegisterClass *RC;
02860       if (AFI->isThumb1OnlyFunction())
02861         RC = &ARM::tGPRRegClass;
02862       else
02863         RC = &ARM::GPRRegClass;
02864 
02865       unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
02866       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
02867       SDValue Store =
02868         DAG.getStore(Val.getValue(1), dl, Val, FIN,
02869                      MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
02870                      false, false, 0);
02871       MemOps.push_back(Store);
02872       FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
02873                         DAG.getConstant(4, getPointerTy()));
02874     }
02875 
02876     AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
02877 
02878     if (!MemOps.empty())
02879       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
02880     return FrameIndex;
02881   } else {
02882     if (ArgSize == 0) {
02883       // We cannot allocate a zero-byte object for the first variadic argument,
02884       // so just make up a size.
02885       ArgSize = 4;
02886     }
02887     // This will point to the next argument passed via stack.
02888     return MFI->CreateFixedObject(
02889       ArgSize, ArgOffset, !ForceMutable);
02890   }
02891 }
02892 
02893 // Setup stack frame, the va_list pointer will start from.
02894 void
02895 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
02896                                         SDLoc dl, SDValue &Chain,
02897                                         unsigned ArgOffset,
02898                                         unsigned TotalArgRegsSaveSize,
02899                                         bool ForceMutable) const {
02900   MachineFunction &MF = DAG.getMachineFunction();
02901   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02902 
02903   // Try to store any remaining integer argument regs
02904   // to their spots on the stack so that they may be loaded by deferencing
02905   // the result of va_next.
02906   // If there is no regs to be stored, just point address after last
02907   // argument passed via stack.
02908   int FrameIndex =
02909     StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
02910                    CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
02911                    0, TotalArgRegsSaveSize);
02912 
02913   AFI->setVarArgsFrameIndex(FrameIndex);
02914 }
02915 
02916 SDValue
02917 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
02918                                         CallingConv::ID CallConv, bool isVarArg,
02919                                         const SmallVectorImpl<ISD::InputArg>
02920                                           &Ins,
02921                                         SDLoc dl, SelectionDAG &DAG,
02922                                         SmallVectorImpl<SDValue> &InVals)
02923                                           const {
02924   MachineFunction &MF = DAG.getMachineFunction();
02925   MachineFrameInfo *MFI = MF.getFrameInfo();
02926 
02927   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02928 
02929   // Assign locations to all of the incoming arguments.
02930   SmallVector<CCValAssign, 16> ArgLocs;
02931   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
02932                     getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
02933   CCInfo.AnalyzeFormalArguments(Ins,
02934                                 CCAssignFnForNode(CallConv, /* Return*/ false,
02935                                                   isVarArg));
02936 
02937   SmallVector<SDValue, 16> ArgValues;
02938   int lastInsIndex = -1;
02939   SDValue ArgValue;
02940   Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
02941   unsigned CurArgIdx = 0;
02942 
02943   // Initially ArgRegsSaveSize is zero.
02944   // Then we increase this value each time we meet byval parameter.
02945   // We also increase this value in case of varargs function.
02946   AFI->setArgRegsSaveSize(0);
02947 
02948   unsigned ByValStoreOffset = 0;
02949   unsigned TotalArgRegsSaveSize = 0;
02950   unsigned ArgRegsSaveSizeMaxAlign = 4;
02951 
02952   // Calculate the amount of stack space that we need to allocate to store
02953   // byval and variadic arguments that are passed in registers.
02954   // We need to know this before we allocate the first byval or variadic
02955   // argument, as they will be allocated a stack slot below the CFA (Canonical
02956   // Frame Address, the stack pointer at entry to the function).
02957   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02958     CCValAssign &VA = ArgLocs[i];
02959     if (VA.isMemLoc()) {
02960       int index = VA.getValNo();
02961       if (index != lastInsIndex) {
02962         ISD::ArgFlagsTy Flags = Ins[index].Flags;
02963         if (Flags.isByVal()) {
02964           unsigned ExtraArgRegsSize;
02965           unsigned ExtraArgRegsSaveSize;
02966           computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(),
02967                          Flags.getByValSize(),
02968                          ExtraArgRegsSize, ExtraArgRegsSaveSize);
02969 
02970           TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
02971           if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
02972               ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
02973           CCInfo.nextInRegsParam();
02974         }
02975         lastInsIndex = index;
02976       }
02977     }
02978   }
02979   CCInfo.rewindByValRegsInfo();
02980   lastInsIndex = -1;
02981   if (isVarArg) {
02982     unsigned ExtraArgRegsSize;
02983     unsigned ExtraArgRegsSaveSize;
02984     computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
02985                    ExtraArgRegsSize, ExtraArgRegsSaveSize);
02986     TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
02987   }
02988   // If the arg regs save area contains N-byte aligned values, the
02989   // bottom of it must be at least N-byte aligned.
02990   TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
02991   TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
02992 
02993   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02994     CCValAssign &VA = ArgLocs[i];
02995     std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
02996     CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
02997     // Arguments stored in registers.
02998     if (VA.isRegLoc()) {
02999       EVT RegVT = VA.getLocVT();
03000 
03001       if (VA.needsCustom()) {
03002         // f64 and vector types are split up into multiple registers or
03003         // combinations of registers and stack slots.
03004         if (VA.getLocVT() == MVT::v2f64) {
03005           SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
03006                                                    Chain, DAG, dl);
03007           VA = ArgLocs[++i]; // skip ahead to next loc
03008           SDValue ArgValue2;
03009           if (VA.isMemLoc()) {
03010             int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
03011             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
03012             ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
03013                                     MachinePointerInfo::getFixedStack(FI),
03014                                     false, false, false, 0);
03015           } else {
03016             ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
03017                                              Chain, DAG, dl);
03018           }
03019           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
03020           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
03021                                  ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
03022           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
03023                                  ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
03024         } else
03025           ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
03026 
03027       } else {
03028         const TargetRegisterClass *RC;
03029 
03030         if (RegVT == MVT::f32)
03031           RC = &ARM::SPRRegClass;
03032         else if (RegVT == MVT::f64)
03033           RC = &ARM::DPRRegClass;
03034         else if (RegVT == MVT::v2f64)
03035           RC = &ARM::QPRRegClass;
03036         else if (RegVT == MVT::i32)
03037           RC = AFI->isThumb1OnlyFunction() ?
03038             (const TargetRegisterClass*)&ARM::tGPRRegClass :
03039             (const TargetRegisterClass*)&ARM::GPRRegClass;
03040         else
03041           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
03042 
03043         // Transform the arguments in physical registers into virtual ones.
03044         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
03045         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
03046       }
03047 
03048       // If this is an 8 or 16-bit value, it is really passed promoted
03049       // to 32 bits.  Insert an assert[sz]ext to capture this, then
03050       // truncate to the right size.
03051       switch (VA.getLocInfo()) {
03052       default: llvm_unreachable("Unknown loc info!");
03053       case CCValAssign::Full: break;
03054       case CCValAssign::BCvt:
03055         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
03056         break;
03057       case CCValAssign::SExt:
03058         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
03059                                DAG.getValueType(VA.getValVT()));
03060         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
03061         break;
03062       case CCValAssign::ZExt:
03063         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
03064                                DAG.getValueType(VA.getValVT()));
03065         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
03066         break;
03067       }
03068 
03069       InVals.push_back(ArgValue);
03070 
03071     } else { // VA.isRegLoc()
03072 
03073       // sanity check
03074       assert(VA.isMemLoc());
03075       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
03076 
03077       int index = ArgLocs[i].getValNo();
03078 
03079       // Some Ins[] entries become multiple ArgLoc[] entries.
03080       // Process them only once.
03081       if (index != lastInsIndex)
03082         {
03083           ISD::ArgFlagsTy Flags = Ins[index].Flags;
03084           // FIXME: For now, all byval parameter objects are marked mutable.
03085           // This can be changed with more analysis.
03086           // In case of tail call optimization mark all arguments mutable.
03087           // Since they could be overwritten by lowering of arguments in case of
03088           // a tail call.
03089           if (Flags.isByVal()) {
03090             unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
03091 
03092             ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
03093             int FrameIndex = StoreByValRegs(
03094                 CCInfo, DAG, dl, Chain, CurOrigArg,
03095                 CurByValIndex,
03096                 Ins[VA.getValNo()].PartOffset,
03097                 VA.getLocMemOffset(),
03098                 Flags.getByValSize(),
03099                 true /*force mutable frames*/,
03100                 ByValStoreOffset,
03101                 TotalArgRegsSaveSize);
03102             ByValStoreOffset += Flags.getByValSize();
03103             ByValStoreOffset = std::min(ByValStoreOffset, 16U);
03104             InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
03105             CCInfo.nextInRegsParam();
03106           } else {
03107             unsigned FIOffset = VA.getLocMemOffset();
03108             int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
03109                                             FIOffset, true);
03110 
03111             // Create load nodes to retrieve arguments from the stack.
03112             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
03113             InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
03114                                          MachinePointerInfo::getFixedStack(FI),
03115                                          false, false, false, 0));
03116           }
03117           lastInsIndex = index;
03118         }
03119     }
03120   }
03121 
03122   // varargs
03123   if (isVarArg)
03124     VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
03125                          CCInfo.getNextStackOffset(),
03126                          TotalArgRegsSaveSize);
03127 
03128   AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
03129 
03130   return Chain;
03131 }
03132 
03133 /// isFloatingPointZero - Return true if this is +0.0.
03134 static bool isFloatingPointZero(SDValue Op) {
03135   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
03136     return CFP->getValueAPF().isPosZero();
03137   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
03138     // Maybe this has already been legalized into the constant pool?
03139     if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
03140       SDValue WrapperOp = Op.getOperand(1).getOperand(0);
03141       if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
03142         if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
03143           return CFP->getValueAPF().isPosZero();
03144     }
03145   }
03146   return false;
03147 }
03148 
03149 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
03150 /// the given operands.
03151 SDValue
03152 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
03153                              SDValue &ARMcc, SelectionDAG &DAG,
03154                              SDLoc dl) const {
03155   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
03156     unsigned C = RHSC->getZExtValue();
03157     if (!isLegalICmpImmediate(C)) {
03158       // Constant does not fit, try adjusting it by one?
03159       switch (CC) {
03160       default: break;
03161       case ISD::SETLT:
03162       case ISD::SETGE:
03163         if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
03164           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
03165           RHS = DAG.getConstant(C-1, MVT::i32);
03166         }
03167         break;
03168       case ISD::SETULT:
03169       case ISD::SETUGE:
03170         if (C != 0 && isLegalICmpImmediate(C-1)) {
03171           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
03172           RHS = DAG.getConstant(C-1, MVT::i32);
03173         }
03174         break;
03175       case ISD::SETLE:
03176       case ISD::SETGT:
03177         if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
03178           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
03179           RHS = DAG.getConstant(C+1, MVT::i32);
03180         }
03181         break;
03182       case ISD::SETULE:
03183       case ISD::SETUGT:
03184         if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
03185           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
03186           RHS = DAG.getConstant(C+1, MVT::i32);
03187         }
03188         break;
03189       }
03190     }
03191   }
03192 
03193   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
03194   ARMISD::NodeType CompareType;
03195   switch (CondCode) {
03196   default:
03197     CompareType = ARMISD::CMP;
03198     break;
03199   case ARMCC::EQ:
03200   case ARMCC::NE:
03201     // Uses only Z Flag
03202     CompareType = ARMISD::CMPZ;
03203     break;
03204   }
03205   ARMcc = DAG.getConstant(CondCode, MVT::i32);
03206   return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
03207 }
03208 
03209 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
03210 SDValue
03211 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
03212                              SDLoc dl) const {
03213   SDValue Cmp;
03214   if (!isFloatingPointZero(RHS))
03215     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
03216   else
03217     Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
03218   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
03219 }
03220 
03221 /// duplicateCmp - Glue values can have only one use, so this function
03222 /// duplicates a comparison node.
03223 SDValue
03224 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
03225   unsigned Opc = Cmp.getOpcode();
03226   SDLoc DL(Cmp);
03227   if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
03228     return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
03229 
03230   assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
03231   Cmp = Cmp.getOperand(0);
03232   Opc = Cmp.getOpcode();
03233   if (Opc == ARMISD::CMPFP)
03234     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
03235   else {
03236     assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
03237     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
03238   }
03239   return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
03240 }
03241 
03242 std::pair<SDValue, SDValue>
03243 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
03244                                  SDValue &ARMcc) const {
03245   assert(Op.getValueType() == MVT::i32 &&  "Unsupported value type");
03246 
03247   SDValue Value, OverflowCmp;
03248   SDValue LHS = Op.getOperand(0);
03249   SDValue RHS = Op.getOperand(1);
03250 
03251 
03252   // FIXME: We are currently always generating CMPs because we don't support
03253   // generating CMN through the backend. This is not as good as the natural
03254   // CMP case because it causes a register dependency and cannot be folded
03255   // later.
03256 
03257   switch (Op.getOpcode()) {
03258   default:
03259     llvm_unreachable("Unknown overflow instruction!");
03260   case ISD::SADDO:
03261     ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
03262     Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
03263     OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
03264     break;
03265   case ISD::UADDO:
03266     ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
03267     Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
03268     OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
03269     break;
03270   case ISD::SSUBO:
03271     ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
03272     Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
03273     OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
03274     break;
03275   case ISD::USUBO:
03276     ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
03277     Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
03278     OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
03279     break;
03280   } // switch (...)
03281 
03282   return std::make_pair(Value, OverflowCmp);
03283 }
03284 
03285 
03286 SDValue
03287 ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
03288   // Let legalize expand this if it isn't a legal type yet.
03289   if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
03290     return SDValue();
03291 
03292   SDValue Value, OverflowCmp;
03293   SDValue ARMcc;
03294   std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
03295   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03296   // We use 0 and 1 as false and true values.
03297   SDValue TVal = DAG.getConstant(1, MVT::i32);
03298   SDValue FVal = DAG.getConstant(0, MVT::i32);
03299   EVT VT = Op.getValueType();
03300 
03301   SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal,
03302                                  ARMcc, CCR, OverflowCmp);
03303 
03304   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
03305   return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
03306 }
03307 
03308 
03309 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
03310   SDValue Cond = Op.getOperand(0);
03311   SDValue SelectTrue = Op.getOperand(1);
03312   SDValue SelectFalse = Op.getOperand(2);
03313   SDLoc dl(Op);
03314   unsigned Opc = Cond.getOpcode();
03315 
03316   if (Cond.getResNo() == 1 &&
03317       (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
03318        Opc == ISD::USUBO)) {
03319     if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
03320       return SDValue();
03321 
03322     SDValue Value, OverflowCmp;
03323     SDValue ARMcc;
03324     std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
03325     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03326     EVT VT = Op.getValueType();
03327 
03328     return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse,
03329                        ARMcc, CCR, OverflowCmp);
03330 
03331   }
03332 
03333   // Convert:
03334   //
03335   //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
03336   //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
03337   //
03338   if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
03339     const ConstantSDNode *CMOVTrue =
03340       dyn_cast<ConstantSDNode>(Cond.getOperand(0));
03341     const ConstantSDNode *CMOVFalse =
03342       dyn_cast<ConstantSDNode>(Cond.getOperand(1));
03343 
03344     if (CMOVTrue && CMOVFalse) {
03345       unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
03346       unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
03347 
03348       SDValue True;
03349       SDValue False;
03350       if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
03351         True = SelectTrue;
03352         False = SelectFalse;
03353       } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
03354         True = SelectFalse;
03355         False = SelectTrue;
03356       }
03357 
03358       if (True.getNode() && False.getNode()) {
03359         EVT VT = Op.getValueType();
03360         SDValue ARMcc = Cond.getOperand(2);
03361         SDValue CCR = Cond.getOperand(3);
03362         SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
03363         assert(True.getValueType() == VT);
03364         return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
03365       }
03366     }
03367   }
03368 
03369   // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
03370   // undefined bits before doing a full-word comparison with zero.
03371   Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
03372                      DAG.getConstant(1, Cond.getValueType()));
03373 
03374   return DAG.getSelectCC(dl, Cond,
03375                          DAG.getConstant(0, Cond.getValueType()),
03376                          SelectTrue, SelectFalse, ISD::SETNE);
03377 }
03378 
03379 static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
03380   if (CC == ISD::SETNE)
03381     return ISD::SETEQ;
03382   return ISD::getSetCCInverse(CC, true);
03383 }
03384 
03385 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
03386                                  bool &swpCmpOps, bool &swpVselOps) {
03387   // Start by selecting the GE condition code for opcodes that return true for
03388   // 'equality'
03389   if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
03390       CC == ISD::SETULE)
03391     CondCode = ARMCC::GE;
03392 
03393   // and GT for opcodes that return false for 'equality'.
03394   else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
03395            CC == ISD::SETULT)
03396     CondCode = ARMCC::GT;
03397 
03398   // Since we are constrained to GE/GT, if the opcode contains 'less', we need
03399   // to swap the compare operands.
03400   if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
03401       CC == ISD::SETULT)
03402     swpCmpOps = true;
03403 
03404   // Both GT and GE are ordered comparisons, and return false for 'unordered'.
03405   // If we have an unordered opcode, we need to swap the operands to the VSEL
03406   // instruction (effectively negating the condition).
03407   //
03408   // This also has the effect of swapping which one of 'less' or 'greater'
03409   // returns true, so we also swap the compare operands. It also switches
03410   // whether we return true for 'equality', so we compensate by picking the
03411   // opposite condition code to our original choice.
03412   if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
03413       CC == ISD::SETUGT) {
03414     swpCmpOps = !swpCmpOps;
03415     swpVselOps = !swpVselOps;
03416     CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
03417   }
03418 
03419   // 'ordered' is 'anything but unordered', so use the VS condition code and
03420   // swap the VSEL operands.
03421   if (CC == ISD::SETO) {
03422     CondCode = ARMCC::VS;
03423     swpVselOps = true;
03424   }
03425 
03426   // 'unordered or not equal' is 'anything but equal', so use the EQ condition
03427   // code and swap the VSEL operands.
03428   if (CC == ISD::SETUNE) {
03429     CondCode = ARMCC::EQ;
03430     swpVselOps = true;
03431   }
03432 }
03433 
03434 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
03435   EVT VT = Op.getValueType();
03436   SDValue LHS = Op.getOperand(0);
03437   SDValue RHS = Op.getOperand(1);
03438   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
03439   SDValue TrueVal = Op.getOperand(2);
03440   SDValue FalseVal = Op.getOperand(3);
03441   SDLoc dl(Op);
03442 
03443   if (LHS.getValueType() == MVT::i32) {
03444     // Try to generate VSEL on ARMv8.
03445     // The VSEL instruction can't use all the usual ARM condition
03446     // codes: it only has two bits to select the condition code, so it's
03447     // constrained to use only GE, GT, VS and EQ.
03448     //
03449     // To implement all the various ISD::SETXXX opcodes, we sometimes need to
03450     // swap the operands of the previous compare instruction (effectively
03451     // inverting the compare condition, swapping 'less' and 'greater') and
03452     // sometimes need to swap the operands to the VSEL (which inverts the
03453     // condition in the sense of firing whenever the previous condition didn't)
03454     if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
03455                                       TrueVal.getValueType() == MVT::f64)) {
03456       ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
03457       if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
03458           CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
03459         CC = getInverseCCForVSEL(CC);
03460         std::swap(TrueVal, FalseVal);
03461       }
03462     }
03463 
03464     SDValue ARMcc;
03465     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03466     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
03467     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
03468                        Cmp);
03469   }
03470 
03471   ARMCC::CondCodes CondCode, CondCode2;
03472   FPCCToARMCC(CC, CondCode, CondCode2);
03473 
03474   // Try to generate VSEL on ARMv8.
03475   if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
03476                                     TrueVal.getValueType() == MVT::f64)) {
03477     // We can select VMAXNM/VMINNM from a compare followed by a select with the
03478     // same operands, as follows:
03479     //   c = fcmp [ogt, olt, ugt, ult] a, b
03480     //   select c, a, b
03481     // We only do this in unsafe-fp-math, because signed zeros and NaNs are
03482     // handled differently than the original code sequence.
03483     if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
03484         RHS == FalseVal) {
03485       if (CC == ISD::SETOGT || CC == ISD::SETUGT)
03486         return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
03487       if (CC == ISD::SETOLT || CC == ISD::SETULT)
03488         return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
03489     }
03490 
03491     bool swpCmpOps = false;
03492     bool swpVselOps = false;
03493     checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
03494 
03495     if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
03496         CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
03497       if (swpCmpOps)
03498         std::swap(LHS, RHS);
03499       if (swpVselOps)
03500         std::swap(TrueVal, FalseVal);
03501     }
03502   }
03503 
03504   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
03505   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
03506   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03507   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
03508                                ARMcc, CCR, Cmp);
03509   if (CondCode2 != ARMCC::AL) {
03510     SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
03511     // FIXME: Needs another CMP because flag can have but one use.
03512     SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
03513     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
03514                          Result, TrueVal, ARMcc2, CCR, Cmp2);
03515   }
03516   return Result;
03517 }
03518 
03519 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
03520 /// to morph to an integer compare sequence.
03521 static bool canChangeToInt(SDValue Op, bool &SeenZero,
03522                            const ARMSubtarget *Subtarget) {
03523   SDNode *N = Op.getNode();
03524   if (!N->hasOneUse())
03525     // Otherwise it requires moving the value from fp to integer registers.
03526     return false;
03527   if (!N->getNumValues())
03528     return false;
03529   EVT VT = Op.getValueType();
03530   if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
03531     // f32 case is generally profitable. f64 case only makes sense when vcmpe +
03532     // vmrs are very slow, e.g. cortex-a8.
03533     return false;
03534 
03535   if (isFloatingPointZero(Op)) {
03536     SeenZero = true;
03537     return true;
03538   }
03539   return ISD::isNormalLoad(N);
03540 }
03541 
03542 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
03543   if (isFloatingPointZero(Op))
03544     return DAG.getConstant(0, MVT::i32);
03545 
03546   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
03547     return DAG.getLoad(MVT::i32, SDLoc(Op),
03548                        Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
03549                        Ld->isVolatile(), Ld->isNonTemporal(),
03550                        Ld->isInvariant(), Ld->getAlignment());
03551 
03552   llvm_unreachable("Unknown VFP cmp argument!");
03553 }
03554 
03555 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
03556                            SDValue &RetVal1, SDValue &RetVal2) {
03557   if (isFloatingPointZero(Op)) {
03558     RetVal1 = DAG.getConstant(0, MVT::i32);
03559     RetVal2 = DAG.getConstant(0, MVT::i32);
03560     return;
03561   }
03562 
03563   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
03564     SDValue Ptr = Ld->getBasePtr();
03565     RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op),
03566                           Ld->getChain(), Ptr,
03567                           Ld->getPointerInfo(),
03568                           Ld->isVolatile(), Ld->isNonTemporal(),
03569                           Ld->isInvariant(), Ld->getAlignment());
03570 
03571     EVT PtrType = Ptr.getValueType();
03572     unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
03573     SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op),
03574                                  PtrType, Ptr, DAG.getConstant(4, PtrType));
03575     RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op),
03576                           Ld->getChain(), NewPtr,
03577                           Ld->getPointerInfo().getWithOffset(4),
03578                           Ld->isVolatile(), Ld->isNonTemporal(),
03579                           Ld->isInvariant(), NewAlign);
03580     return;
03581   }
03582 
03583   llvm_unreachable("Unknown VFP cmp argument!");
03584 }
03585 
03586 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
03587 /// f32 and even f64 comparisons to integer ones.
03588 SDValue
03589 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
03590   SDValue Chain = Op.getOperand(0);
03591   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
03592   SDValue LHS = Op.getOperand(2);
03593   SDValue RHS = Op.getOperand(3);
03594   SDValue Dest = Op.getOperand(4);
03595   SDLoc dl(Op);
03596 
03597   bool LHSSeenZero = false;
03598   bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
03599   bool RHSSeenZero = false;
03600   bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
03601   if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
03602     // If unsafe fp math optimization is enabled and there are no other uses of
03603     // the CMP operands, and the condition code is EQ or NE, we can optimize it
03604     // to an integer comparison.
03605     if (CC == ISD::SETOEQ)
03606       CC = ISD::SETEQ;
03607     else if (CC == ISD::SETUNE)
03608       CC = ISD::SETNE;
03609 
03610     SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
03611     SDValue ARMcc;
03612     if (LHS.getValueType() == MVT::f32) {
03613       LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
03614                         bitcastf32Toi32(LHS, DAG), Mask);
03615       RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
03616                         bitcastf32Toi32(RHS, DAG), Mask);
03617       SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
03618       SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03619       return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
03620                          Chain, Dest, ARMcc, CCR, Cmp);
03621     }
03622 
03623     SDValue LHS1, LHS2;
03624     SDValue RHS1, RHS2;
03625     expandf64Toi32(LHS, DAG, LHS1, LHS2);
03626     expandf64Toi32(RHS, DAG, RHS1, RHS2);
03627     LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
03628     RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
03629     ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
03630     ARMcc = DAG.getConstant(CondCode, MVT::i32);
03631     SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
03632     SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
03633     return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
03634   }
03635 
03636   return SDValue();
03637 }
03638 
03639 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
03640   SDValue Chain = Op.getOperand(0);
03641   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
03642   SDValue LHS = Op.getOperand(2);
03643   SDValue RHS = Op.getOperand(3);
03644   SDValue Dest = Op.getOperand(4);
03645   SDLoc dl(Op);
03646 
03647   if (LHS.getValueType() == MVT::i32) {
03648     SDValue ARMcc;
03649     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
03650     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03651     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
03652                        Chain, Dest, ARMcc, CCR, Cmp);
03653   }
03654 
03655   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
03656 
03657   if (getTargetMachine().Options.UnsafeFPMath &&
03658       (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
03659        CC == ISD::SETNE || CC == ISD::SETUNE)) {
03660     SDValue Result = OptimizeVFPBrcond(Op, DAG);
03661     if (Result.getNode())
03662       return Result;
03663   }
03664 
03665   ARMCC::CondCodes CondCode, CondCode2;
03666   FPCCToARMCC(CC, CondCode, CondCode2);
03667 
03668   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
03669   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
03670   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03671   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
03672   SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
03673   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
03674   if (CondCode2 != ARMCC::AL) {
03675     ARMcc = DAG.getConstant(CondCode2, MVT::i32);
03676     SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
03677     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
03678   }
03679   return Res;
03680 }
03681 
03682 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
03683   SDValue Chain = Op.getOperand(0);
03684   SDValue Table = Op.getOperand(1);
03685   SDValue Index = Op.getOperand(2);
03686   SDLoc dl(Op);
03687 
03688   EVT PTy = getPointerTy();
03689   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
03690   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
03691   SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
03692   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
03693   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
03694   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
03695   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
03696   if (Subtarget->isThumb2()) {
03697     // Thumb2 uses a two-level jump. That is, it jumps into the jump table
03698     // which does another jump to the destination. This also makes it easier
03699     // to translate it to TBB / TBH later.
03700     // FIXME: This might not work if the function is extremely large.
03701     return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
03702                        Addr, Op.getOperand(2), JTI, UId);
03703   }
03704   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
03705     Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
03706                        MachinePointerInfo::getJumpTable(),
03707                        false, false, false, 0);
03708     Chain = Addr.getValue(1);
03709     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
03710     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
03711   } else {
03712     Addr = DAG.getLoad(PTy, dl, Chain, Addr,
03713                        MachinePointerInfo::getJumpTable(),
03714                        false, false, false, 0);
03715     Chain = Addr.getValue(1);
03716     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
03717   }
03718 }
03719 
03720 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
03721   EVT VT = Op.getValueType();
03722   SDLoc dl(Op);
03723 
03724   if (Op.getValueType().getVectorElementType() == MVT::i32) {
03725     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
03726       return Op;
03727     return DAG.UnrollVectorOp(Op.getNode());
03728   }
03729 
03730   assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
03731          "Invalid type for custom lowering!");
03732   if (VT != MVT::v4i16)
03733     return DAG.UnrollVectorOp(Op.getNode());
03734 
03735   Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
03736   return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
03737 }
03738 
03739 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
03740   EVT VT = Op.getValueType();
03741   if (VT.isVector())
03742     return LowerVectorFP_TO_INT(Op, DAG);
03743 
03744   SDLoc dl(Op);
03745   unsigned Opc;
03746 
03747   switch (Op.getOpcode()) {
03748   default: llvm_unreachable("Invalid opcode!");
03749   case ISD::FP_TO_SINT:
03750     Opc = ARMISD::FTOSI;
03751     break;
03752   case ISD::FP_TO_UINT:
03753     Opc = ARMISD::FTOUI;
03754     break;
03755   }
03756   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
03757   return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
03758 }
03759 
03760 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
03761   EVT VT = Op.getValueType();
03762   SDLoc dl(Op);
03763 
03764   if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
03765     if (VT.getVectorElementType() == MVT::f32)
03766       return Op;
03767     return DAG.UnrollVectorOp(Op.getNode());
03768   }
03769 
03770   assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
03771          "Invalid type for custom lowering!");
03772   if (VT != MVT::v4f32)
03773     return DAG.UnrollVectorOp(Op.getNode());
03774 
03775   unsigned CastOpc;
03776   unsigned Opc;
03777   switch (Op.getOpcode()) {
03778   default: llvm_unreachable("Invalid opcode!");
03779   case ISD::SINT_TO_FP:
03780     CastOpc = ISD::SIGN_EXTEND;
03781     Opc = ISD::SINT_TO_FP;
03782     break;
03783   case ISD::UINT_TO_FP:
03784     CastOpc = ISD::ZERO_EXTEND;
03785     Opc = ISD::UINT_TO_FP;
03786     break;
03787   }
03788 
03789   Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
03790   return DAG.getNode(Opc, dl, VT, Op);
03791 }
03792 
03793 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
03794   EVT VT = Op.getValueType();
03795   if (VT.isVector())
03796     return LowerVectorINT_TO_FP(Op, DAG);
03797 
03798   SDLoc dl(Op);
03799   unsigned Opc;
03800 
03801   switch (Op.getOpcode()) {
03802   default: llvm_unreachable("Invalid opcode!");
03803   case ISD::SINT_TO_FP:
03804     Opc = ARMISD::SITOF;
03805     break;
03806   case ISD::UINT_TO_FP:
03807     Opc = ARMISD::UITOF;
03808     break;
03809   }
03810 
03811   Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
03812   return DAG.getNode(Opc, dl, VT, Op);
03813 }
03814 
03815 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
03816   // Implement fcopysign with a fabs and a conditional fneg.
03817   SDValue Tmp0 = Op.getOperand(0);
03818   SDValue Tmp1 = Op.getOperand(1);
03819   SDLoc dl(Op);
03820   EVT VT = Op.getValueType();
03821   EVT SrcVT = Tmp1.getValueType();
03822   bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
03823     Tmp0.getOpcode() == ARMISD::VMOVDRR;
03824   bool UseNEON = !InGPR && Subtarget->hasNEON();
03825 
03826   if (UseNEON) {
03827     // Use VBSL to copy the sign bit.
03828     unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
03829     SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
03830                                DAG.getTargetConstant(EncodedVal, MVT::i32));
03831     EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
03832     if (VT == MVT::f64)
03833       Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
03834                          DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
03835                          DAG.getConstant(32, MVT::i32));
03836     else /*if (VT == MVT::f32)*/
03837       Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
03838     if (SrcVT == MVT::f32) {
03839       Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
03840       if (VT == MVT::f64)
03841         Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
03842                            DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
03843                            DAG.getConstant(32, MVT::i32));
03844     } else if (VT == MVT::f32)
03845       Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
03846                          DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
03847                          DAG.getConstant(32, MVT::i32));
03848     Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
03849     Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
03850 
03851     SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
03852                                             MVT::i32);
03853     AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
03854     SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
03855                                   DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
03856 
03857     SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
03858                               DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
03859                               DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
03860     if (VT == MVT::f32) {
03861       Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
03862       Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
03863                         DAG.getConstant(0, MVT::i32));
03864     } else {
03865       Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
03866     }
03867 
03868     return Res;
03869   }
03870 
03871   // Bitcast operand 1 to i32.
03872   if (SrcVT == MVT::f64)
03873     Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
03874                        Tmp1).getValue(1);
03875   Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
03876 
03877   // Or in the signbit with integer operations.
03878   SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
03879   SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
03880   Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
03881   if (VT == MVT::f32) {
03882     Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
03883                        DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
03884     return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
03885                        DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
03886   }
03887 
03888   // f64: Or the high part with signbit and then combine two parts.
03889   Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
03890                      Tmp0);
03891   SDValue Lo = Tmp0.getValue(0);
03892   SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
03893   Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
03894   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
03895 }
03896 
03897 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
03898   MachineFunction &MF = DAG.getMachineFunction();
03899   MachineFrameInfo *MFI = MF.getFrameInfo();
03900   MFI->setReturnAddressIsTaken(true);
03901 
03902   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
03903     return SDValue();
03904 
03905   EVT VT = Op.getValueType();
03906   SDLoc dl(Op);
03907   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
03908   if (Depth) {
03909     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
03910     SDValue Offset = DAG.getConstant(4, MVT::i32);
03911     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
03912                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
03913                        MachinePointerInfo(), false, false, false, 0);
03914   }
03915 
03916   // Return LR, which contains the return address. Mark it an implicit live-in.
03917   unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
03918   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
03919 }
03920 
03921 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
03922   const ARMBaseRegisterInfo &ARI =
03923     *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
03924   MachineFunction &MF = DAG.getMachineFunction();
03925   MachineFrameInfo *MFI = MF.getFrameInfo();
03926   MFI->setFrameAddressIsTaken(true);
03927 
03928   EVT VT = Op.getValueType();
03929   SDLoc dl(Op);  // FIXME probably not meaningful
03930   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
03931   unsigned FrameReg = ARI.getFrameRegister(MF);
03932   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
03933   while (Depth--)
03934     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
03935                             MachinePointerInfo(),
03936                             false, false, false, 0);
03937   return FrameAddr;
03938 }
03939 
03940 // FIXME? Maybe this could be a TableGen attribute on some registers and
03941 // this table could be generated automatically from RegInfo.
03942 unsigned ARMTargetLowering::getRegisterByName(const char* RegName,
03943                                               EVT VT) const {
03944   unsigned Reg = StringSwitch<unsigned>(RegName)
03945                        .Case("sp", ARM::SP)
03946                        .Default(0);
03947   if (Reg)
03948     return Reg;
03949   report_fatal_error("Invalid register name global variable");
03950 }
03951 
03952 /// ExpandBITCAST - If the target supports VFP, this function is called to
03953 /// expand a bit convert where either the source or destination type is i64 to
03954 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
03955 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
03956 /// vectors), since the legalizer won't know what to do with that.
03957 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
03958   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
03959   SDLoc dl(N);
03960   SDValue Op = N->getOperand(0);
03961 
03962   // This function is only supposed to be called for i64 types, either as the
03963   // source or destination of the bit convert.
03964   EVT SrcVT = Op.getValueType();
03965   EVT DstVT = N->getValueType(0);
03966   assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
03967          "ExpandBITCAST called for non-i64 type");
03968 
03969   // Turn i64->f64 into VMOVDRR.
03970   if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
03971     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
03972                              DAG.getConstant(0, MVT::i32));
03973     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
03974                              DAG.getConstant(1, MVT::i32));
03975     return DAG.getNode(ISD::BITCAST, dl, DstVT,
03976                        DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
03977   }
03978 
03979   // Turn f64->i64 into VMOVRRD.
03980   if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
03981     SDValue Cvt;
03982     if (TLI.isBigEndian() && SrcVT.isVector() &&
03983         SrcVT.getVectorNumElements() > 1)
03984       Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
03985                         DAG.getVTList(MVT::i32, MVT::i32),
03986                         DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
03987     else
03988       Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
03989                         DAG.getVTList(MVT::i32, MVT::i32), Op);
03990     // Merge the pieces into a single i64 value.
03991     return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
03992   }
03993 
03994   return SDValue();
03995 }
03996 
03997 /// getZeroVector - Returns a vector of specified type with all zero elements.
03998 /// Zero vectors are used to represent vector negation and in those cases
03999 /// will be implemented with the NEON VNEG instruction.  However, VNEG does
04000 /// not support i64 elements, so sometimes the zero vectors will need to be
04001 /// explicitly constructed.  Regardless, use a canonical VMOV to create the
04002 /// zero vector.
04003 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
04004   assert(VT.isVector() && "Expected a vector type");
04005   // The canonical modified immediate encoding of a zero vector is....0!
04006   SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
04007   EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
04008   SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
04009   return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
04010 }
04011 
04012 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
04013 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
04014 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
04015                                                 SelectionDAG &DAG) const {
04016   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
04017   EVT VT = Op.getValueType();
04018   unsigned VTBits = VT.getSizeInBits();
04019   SDLoc dl(Op);
04020   SDValue ShOpLo = Op.getOperand(0);
04021   SDValue ShOpHi = Op.getOperand(1);
04022   SDValue ShAmt  = Op.getOperand(2);
04023   SDValue ARMcc;
04024   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
04025 
04026   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
04027 
04028   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
04029                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
04030   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
04031   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
04032                                    DAG.getConstant(VTBits, MVT::i32));
04033   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
04034   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
04035   SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
04036 
04037   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
04038   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
04039                           ARMcc, DAG, dl);
04040   SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
04041   SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
04042                            CCR, Cmp);
04043 
04044   SDValue Ops[2] = { Lo, Hi };
04045   return DAG.getMergeValues(Ops, dl);
04046 }
04047 
04048 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
04049 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
04050 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
04051                                                SelectionDAG &DAG) const {
04052   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
04053   EVT VT = Op.getValueType();
04054   unsigned VTBits = VT.getSizeInBits();
04055   SDLoc dl(Op);
04056   SDValue ShOpLo = Op.getOperand(0);
04057   SDValue ShOpHi = Op.getOperand(1);
04058   SDValue ShAmt  = Op.getOperand(2);
04059   SDValue ARMcc;
04060 
04061   assert(Op.getOpcode() == ISD::SHL_PARTS);
04062   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
04063                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
04064   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
04065   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
04066                                    DAG.getConstant(VTBits, MVT::i32));
04067   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
04068   SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
04069 
04070   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
04071   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
04072   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
04073                           ARMcc, DAG, dl);
04074   SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
04075   SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
04076                            CCR, Cmp);
04077 
04078   SDValue Ops[2] = { Lo, Hi };
04079   return DAG.getMergeValues(Ops, dl);
04080 }
04081 
04082 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
04083                                             SelectionDAG &DAG) const {
04084   // The rounding mode is in bits 23:22 of the FPSCR.
04085   // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
04086   // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
04087   // so that the shift + and get folded into a bitfield extract.
04088   SDLoc dl(Op);
04089   SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
04090                               DAG.getConstant(Intrinsic::arm_get_fpscr,
04091                                               MVT::i32));
04092   SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
04093                                   DAG.getConstant(1U << 22, MVT::i32));
04094   SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
04095                               DAG.getConstant(22, MVT::i32));
04096   return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
04097                      DAG.getConstant(3, MVT::i32));
04098 }
04099 
04100 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
04101                          const ARMSubtarget *ST) {
04102   EVT VT = N->getValueType(0);
04103   SDLoc dl(N);
04104 
04105   if (!ST->hasV6T2Ops())
04106     return SDValue();
04107 
04108   SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
04109   return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
04110 }
04111 
04112 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
04113 /// for each 16-bit element from operand, repeated.  The basic idea is to
04114 /// leverage vcnt to get the 8-bit counts, gather and add the results.
04115 ///
04116 /// Trace for v4i16:
04117 /// input    = [v0    v1    v2    v3   ] (vi 16-bit element)
04118 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
04119 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
04120 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
04121 ///            [b0 b1 b2 b3 b4 b5 b6 b7]
04122 ///           +[b1 b0 b3 b2 b5 b4 b7 b6]
04123 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
04124 /// vuzp:    = [k0 k1 k2 k3 k0 k1 k2 k3]  each ki is 8-bits)
04125 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
04126   EVT VT = N->getValueType(0);
04127   SDLoc DL(N);
04128 
04129   EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
04130   SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
04131   SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
04132   SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
04133   SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
04134   return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
04135 }
04136 
04137 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
04138 /// bit-count for each 16-bit element from the operand.  We need slightly
04139 /// different sequencing for v4i16 and v8i16 to stay within NEON's available
04140 /// 64/128-bit registers.
04141 ///
04142 /// Trace for v4i16:
04143 /// input           = [v0    v1    v2    v3    ] (vi 16-bit element)
04144 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
04145 /// v8i16:Extended  = [k0    k1    k2    k3    k0    k1    k2    k3    ]
04146 /// v4i16:Extracted = [k0    k1    k2    k3    ]
04147 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
04148   EVT VT = N->getValueType(0);
04149   SDLoc DL(N);
04150 
04151   SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
04152   if (VT.is64BitVector()) {
04153     SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
04154     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
04155                        DAG.getIntPtrConstant(0));
04156   } else {
04157     SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
04158                                     BitCounts, DAG.getIntPtrConstant(0));
04159     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
04160   }
04161 }
04162 
04163 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
04164 /// bit-count for each 32-bit element from the operand.  The idea here is
04165 /// to split the vector into 16-bit elements, leverage the 16-bit count
04166 /// routine, and then combine the results.
04167 ///
04168 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
04169 /// input    = [v0    v1    ] (vi: 32-bit elements)
04170 /// Bitcast  = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
04171 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
04172 /// vrev: N0 = [k1 k0 k3 k2 ]
04173 ///            [k0 k1 k2 k3 ]
04174 ///       N1 =+[k1 k0 k3 k2 ]
04175 ///            [k0 k2 k1 k3 ]
04176 ///       N2 =+[k1 k3 k0 k2 ]
04177 ///            [k0    k2    k1    k3    ]
04178 /// Extended =+[k1    k3    k0    k2    ]
04179 ///            [k0    k2    ]
04180 /// Extracted=+[k1    k3    ]
04181 ///
04182 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
04183   EVT VT = N->getValueType(0);
04184   SDLoc DL(N);
04185 
04186   EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
04187 
04188   SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
04189   SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
04190   SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
04191   SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
04192   SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
04193 
04194   if (VT.is64BitVector()) {
04195     SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
04196     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
04197                        DAG.getIntPtrConstant(0));
04198   } else {
04199     SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
04200                                     DAG.getIntPtrConstant(0));
04201     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
04202   }
04203 }
04204 
04205 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
04206                           const ARMSubtarget *ST) {
04207   EVT VT = N->getValueType(0);
04208 
04209   assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
04210   assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
04211           VT == MVT::v4i16 || VT == MVT::v8i16) &&
04212          "Unexpected type for custom ctpop lowering");
04213 
04214   if (VT.getVectorElementType() == MVT::i32)
04215     return lowerCTPOP32BitElements(N, DAG);
04216   else
04217     return lowerCTPOP16BitElements(N, DAG);
04218 }
04219 
04220 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
04221                           const ARMSubtarget *ST) {
04222   EVT VT = N->getValueType(0);
04223   SDLoc dl(N);
04224 
04225   if (!VT.isVector())
04226     return SDValue();
04227 
04228   // Lower vector shifts on NEON to use VSHL.
04229   assert(ST->hasNEON() && "unexpected vector shift");
04230 
04231   // Left shifts translate directly to the vshiftu intrinsic.
04232   if (N->getOpcode() == ISD::SHL)
04233     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
04234                        DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
04235                        N->getOperand(0), N->getOperand(1));
04236 
04237   assert((N->getOpcode() == ISD::SRA ||
04238           N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
04239 
04240   // NEON uses the same intrinsics for both left and right shifts.  For
04241   // right shifts, the shift amounts are negative, so negate the vector of
04242   // shift amounts.
04243   EVT ShiftVT = N->getOperand(1).getValueType();
04244   SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
04245                                      getZeroVector(ShiftVT, DAG, dl),
04246                                      N->getOperand(1));
04247   Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
04248                              Intrinsic::arm_neon_vshifts :
04249                              Intrinsic::arm_neon_vshiftu);
04250   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
04251                      DAG.getConstant(vshiftInt, MVT::i32),
04252                      N->getOperand(0), NegatedCount);
04253 }
04254 
04255 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
04256                                 const ARMSubtarget *ST) {
04257   EVT VT = N->getValueType(0);
04258   SDLoc dl(N);
04259 
04260   // We can get here for a node like i32 = ISD::SHL i32, i64
04261   if (VT != MVT::i64)
04262     return SDValue();
04263 
04264   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
04265          "Unknown shift to lower!");
04266 
04267   // We only lower SRA, SRL of 1 here, all others use generic lowering.
04268   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
04269       cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
04270     return SDValue();
04271 
04272   // If we are in thumb mode, we don't have RRX.
04273   if (ST->isThumb1Only()) return SDValue();
04274 
04275   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
04276   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
04277                            DAG.getConstant(0, MVT::i32));
04278   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
04279                            DAG.getConstant(1, MVT::i32));
04280 
04281   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
04282   // captures the result into a carry flag.
04283   unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
04284   Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
04285 
04286   // The low part is an ARMISD::RRX operand, which shifts the carry in.
04287   Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
04288 
04289   // Merge the pieces into a single i64 value.
04290  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
04291 }
04292 
04293 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
04294   SDValue TmpOp0, TmpOp1;
04295   bool Invert = false;
04296   bool Swap = false;
04297   unsigned Opc = 0;
04298 
04299   SDValue Op0 = Op.getOperand(0);
04300   SDValue Op1 = Op.getOperand(1);
04301   SDValue CC = Op.getOperand(2);
04302   EVT VT = Op.getValueType();
04303   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
04304   SDLoc dl(Op);
04305 
04306   if (Op.getOperand(1).getValueType().isFloatingPoint()) {
04307     switch (SetCCOpcode) {
04308     default: llvm_unreachable("Illegal FP comparison");
04309     case ISD::SETUNE:
04310     case ISD::SETNE:  Invert = true; // Fallthrough
04311     case ISD::SETOEQ:
04312     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
04313     case ISD::SETOLT:
04314     case ISD::SETLT: Swap = true; // Fallthrough
04315     case ISD::SETOGT:
04316     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
04317     case ISD::SETOLE:
04318     case ISD::SETLE:  Swap = true; // Fallthrough
04319     case ISD::SETOGE:
04320     case ISD::SETGE: Opc = ARMISD::VCGE; break;
04321     case ISD::SETUGE: Swap = true; // Fallthrough
04322     case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
04323     case ISD::SETUGT: Swap = true; // Fallthrough
04324     case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
04325     case ISD::SETUEQ: Invert = true; // Fallthrough
04326     case ISD::SETONE:
04327       // Expand this to (OLT | OGT).
04328       TmpOp0 = Op0;
04329       TmpOp1 = Op1;
04330       Opc = ISD::OR;
04331       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
04332       Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
04333       break;
04334     case ISD::SETUO: Invert = true; // Fallthrough
04335     case ISD::SETO:
04336       // Expand this to (OLT | OGE).
04337       TmpOp0 = Op0;
04338       TmpOp1 = Op1;
04339       Opc = ISD::OR;
04340       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
04341       Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
04342       break;
04343     }
04344   } else {
04345     // Integer comparisons.
04346     switch (SetCCOpcode) {
04347     default: llvm_unreachable("Illegal integer comparison");
04348     case ISD::SETNE:  Invert = true;
04349     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
04350     case ISD::SETLT:  Swap = true;
04351     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
04352     case ISD::SETLE:  Swap = true;
04353     case ISD::SETGE:  Opc = ARMISD::VCGE; break;
04354     case ISD::SETULT: Swap = true;
04355     case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
04356     case ISD::SETULE: Swap = true;
04357     case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
04358     }
04359 
04360     // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
04361     if (Opc == ARMISD::VCEQ) {
04362 
04363       SDValue AndOp;
04364       if (ISD::isBuildVectorAllZeros(Op1.getNode()))
04365         AndOp = Op0;
04366       else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
04367         AndOp = Op1;
04368 
04369       // Ignore bitconvert.
04370       if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
04371         AndOp = AndOp.getOperand(0);
04372 
04373       if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
04374         Opc = ARMISD::VTST;
04375         Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
04376         Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
04377         Invert = !Invert;
04378       }
04379     }
04380   }
04381 
04382   if (Swap)
04383     std::swap(Op0, Op1);
04384 
04385   // If one of the operands is a constant vector zero, attempt to fold the
04386   // comparison to a specialized compare-against-zero form.
04387   SDValue SingleOp;
04388   if (ISD::isBuildVectorAllZeros(Op1.getNode()))
04389     SingleOp = Op0;
04390   else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
04391     if (Opc == ARMISD::VCGE)
04392       Opc = ARMISD::VCLEZ;
04393     else if (Opc == ARMISD::VCGT)
04394       Opc = ARMISD::VCLTZ;
04395     SingleOp = Op1;
04396   }
04397 
04398   SDValue Result;
04399   if (SingleOp.getNode()) {
04400     switch (Opc) {
04401     case ARMISD::VCEQ:
04402       Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
04403     case ARMISD::VCGE:
04404       Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
04405     case ARMISD::VCLEZ:
04406       Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
04407     case ARMISD::VCGT:
04408       Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
04409     case ARMISD::VCLTZ:
04410       Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
04411     default:
04412       Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
04413     }
04414   } else {
04415      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
04416   }
04417 
04418   if (Invert)
04419     Result = DAG.getNOT(dl, Result, VT);
04420 
04421   return Result;
04422 }
04423 
04424 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
04425 /// valid vector constant for a NEON instruction with a "modified immediate"
04426 /// operand (e.g., VMOV).  If so, return the encoded value.
04427 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
04428                                  unsigned SplatBitSize, SelectionDAG &DAG,
04429                                  EVT &VT, bool is128Bits, NEONModImmType type) {
04430   unsigned OpCmode, Imm;
04431 
04432   // SplatBitSize is set to the smallest size that splats the vector, so a
04433   // zero vector will always have SplatBitSize == 8.  However, NEON modified
04434   // immediate instructions others than VMOV do not support the 8-bit encoding
04435   // of a zero vector, and the default encoding of zero is supposed to be the
04436   // 32-bit version.
04437   if (SplatBits == 0)
04438     SplatBitSize = 32;
04439 
04440   switch (SplatBitSize) {
04441   case 8:
04442     if (type != VMOVModImm)
04443       return SDValue();
04444     // Any 1-byte value is OK.  Op=0, Cmode=1110.
04445     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
04446     OpCmode = 0xe;
04447     Imm = SplatBits;
04448     VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
04449     break;
04450 
04451   case 16:
04452     // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
04453     VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
04454     if ((SplatBits & ~0xff) == 0) {
04455       // Value = 0x00nn: Op=x, Cmode=100x.
04456       OpCmode = 0x8;
04457       Imm = SplatBits;
04458       break;
04459     }
04460     if ((SplatBits & ~0xff00) == 0) {
04461       // Value = 0xnn00: Op=x, Cmode=101x.
04462       OpCmode = 0xa;
04463       Imm = SplatBits >> 8;
04464       break;
04465     }
04466     return SDValue();
04467 
04468   case 32:
04469     // NEON's 32-bit VMOV supports splat values where:
04470     // * only one byte is nonzero, or
04471     // * the least significant byte is 0xff and the second byte is nonzero, or
04472     // * the least significant 2 bytes are 0xff and the third is nonzero.
04473     VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
04474     if ((SplatBits & ~0xff) == 0) {
04475       // Value = 0x000000nn: Op=x, Cmode=000x.
04476       OpCmode = 0;
04477       Imm = SplatBits;
04478       break;
04479     }
04480     if ((SplatBits & ~0xff00) == 0) {
04481       // Value = 0x0000nn00: Op=x, Cmode=001x.
04482       OpCmode = 0x2;
04483       Imm = SplatBits >> 8;
04484       break;
04485     }
04486     if ((SplatBits & ~0xff0000) == 0) {
04487       // Value = 0x00nn0000: Op=x, Cmode=010x.
04488       OpCmode = 0x4;
04489       Imm = SplatBits >> 16;
04490       break;
04491     }
04492     if ((SplatBits & ~0xff000000) == 0) {
04493       // Value = 0xnn000000: Op=x, Cmode=011x.
04494       OpCmode = 0x6;
04495       Imm = SplatBits >> 24;
04496       break;
04497     }
04498 
04499     // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
04500     if (type == OtherModImm) return SDValue();
04501 
04502     if ((SplatBits & ~0xffff) == 0 &&
04503         ((SplatBits | SplatUndef) & 0xff) == 0xff) {
04504       // Value = 0x0000nnff: Op=x, Cmode=1100.
04505       OpCmode = 0xc;
04506       Imm = SplatBits >> 8;
04507       break;
04508     }
04509 
04510     if ((SplatBits & ~0xffffff) == 0 &&
04511         ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
04512       // Value = 0x00nnffff: Op=x, Cmode=1101.
04513       OpCmode = 0xd;
04514       Imm = SplatBits >> 16;
04515       break;
04516     }
04517 
04518     // Note: there are a few 32-bit splat values (specifically: 00ffff00,
04519     // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
04520     // VMOV.I32.  A (very) minor optimization would be to replicate the value
04521     // and fall through here to test for a valid 64-bit splat.  But, then the
04522     // caller would also need to check and handle the change in size.
04523     return SDValue();
04524 
04525   case 64: {
04526     if (type != VMOVModImm)
04527       return SDValue();
04528     // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
04529     uint64_t BitMask = 0xff;
04530     uint64_t Val = 0;
04531     unsigned ImmMask = 1;
04532     Imm = 0;
04533     for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
04534       if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
04535         Val |= BitMask;
04536         Imm |= ImmMask;
04537       } else if ((SplatBits & BitMask) != 0) {
04538         return SDValue();
04539       }
04540       BitMask <<= 8;
04541       ImmMask <<= 1;
04542     }
04543 
04544     if (DAG.getTargetLoweringInfo().isBigEndian())
04545       // swap higher and lower 32 bit word
04546       Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
04547 
04548     // Op=1, Cmode=1110.
04549     OpCmode = 0x1e;
04550     VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
04551     break;
04552   }
04553 
04554   default:
04555     llvm_unreachable("unexpected size for isNEONModifiedImm");
04556   }
04557 
04558   unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
04559   return DAG.getTargetConstant(EncodedVal, MVT::i32);
04560 }
04561 
04562 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
04563                                            const ARMSubtarget *ST) const {
04564   if (!ST->hasVFP3())
04565     return SDValue();
04566 
04567   bool IsDouble = Op.getValueType() == MVT::f64;
04568   ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
04569 
04570   // Try splatting with a VMOV.f32...
04571   APFloat FPVal = CFP->getValueAPF();
04572   int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
04573 
04574   if (ImmVal != -1) {
04575     if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
04576       // We have code in place to select a valid ConstantFP already, no need to
04577       // do any mangling.
04578       return Op;
04579     }
04580 
04581     // It's a float and we are trying to use NEON operations where
04582     // possible. Lower it to a splat followed by an extract.
04583     SDLoc DL(Op);
04584     SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
04585     SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
04586                                       NewVal);
04587     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
04588                        DAG.getConstant(0, MVT::i32));
04589   }
04590 
04591   // The rest of our options are NEON only, make sure that's allowed before
04592   // proceeding..
04593   if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
04594     return SDValue();
04595 
04596   EVT VMovVT;
04597   uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
04598 
04599   // It wouldn't really be worth bothering for doubles except for one very
04600   // important value, which does happen to match: 0.0. So make sure we don't do
04601   // anything stupid.
04602   if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
04603     return SDValue();
04604 
04605   // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
04606   SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
04607                                      false, VMOVModImm);
04608   if (NewVal != SDValue()) {
04609     SDLoc DL(Op);
04610     SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
04611                                       NewVal);
04612     if (IsDouble)
04613       return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
04614 
04615     // It's a float: cast and extract a vector element.
04616     SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
04617                                        VecConstant);
04618     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
04619                        DAG.getConstant(0, MVT::i32));
04620   }
04621 
04622   // Finally, try a VMVN.i32
04623   NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
04624                              false, VMVNModImm);
04625   if (NewVal != SDValue()) {
04626     SDLoc DL(Op);
04627     SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
04628 
04629     if (IsDouble)
04630       return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
04631 
04632     // It's a float: cast and extract a vector element.
04633     SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
04634                                        VecConstant);
04635     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
04636                        DAG.getConstant(0, MVT::i32));
04637   }
04638 
04639   return SDValue();
04640 }
04641 
04642 // check if an VEXT instruction can handle the shuffle mask when the
04643 // vector sources of the shuffle are the same.
04644 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
04645   unsigned NumElts = VT.getVectorNumElements();
04646 
04647   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
04648   if (M[0] < 0)
04649     return false;
04650 
04651   Imm = M[0];
04652 
04653   // If this is a VEXT shuffle, the immediate value is the index of the first
04654   // element.  The other shuffle indices must be the successive elements after
04655   // the first one.
04656   unsigned ExpectedElt = Imm;
04657   for (unsigned i = 1; i < NumElts; ++i) {
04658     // Increment the expected index.  If it wraps around, just follow it
04659     // back to index zero and keep going.
04660     ++ExpectedElt;
04661     if (ExpectedElt == NumElts)
04662       ExpectedElt = 0;
04663 
04664     if (M[i] < 0) continue; // ignore UNDEF indices
04665     if (ExpectedElt != static_cast<unsigned>(M[i]))
04666       return false;
04667   }
04668 
04669   return true;
04670 }
04671 
04672 
04673 static bool isVEXTMask(ArrayRef<int> M, EVT VT,
04674                        bool &ReverseVEXT, unsigned &Imm) {
04675   unsigned NumElts = VT.getVectorNumElements();
04676   ReverseVEXT = false;
04677 
04678   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
04679   if (M[0] < 0)
04680     return false;
04681 
04682   Imm = M[0];
04683 
04684   // If this is a VEXT shuffle, the immediate value is the index of the first
04685   // element.  The other shuffle indices must be the successive elements after
04686   // the first one.
04687   unsigned ExpectedElt = Imm;
04688   for (unsigned i = 1; i < NumElts; ++i) {
04689     // Increment the expected index.  If it wraps around, it may still be
04690     // a VEXT but the source vectors must be swapped.
04691     ExpectedElt += 1;
04692     if (ExpectedElt == NumElts * 2) {
04693       ExpectedElt = 0;
04694       ReverseVEXT = true;
04695     }
04696 
04697     if (M[i] < 0) continue; // ignore UNDEF indices
04698     if (ExpectedElt != static_cast<unsigned>(M[i]))
04699       return false;
04700   }
04701 
04702   // Adjust the index value if the source operands will be swapped.
04703   if (ReverseVEXT)
04704     Imm -= NumElts;
04705 
04706   return true;
04707 }
04708 
04709 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
04710 /// instruction with the specified blocksize.  (The order of the elements
04711 /// within each block of the vector is reversed.)
04712 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
04713   assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
04714          "Only possible block sizes for VREV are: 16, 32, 64");
04715 
04716   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04717   if (EltSz == 64)
04718     return false;
04719 
04720   unsigned NumElts = VT.getVectorNumElements();
04721   unsigned BlockElts = M[0] + 1;
04722   // If the first shuffle index is UNDEF, be optimistic.
04723   if (M[0] < 0)
04724     BlockElts = BlockSize / EltSz;
04725 
04726   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
04727     return false;
04728 
04729   for (unsigned i = 0; i < NumElts; ++i) {
04730     if (M[i] < 0) continue; // ignore UNDEF indices
04731     if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
04732       return false;
04733   }
04734 
04735   return true;
04736 }
04737 
04738 static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
04739   // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
04740   // range, then 0 is placed into the resulting vector. So pretty much any mask
04741   // of 8 elements can work here.
04742   return VT == MVT::v8i8 && M.size() == 8;
04743 }
04744 
04745 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
04746   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04747   if (EltSz == 64)
04748     return false;
04749 
04750   unsigned NumElts = VT.getVectorNumElements();
04751   WhichResult = (M[0] == 0 ? 0 : 1);
04752   for (unsigned i = 0; i < NumElts; i += 2) {
04753     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
04754         (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
04755       return false;
04756   }
04757   return true;
04758 }
04759 
04760 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
04761 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
04762 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
04763 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
04764   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04765   if (EltSz == 64)
04766     return false;
04767 
04768   unsigned NumElts = VT.getVectorNumElements();
04769   WhichResult = (M[0] == 0 ? 0 : 1);
04770   for (unsigned i = 0; i < NumElts; i += 2) {
04771     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
04772         (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
04773       return false;
04774   }
04775   return true;
04776 }
04777 
04778 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
04779   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04780   if (EltSz == 64)
04781     return false;
04782 
04783   unsigned NumElts = VT.getVectorNumElements();
04784   WhichResult = (M[0] == 0 ? 0 : 1);
04785   for (unsigned i = 0; i != NumElts; ++i) {
04786     if (M[i] < 0) continue; // ignore UNDEF indices
04787     if ((unsigned) M[i] != 2 * i + WhichResult)
04788       return false;
04789   }
04790 
04791   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
04792   if (VT.is64BitVector() && EltSz == 32)
04793     return false;
04794 
04795   return true;
04796 }
04797 
04798 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
04799 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
04800 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
04801 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
04802   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04803   if (EltSz == 64)
04804     return false;
04805 
04806   unsigned Half = VT.getVectorNumElements() / 2;
04807   WhichResult = (M[0] == 0 ? 0 : 1);
04808   for (unsigned j = 0; j != 2; ++j) {
04809     unsigned Idx = WhichResult;
04810     for (unsigned i = 0; i != Half; ++i) {
04811       int MIdx = M[i + j * Half];
04812       if (MIdx >= 0 && (unsigned) MIdx != Idx)
04813         return false;
04814       Idx += 2;
04815     }
04816   }
04817 
04818   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
04819   if (VT.is64BitVector() && EltSz == 32)
04820     return false;
04821 
04822   return true;
04823 }
04824 
04825 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
04826   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04827   if (EltSz == 64)
04828     return false;
04829 
04830   unsigned NumElts = VT.getVectorNumElements();
04831   WhichResult = (M[0] == 0 ? 0 : 1);
04832   unsigned Idx = WhichResult * NumElts / 2;
04833   for (unsigned i = 0; i != NumElts; i += 2) {
04834     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
04835         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
04836       return false;
04837     Idx += 1;
04838   }
04839 
04840   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
04841   if (VT.is64BitVector() && EltSz == 32)
04842     return false;
04843 
04844   return true;
04845 }
04846 
04847 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
04848 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
04849 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
04850 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
04851   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04852   if (EltSz == 64)
04853     return false;
04854 
04855   unsigned NumElts = VT.getVectorNumElements();
04856   WhichResult = (M[0] == 0 ? 0 : 1);
04857   unsigned Idx = WhichResult * NumElts / 2;
04858   for (unsigned i = 0; i != NumElts; i += 2) {
04859     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
04860         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
04861       return false;
04862     Idx += 1;
04863   }
04864 
04865   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
04866   if (VT.is64BitVector() && EltSz == 32)
04867     return false;
04868 
04869   return true;
04870 }
04871 
04872 /// \return true if this is a reverse operation on an vector.
04873 static bool isReverseMask(ArrayRef<int> M, EVT VT) {
04874   unsigned NumElts = VT.getVectorNumElements();
04875   // Make sure the mask has the right size.
04876   if (NumElts != M.size())
04877       return false;
04878 
04879   // Look for <15, ..., 3, -1, 1, 0>.
04880   for (unsigned i = 0; i != NumElts; ++i)
04881     if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
04882       return false;
04883 
04884   return true;
04885 }
04886 
04887 // If N is an integer constant that can be moved into a register in one
04888 // instruction, return an SDValue of such a constant (will become a MOV
04889 // instruction).  Otherwise return null.
04890 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
04891                                      const ARMSubtarget *ST, SDLoc dl) {
04892   uint64_t Val;
04893   if (!isa<ConstantSDNode>(N))
04894     return SDValue();
04895   Val = cast<ConstantSDNode>(N)->getZExtValue();
04896 
04897   if (ST->isThumb1Only()) {
04898     if (Val <= 255 || ~Val <= 255)
04899       return DAG.getConstant(Val, MVT::i32);
04900   } else {
04901     if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
04902       return DAG.getConstant(Val, MVT::i32);
04903   }
04904   return SDValue();
04905 }
04906 
04907 // If this is a case we can't handle, return null and let the default
04908 // expansion code take care of it.
04909 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
04910                                              const ARMSubtarget *ST) const {
04911   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
04912   SDLoc dl(Op);
04913   EVT VT = Op.getValueType();
04914 
04915   APInt SplatBits, SplatUndef;
04916   unsigned SplatBitSize;
04917   bool HasAnyUndefs;
04918   if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
04919     if (SplatBitSize <= 64) {
04920       // Check if an immediate VMOV works.
04921       EVT VmovVT;
04922       SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
04923                                       SplatUndef.getZExtValue(), SplatBitSize,
04924                                       DAG, VmovVT, VT.is128BitVector(),
04925                                       VMOVModImm);
04926       if (Val.getNode()) {
04927         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
04928         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
04929       }
04930 
04931       // Try an immediate VMVN.
04932       uint64_t NegatedImm = (~SplatBits).getZExtValue();
04933       Val = isNEONModifiedImm(NegatedImm,
04934                                       SplatUndef.getZExtValue(), SplatBitSize,
04935                                       DAG, VmovVT, VT.is128BitVector(),
04936                                       VMVNModImm);
04937       if (Val.getNode()) {
04938         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
04939         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
04940       }
04941 
04942       // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
04943       if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
04944         int ImmVal = ARM_AM::getFP32Imm(SplatBits);
04945         if (ImmVal != -1) {
04946           SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
04947           return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
04948         }
04949       }
04950     }
04951   }
04952 
04953   // Scan through the operands to see if only one value is used.
04954   //
04955   // As an optimisation, even if more than one value is used it may be more
04956   // profitable to splat with one value then change some lanes.
04957   //
04958   // Heuristically we decide to do this if the vector has a "dominant" value,
04959   // defined as splatted to more than half of the lanes.
04960   unsigned NumElts = VT.getVectorNumElements();
04961   bool isOnlyLowElement = true;
04962   bool usesOnlyOneValue = true;
04963   bool hasDominantValue = false;
04964   bool isConstant = true;
04965 
04966   // Map of the number of times a particular SDValue appears in the
04967   // element list.
04968   DenseMap<SDValue, unsigned> ValueCounts;
04969   SDValue Value;
04970   for (unsigned i = 0; i < NumElts; ++i) {
04971     SDValue V = Op.getOperand(i);
04972     if (V.getOpcode() == ISD::UNDEF)
04973       continue;
04974     if (i > 0)
04975       isOnlyLowElement = false;
04976     if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
04977       isConstant = false;
04978 
04979     ValueCounts.insert(std::make_pair(V, 0));
04980     unsigned &Count = ValueCounts[V];
04981 
04982     // Is this value dominant? (takes up more than half of the lanes)
04983     if (++Count > (NumElts / 2)) {
04984       hasDominantValue = true;
04985       Value = V;
04986     }
04987   }
04988   if (ValueCounts.size() != 1)
04989     usesOnlyOneValue = false;
04990   if (!Value.getNode() && ValueCounts.size() > 0)
04991     Value = ValueCounts.begin()->first;
04992 
04993   if (ValueCounts.size() == 0)
04994     return DAG.getUNDEF(VT);
04995 
04996   // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
04997   // Keep going if we are hitting this case.
04998   if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
04999     return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
05000 
05001   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
05002 
05003   // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
05004   // i32 and try again.
05005   if (hasDominantValue && EltSize <= 32) {
05006     if (!isConstant) {
05007       SDValue N;
05008 
05009       // If we are VDUPing a value that comes directly from a vector, that will
05010       // cause an unnecessary move to and from a GPR, where instead we could
05011       // just use VDUPLANE. We can only do this if the lane being extracted
05012       // is at a constant index, as the VDUP from lane instructions only have
05013       // constant-index forms.
05014       if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
05015           isa<ConstantSDNode>(Value->getOperand(1))) {
05016         // We need to create a new undef vector to use for the VDUPLANE if the
05017         // size of the vector from which we get the value is different than the
05018         // size of the vector that we need to create. We will insert the element
05019         // such that the register coalescer will remove unnecessary copies.
05020         if (VT != Value->getOperand(0).getValueType()) {
05021           ConstantSDNode *constIndex;
05022           constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
05023           assert(constIndex && "The index is not a constant!");
05024           unsigned index = constIndex->getAPIntValue().getLimitedValue() %
05025                              VT.getVectorNumElements();
05026           N =  DAG.getNode(ARMISD::VDUPLANE, dl, VT,
05027                  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
05028                         Value, DAG.getConstant(index, MVT::i32)),
05029                            DAG.getConstant(index, MVT::i32));
05030         } else
05031           N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
05032                         Value->getOperand(0), Value->getOperand(1));
05033       } else
05034         N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
05035 
05036       if (!usesOnlyOneValue) {
05037         // The dominant value was splatted as 'N', but we now have to insert
05038         // all differing elements.
05039         for (unsigned I = 0; I < NumElts; ++I) {
05040           if (Op.getOperand(I) == Value)
05041             continue;
05042           SmallVector<SDValue, 3> Ops;
05043           Ops.push_back(N);
05044           Ops.push_back(Op.getOperand(I));
05045           Ops.push_back(DAG.getConstant(I, MVT::i32));
05046           N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
05047         }
05048       }
05049       return N;
05050     }
05051     if (VT.getVectorElementType().isFloatingPoint()) {
05052       SmallVector<SDValue, 8> Ops;
05053       for (unsigned i = 0; i < NumElts; ++i)
05054         Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
05055                                   Op.getOperand(i)));
05056       EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
05057       SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
05058       Val = LowerBUILD_VECTOR(Val, DAG, ST);
05059       if (Val.getNode())
05060         return DAG.getNode(ISD::BITCAST, dl, VT, Val);
05061     }
05062     if (usesOnlyOneValue) {
05063       SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
05064       if (isConstant && Val.getNode())
05065         return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
05066     }
05067   }
05068 
05069   // If all elements are constants and the case above didn't get hit, fall back
05070   // to the default expansion, which will generate a load from the constant
05071   // pool.
05072   if (isConstant)
05073     return SDValue();
05074 
05075   // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
05076   if (NumElts >= 4) {
05077     SDValue shuffle = ReconstructShuffle(Op, DAG);
05078     if (shuffle != SDValue())
05079       return shuffle;
05080   }
05081 
05082   // Vectors with 32- or 64-bit elements can be built by directly assigning
05083   // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
05084   // will be legalized.
05085   if (EltSize >= 32) {
05086     // Do the expansion with floating-point types, since that is what the VFP
05087     // registers are defined to use, and since i64 is not legal.
05088     EVT EltVT = EVT::getFloatingPointVT(EltSize);
05089     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
05090     SmallVector<SDValue, 8> Ops;
05091     for (unsigned i = 0; i < NumElts; ++i)
05092       Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
05093     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
05094     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
05095   }
05096 
05097   // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
05098   // know the default expansion would otherwise fall back on something even
05099   // worse. For a vector with one or two non-undef values, that's
05100   // scalar_to_vector for the elements followed by a shuffle (provided the
05101   // shuffle is valid for the target) and materialization element by element
05102   // on the stack followed by a load for everything else.
05103   if (!isConstant && !usesOnlyOneValue) {
05104     SDValue Vec = DAG.getUNDEF(VT);
05105     for (unsigned i = 0 ; i < NumElts; ++i) {
05106       SDValue V = Op.getOperand(i);
05107       if (V.getOpcode() == ISD::UNDEF)
05108         continue;
05109       SDValue LaneIdx = DAG.getConstant(i, MVT::i32);
05110       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
05111     }
05112     return Vec;
05113   }
05114 
05115   return SDValue();
05116 }
05117 
05118 // Gather data to see if the operation can be modelled as a
05119 // shuffle in combination with VEXTs.
05120 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
05121                                               SelectionDAG &DAG) const {
05122   SDLoc dl(Op);
05123   EVT VT = Op.getValueType();
05124   unsigned NumElts = VT.getVectorNumElements();
05125 
05126   SmallVector<SDValue, 2> SourceVecs;
05127   SmallVector<unsigned, 2> MinElts;
05128   SmallVector<unsigned, 2> MaxElts;
05129 
05130   for (unsigned i = 0; i < NumElts; ++i) {
05131     SDValue V = Op.getOperand(i);
05132     if (V.getOpcode() == ISD::UNDEF)
05133       continue;
05134     else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
05135       // A shuffle can only come from building a vector from various
05136       // elements of other vectors.
05137       return SDValue();
05138     } else if (V.getOperand(0).getValueType().getVectorElementType() !=
05139                VT.getVectorElementType()) {
05140       // This code doesn't know how to handle shuffles where the vector
05141       // element types do not match (this happens because type legalization
05142       // promotes the return type of EXTRACT_VECTOR_ELT).
05143       // FIXME: It might be appropriate to extend this code to handle
05144       // mismatched types.
05145       return SDValue();
05146     }
05147 
05148     // Record this extraction against the appropriate vector if possible...
05149     SDValue SourceVec = V.getOperand(0);
05150     // If the element number isn't a constant, we can't effectively
05151     // analyze what's going on.
05152     if (!isa<ConstantSDNode>(V.getOperand(1)))
05153       return SDValue();
05154     unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
05155     bool FoundSource = false;
05156     for (unsigned j = 0; j < SourceVecs.size(); ++j) {
05157       if (SourceVecs[j] == SourceVec) {
05158         if (MinElts[j] > EltNo)
05159           MinElts[j] = EltNo;
05160         if (MaxElts[j] < EltNo)
05161           MaxElts[j] = EltNo;
05162         FoundSource = true;
05163         break;
05164       }
05165     }
05166 
05167     // Or record a new source if not...
05168     if (!FoundSource) {
05169       SourceVecs.push_back(SourceVec);
05170       MinElts.push_back(EltNo);
05171       MaxElts.push_back(EltNo);
05172     }
05173   }
05174 
05175   // Currently only do something sane when at most two source vectors
05176   // involved.
05177   if (SourceVecs.size() > 2)
05178     return SDValue();
05179 
05180   SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
05181   int VEXTOffsets[2] = {0, 0};
05182 
05183   // This loop extracts the usage patterns of the source vectors
05184   // and prepares appropriate SDValues for a shuffle if possible.
05185   for (unsigned i = 0; i < SourceVecs.size(); ++i) {
05186     if (SourceVecs[i].getValueType() == VT) {
05187       // No VEXT necessary
05188       ShuffleSrcs[i] = SourceVecs[i];
05189       VEXTOffsets[i] = 0;
05190       continue;
05191     } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
05192       // It probably isn't worth padding out a smaller vector just to
05193       // break it down again in a shuffle.
05194       return SDValue();
05195     }
05196 
05197     // Since only 64-bit and 128-bit vectors are legal on ARM and
05198     // we've eliminated the other cases...
05199     assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
05200            "unexpected vector sizes in ReconstructShuffle");
05201 
05202     if (MaxElts[i] - MinElts[i] >= NumElts) {
05203       // Span too large for a VEXT to cope
05204       return SDValue();
05205     }
05206 
05207     if (MinElts[i] >= NumElts) {
05208       // The extraction can just take the second half
05209       VEXTOffsets[i] = NumElts;
05210       ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
05211                                    SourceVecs[i],
05212                                    DAG.getIntPtrConstant(NumElts));
05213     } else if (MaxElts[i] < NumElts) {
05214       // The extraction can just take the first half
05215       VEXTOffsets[i] = 0;
05216       ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
05217                                    SourceVecs[i],
05218                                    DAG.getIntPtrConstant(0));
05219     } else {
05220       // An actual VEXT is needed
05221       VEXTOffsets[i] = MinElts[i];
05222       SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
05223                                      SourceVecs[i],
05224                                      DAG.getIntPtrConstant(0));
05225       SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
05226                                      SourceVecs[i],
05227                                      DAG.getIntPtrConstant(NumElts));
05228       ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
05229                                    DAG.getConstant(VEXTOffsets[i], MVT::i32));
05230     }
05231   }
05232 
05233   SmallVector<int, 8> Mask;
05234 
05235   for (unsigned i = 0; i < NumElts; ++i) {
05236     SDValue Entry = Op.getOperand(i);
05237     if (Entry.getOpcode() == ISD::UNDEF) {
05238       Mask.push_back(-1);
05239       continue;
05240     }
05241 
05242     SDValue ExtractVec = Entry.getOperand(0);
05243     int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
05244                                           .getOperand(1))->getSExtValue();
05245     if (ExtractVec == SourceVecs[0]) {
05246       Mask.push_back(ExtractElt - VEXTOffsets[0]);
05247     } else {
05248       Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
05249     }
05250   }
05251 
05252   // Final check before we try to produce nonsense...
05253   if (isShuffleMaskLegal(Mask, VT))
05254     return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
05255                                 &Mask[0]);
05256 
05257   return SDValue();
05258 }
05259 
05260 /// isShuffleMaskLegal - Targets can use this to indicate that they only
05261 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
05262 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
05263 /// are assumed to be legal.
05264 bool
05265 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
05266                                       EVT VT) const {
05267   if (VT.getVectorNumElements() == 4 &&
05268       (VT.is128BitVector() || VT.is64BitVector())) {
05269     unsigned PFIndexes[4];
05270     for (unsigned i = 0; i != 4; ++i) {
05271       if (M[i] < 0)
05272         PFIndexes[i] = 8;
05273       else
05274         PFIndexes[i] = M[i];
05275     }
05276 
05277     // Compute the index in the perfect shuffle table.
05278     unsigned PFTableIndex =
05279       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
05280     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
05281     unsigned Cost = (PFEntry >> 30);
05282 
05283     if (Cost <= 4)
05284       return true;
05285   }
05286 
05287   bool ReverseVEXT;
05288   unsigned Imm, WhichResult;
05289 
05290   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
05291   return (EltSize >= 32 ||
05292           ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
05293           isVREVMask(M, VT, 64) ||
05294           isVREVMask(M, VT, 32) ||
05295           isVREVMask(M, VT, 16) ||
05296           isVEXTMask(M, VT, ReverseVEXT, Imm) ||
05297           isVTBLMask(M, VT) ||
05298           isVTRNMask(M, VT, WhichResult) ||
05299           isVUZPMask(M, VT, WhichResult) ||
05300           isVZIPMask(M, VT, WhichResult) ||
05301           isVTRN_v_undef_Mask(M, VT, WhichResult) ||
05302           isVUZP_v_undef_Mask(M, VT, WhichResult) ||
05303           isVZIP_v_undef_Mask(M, VT, WhichResult) ||
05304           ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
05305 }
05306 
05307 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
05308 /// the specified operations to build the shuffle.
05309 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
05310                                       SDValue RHS, SelectionDAG &DAG,
05311                                       SDLoc dl) {
05312   unsigned OpNum = (PFEntry >> 26) & 0x0F;
05313   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
05314   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
05315 
05316   enum {
05317     OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
05318     OP_VREV,
05319     OP_VDUP0,
05320     OP_VDUP1,
05321     OP_VDUP2,
05322     OP_VDUP3,
05323     OP_VEXT1,
05324     OP_VEXT2,
05325     OP_VEXT3,
05326     OP_VUZPL, // VUZP, left result
05327     OP_VUZPR, // VUZP, right result
05328     OP_VZIPL, // VZIP, left result
05329     OP_VZIPR, // VZIP, right result
05330     OP_VTRNL, // VTRN, left result
05331     OP_VTRNR  // VTRN, right result
05332   };
05333 
05334   if (OpNum == OP_COPY) {
05335     if (LHSID == (1*9+2)*9+3) return LHS;
05336     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
05337     return RHS;
05338   }
05339 
05340   SDValue OpLHS, OpRHS;
05341   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
05342   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
05343   EVT VT = OpLHS.getValueType();
05344 
05345   switch (OpNum) {
05346   default: llvm_unreachable("Unknown shuffle opcode!");
05347   case OP_VREV:
05348     // VREV divides the vector in half and swaps within the half.
05349     if (VT.getVectorElementType() == MVT::i32 ||
05350         VT.getVectorElementType() == MVT::f32)
05351       return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
05352     // vrev <4 x i16> -> VREV32
05353     if (VT.getVectorElementType() == MVT::i16)
05354       return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
05355     // vrev <4 x i8> -> VREV16
05356     assert(VT.getVectorElementType() == MVT::i8);
05357     return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
05358   case OP_VDUP0:
05359   case OP_VDUP1:
05360   case OP_VDUP2:
05361   case OP_VDUP3:
05362     return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
05363                        OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
05364   case OP_VEXT1:
05365   case OP_VEXT2:
05366   case OP_VEXT3:
05367     return DAG.getNode(ARMISD::VEXT, dl, VT,
05368                        OpLHS, OpRHS,
05369                        DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
05370   case OP_VUZPL:
05371   case OP_VUZPR:
05372     return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
05373                        OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
05374   case OP_VZIPL:
05375   case OP_VZIPR:
05376     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
05377                        OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
05378   case OP_VTRNL:
05379   case OP_VTRNR:
05380     return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
05381                        OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
05382   }
05383 }
05384 
05385 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
05386                                        ArrayRef<int> ShuffleMask,
05387                                        SelectionDAG &DAG) {
05388   // Check to see if we can use the VTBL instruction.
05389   SDValue V1 = Op.getOperand(0);
05390   SDValue V2 = Op.getOperand(1);
05391   SDLoc DL(Op);
05392 
05393   SmallVector<SDValue, 8> VTBLMask;
05394   for (ArrayRef<int>::iterator
05395          I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
05396     VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
05397 
05398   if (V2.getNode()->getOpcode() == ISD::UNDEF)
05399     return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
05400                        DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
05401 
05402   return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
05403                      DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
05404 }
05405 
05406 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
05407                                                       SelectionDAG &DAG) {
05408   SDLoc DL(Op);
05409   SDValue OpLHS = Op.getOperand(0);
05410   EVT VT = OpLHS.getValueType();
05411 
05412   assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
05413          "Expect an v8i16/v16i8 type");
05414   OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
05415   // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
05416   // extract the first 8 bytes into the top double word and the last 8 bytes
05417   // into the bottom double word. The v8i16 case is similar.
05418   unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
05419   return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
05420                      DAG.getConstant(ExtractNum, MVT::i32));
05421 }
05422 
05423 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
05424   SDValue V1 = Op.getOperand(0);
05425   SDValue V2 = Op.getOperand(1);
05426   SDLoc dl(Op);
05427   EVT VT = Op.getValueType();
05428   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
05429 
05430   // Convert shuffles that are directly supported on NEON to target-specific
05431   // DAG nodes, instead of keeping them as shuffles and matching them again
05432   // during code selection.  This is more efficient and avoids the possibility
05433   // of inconsistencies between legalization and selection.
05434   // FIXME: floating-point vectors should be canonicalized to integer vectors
05435   // of the same time so that they get CSEd properly.
05436   ArrayRef<int> ShuffleMask = SVN->getMask();
05437 
05438   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
05439   if (EltSize <= 32) {
05440     if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
05441       int Lane = SVN->getSplatIndex();
05442       // If this is undef splat, generate it via "just" vdup, if possible.
05443       if (Lane == -1) Lane = 0;
05444 
05445       // Test if V1 is a SCALAR_TO_VECTOR.
05446       if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
05447         return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
05448       }
05449       // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
05450       // (and probably will turn into a SCALAR_TO_VECTOR once legalization
05451       // reaches it).
05452       if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
05453           !isa<ConstantSDNode>(V1.getOperand(0))) {
05454         bool IsScalarToVector = true;
05455         for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
05456           if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
05457             IsScalarToVector = false;
05458             break;
05459           }
05460         if (IsScalarToVector)
05461           return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
05462       }
05463       return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
05464                          DAG.getConstant(Lane, MVT::i32));
05465     }
05466 
05467     bool ReverseVEXT;
05468     unsigned Imm;
05469     if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
05470       if (ReverseVEXT)
05471         std::swap(V1, V2);
05472       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
05473                          DAG.getConstant(Imm, MVT::i32));
05474     }
05475 
05476     if (isVREVMask(ShuffleMask, VT, 64))
05477       return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
05478     if (isVREVMask(ShuffleMask, VT, 32))
05479       return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
05480     if (isVREVMask(ShuffleMask, VT, 16))
05481       return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
05482 
05483     if (V2->getOpcode() == ISD::UNDEF &&
05484         isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
05485       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
05486                          DAG.getConstant(Imm, MVT::i32));
05487     }
05488 
05489     // Check for Neon shuffles that modify both input vectors in place.
05490     // If both results are used, i.e., if there are two shuffles with the same
05491     // source operands and with masks corresponding to both results of one of
05492     // these operations, DAG memoization will ensure that a single node is
05493     // used for both shuffles.
05494     unsigned WhichResult;
05495     if (isVTRNMask(ShuffleMask, VT, WhichResult))
05496       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
05497                          V1, V2).getValue(WhichResult);
05498     if (isVUZPMask(ShuffleMask, VT, WhichResult))
05499       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
05500                          V1, V2).getValue(WhichResult);
05501     if (isVZIPMask(ShuffleMask, VT, WhichResult))
05502       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
05503                          V1, V2).getValue(WhichResult);
05504 
05505     if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
05506       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
05507                          V1, V1).getValue(WhichResult);
05508     if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
05509       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
05510                          V1, V1).getValue(WhichResult);
05511     if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
05512       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
05513                          V1, V1).getValue(WhichResult);
05514   }
05515 
05516   // If the shuffle is not directly supported and it has 4 elements, use
05517   // the PerfectShuffle-generated table to synthesize it from other shuffles.
05518   unsigned NumElts = VT.getVectorNumElements();
05519   if (NumElts == 4) {
05520     unsigned PFIndexes[4];
05521     for (unsigned i = 0; i != 4; ++i) {
05522       if (ShuffleMask[i] < 0)
05523         PFIndexes[i] = 8;
05524       else
05525         PFIndexes[i] = ShuffleMask[i];
05526     }
05527 
05528     // Compute the index in the perfect shuffle table.
05529     unsigned PFTableIndex =
05530       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
05531     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
05532     unsigned Cost = (PFEntry >> 30);
05533 
05534     if (Cost <= 4)
05535       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
05536   }
05537 
05538   // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
05539   if (EltSize >= 32) {
05540     // Do the expansion with floating-point types, since that is what the VFP
05541     // registers are defined to use, and since i64 is not legal.
05542     EVT EltVT = EVT::getFloatingPointVT(EltSize);
05543     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
05544     V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
05545     V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
05546     SmallVector<SDValue, 8> Ops;
05547     for (unsigned i = 0; i < NumElts; ++i) {
05548       if (ShuffleMask[i] < 0)
05549         Ops.push_back(DAG.getUNDEF(EltVT));
05550       else
05551         Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
05552                                   ShuffleMask[i] < (int)NumElts ? V1 : V2,
05553                                   DAG.getConstant(ShuffleMask[i] & (NumElts-1),
05554                                                   MVT::i32)));
05555     }
05556     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
05557     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
05558   }
05559 
05560   if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
05561     return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
05562 
05563   if (VT == MVT::v8i8) {
05564     SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
05565     if (NewOp.getNode())
05566       return NewOp;
05567   }
05568 
05569   return SDValue();