
ARMISelLowering.cpp
00001 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines the interfaces that ARM uses to lower LLVM code into a
00011 // selection DAG.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #define DEBUG_TYPE "arm-isel"
00016 #include "ARMISelLowering.h"
00017 #include "ARMCallingConv.h"
00018 #include "ARMConstantPoolValue.h"
00019 #include "ARMMachineFunctionInfo.h"
00020 #include "ARMPerfectShuffle.h"
00021 #include "ARMSubtarget.h"
00022 #include "ARMTargetMachine.h"
00023 #include "ARMTargetObjectFile.h"
00024 #include "MCTargetDesc/ARMAddressingModes.h"
00025 #include "llvm/ADT/Statistic.h"
00026 #include "llvm/ADT/StringExtras.h"
00027 #include "llvm/CodeGen/CallingConvLower.h"
00028 #include "llvm/CodeGen/IntrinsicLowering.h"
00029 #include "llvm/CodeGen/MachineBasicBlock.h"
00030 #include "llvm/CodeGen/MachineFrameInfo.h"
00031 #include "llvm/CodeGen/MachineFunction.h"
00032 #include "llvm/CodeGen/MachineInstrBuilder.h"
00033 #include "llvm/CodeGen/MachineModuleInfo.h"
00034 #include "llvm/CodeGen/MachineRegisterInfo.h"
00035 #include "llvm/CodeGen/SelectionDAG.h"
00036 #include "llvm/IR/CallingConv.h"
00037 #include "llvm/IR/Constants.h"
00038 #include "llvm/IR/Function.h"
00039 #include "llvm/IR/GlobalValue.h"
00040 #include "llvm/IR/Instruction.h"
00041 #include "llvm/IR/Instructions.h"
00042 #include "llvm/IR/Intrinsics.h"
00043 #include "llvm/IR/Type.h"
00044 #include "llvm/MC/MCSectionMachO.h"
00045 #include "llvm/Support/CommandLine.h"
00046 #include "llvm/Support/ErrorHandling.h"
00047 #include "llvm/Support/MathExtras.h"
00048 #include "llvm/Target/TargetOptions.h"
00049 #include <utility>
00050 using namespace llvm;
00051 
00052 STATISTIC(NumTailCalls, "Number of tail calls");
00053 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
00054 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
00055 
00056 cl::opt<bool>
00057 EnableARMLongCalls("arm-long-calls", cl::Hidden,
00058   cl::desc("Generate calls via indirect call instructions"),
00059   cl::init(false));
00060 
00061 static cl::opt<bool>
00062 ARMInterworking("arm-interworking", cl::Hidden,
00063   cl::desc("Enable / disable ARM interworking (for debugging only)"),
00064   cl::init(true));
00065 
00066 namespace {
00067   class ARMCCState : public CCState {
00068   public:
00069     ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
00070                const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
00071                LLVMContext &C, ParmContext PC)
00072         : CCState(CC, isVarArg, MF, TM, locs, C) {
00073       assert(((PC == Call) || (PC == Prologue)) &&
00074              "ARMCCState users must specify whether their context is call "
00075              "or prologue generation.");
00076       CallOrPrologue = PC;
00077     }
00078   };
00079 }
00080 
00081 // The APCS parameter registers.
00082 static const MCPhysReg GPRArgRegs[] = {
00083   ARM::R0, ARM::R1, ARM::R2, ARM::R3
00084 };
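// Illustrative note (not part of the original source): under APCS/AAPCS the
// first four word-sized arguments are passed in r0-r3 and the remainder goes
// on the stack, e.g. for f(int a, int b, int c, int d, int e) the values a..d
// arrive in r0..r3 and e is read from [sp].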
00085 
00086 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
00087                                        MVT PromotedBitwiseVT) {
00088   if (VT != PromotedLdStVT) {
00089     setOperationAction(ISD::LOAD, VT, Promote);
00090     AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
00091 
00092     setOperationAction(ISD::STORE, VT, Promote);
00093     AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
00094   }
00095 
00096   MVT ElemTy = VT.getVectorElementType();
00097   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
00098     setOperationAction(ISD::SETCC, VT, Custom);
00099   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
00100   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
00101   if (ElemTy == MVT::i32) {
00102     setOperationAction(ISD::SINT_TO_FP, VT, Custom);
00103     setOperationAction(ISD::UINT_TO_FP, VT, Custom);
00104     setOperationAction(ISD::FP_TO_SINT, VT, Custom);
00105     setOperationAction(ISD::FP_TO_UINT, VT, Custom);
00106   } else {
00107     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
00108     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
00109     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
00110     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
00111   }
00112   setOperationAction(ISD::BUILD_VECTOR,      VT, Custom);
00113   setOperationAction(ISD::VECTOR_SHUFFLE,    VT, Custom);
00114   setOperationAction(ISD::CONCAT_VECTORS,    VT, Legal);
00115   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
00116   setOperationAction(ISD::SELECT,            VT, Expand);
00117   setOperationAction(ISD::SELECT_CC,         VT, Expand);
00118   setOperationAction(ISD::VSELECT,           VT, Expand);
00119   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
00120   if (VT.isInteger()) {
00121     setOperationAction(ISD::SHL, VT, Custom);
00122     setOperationAction(ISD::SRA, VT, Custom);
00123     setOperationAction(ISD::SRL, VT, Custom);
00124   }
00125 
00126   // Promote all bit-wise operations.
00127   if (VT.isInteger() && VT != PromotedBitwiseVT) {
00128     setOperationAction(ISD::AND, VT, Promote);
00129     AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
00130     setOperationAction(ISD::OR,  VT, Promote);
00131     AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
00132     setOperationAction(ISD::XOR, VT, Promote);
00133     AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
00134   }
00135 
00136   // Neon does not support vector divide/remainder operations.
00137   setOperationAction(ISD::SDIV, VT, Expand);
00138   setOperationAction(ISD::UDIV, VT, Expand);
00139   setOperationAction(ISD::FDIV, VT, Expand);
00140   setOperationAction(ISD::SREM, VT, Expand);
00141   setOperationAction(ISD::UREM, VT, Expand);
00142   setOperationAction(ISD::FREM, VT, Expand);
00143 }
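// Rough summary of the operation actions used above (illustrative gloss):
// Legal means the node is selected as-is, Custom routes it through
// ARMTargetLowering::LowerOperation, Expand lets the legalizer rewrite it
// (often into a scalarized sequence or a libcall), and Promote together with
// AddPromotedToType bitcasts the vector to the wider-element type first,
// e.g. an AND on v8i8 is carried out as an AND on v2i32 for D registers.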
00144 
00145 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
00146   addRegisterClass(VT, &ARM::DPRRegClass);
00147   addTypeForNEON(VT, MVT::f64, MVT::v2i32);
00148 }
00149 
00150 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
00151   addRegisterClass(VT, &ARM::DPairRegClass);
00152   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
00153 }
00154 
00155 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
00156   if (TM.getSubtarget<ARMSubtarget>().isTargetMachO())
00157     return new TargetLoweringObjectFileMachO();
00158 
00159   return new ARMElfTargetObjectFile();
00160 }
00161 
00162 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
00163     : TargetLowering(TM, createTLOF(TM)) {
00164   Subtarget = &TM.getSubtarget<ARMSubtarget>();
00165   RegInfo = TM.getRegisterInfo();
00166   Itins = TM.getInstrItineraryData();
00167 
00168   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
00169 
00170   if (Subtarget->isTargetMachO()) {
00171     // Uses VFP for Thumb libfuncs if available.
00172     if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
00173         Subtarget->hasARMOps()) {
00174       // Single-precision floating-point arithmetic.
00175       setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
00176       setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
00177       setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
00178       setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
00179 
00180       // Double-precision floating-point arithmetic.
00181       setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
00182       setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
00183       setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
00184       setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
00185 
00186       // Single-precision comparisons.
00187       setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
00188       setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
00189       setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
00190       setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
00191       setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
00192       setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
00193       setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
00194       setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
00195 
00196       setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
00197       setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
00198       setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
00199       setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
00200       setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
00201       setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
00202       setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
00203       setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
00204 
00205       // Double-precision comparisons.
00206       setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
00207       setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
00208       setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
00209       setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
00210       setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
00211       setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
00212       setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
00213       setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
00214 
00215       setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
00216       setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
00217       setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
00218       setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
00219       setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
00220       setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
00221       setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
00222       setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
00223 
00224       // Floating-point to integer conversions.
00225       // i64 conversions are done via library routines even when generating VFP
00226       // instructions, so use the same ones.
00227       setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
00228       setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
00229       setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
00230       setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
00231 
00232       // Conversions between floating types.
00233       setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
00234       setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
00235 
00236       // Integer to floating-point conversions.
00237       // i64 conversions are done via library routines even when generating VFP
00238       // instructions, so use the same ones.
00239       // FIXME: There appears to be some naming inconsistency in ARM libgcc:
00240       // e.g., __floatunsidf vs. __floatunssidfvfp.
00241       setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
00242       setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
00243       setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
00244       setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
00245     }
00246   }
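  // For example (illustrative): with the VFP libcall setup above, an ordered
  // f32 equality compare becomes a call to __eqsf2vfp, and the SETNE recorded
  // by setCmpLibcallCC means the call's i32 result is then tested against
  // zero ("result != 0" == "operands were equal").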
00247 
00248   // These libcalls are not available in 32-bit.
00249   setLibcallName(RTLIB::SHL_I128, 0);
00250   setLibcallName(RTLIB::SRL_I128, 0);
00251   setLibcallName(RTLIB::SRA_I128, 0);
00252 
00253   if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
00254       !Subtarget->isTargetWindows()) {
00255     // Double-precision floating-point arithmetic helper functions
00256     // RTABI chapter 4.1.2, Table 2
00257     setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
00258     setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
00259     setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
00260     setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
00261     setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
00262     setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
00263     setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
00264     setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
00265 
00266     // Double-precision floating-point comparison helper functions
00267     // RTABI chapter 4.1.2, Table 3
00268     setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
00269     setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
00270     setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
00271     setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
00272     setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
00273     setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
00274     setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
00275     setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
00276     setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
00277     setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
00278     setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
00279     setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
00280     setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
00281     setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
00282     setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
00283     setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
00284     setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
00285     setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
00286     setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
00287     setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
00288     setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
00289     setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
00290     setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
00291     setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
00292 
00293     // Single-precision floating-point arithmetic helper functions
00294     // RTABI chapter 4.1.2, Table 4
00295     setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
00296     setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
00297     setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
00298     setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
00299     setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
00300     setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
00301     setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
00302     setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
00303 
00304     // Single-precision floating-point comparison helper functions
00305     // RTABI chapter 4.1.2, Table 5
00306     setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
00307     setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
00308     setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
00309     setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
00310     setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
00311     setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
00312     setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
00313     setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
00314     setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
00315     setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
00316     setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
00317     setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
00318     setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
00319     setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
00320     setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
00321     setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
00322     setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
00323     setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
00324     setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
00325     setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
00326     setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
00327     setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
00328     setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
00329     setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
00330 
00331     // Floating-point to integer conversions.
00332     // RTABI chapter 4.1.2, Table 6
00333     setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
00334     setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
00335     setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
00336     setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
00337     setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
00338     setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
00339     setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
00340     setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
00341     setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
00342     setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
00343     setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
00344     setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
00345     setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
00346     setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
00347     setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
00348     setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
00349 
00350     // Conversions between floating types.
00351     // RTABI chapter 4.1.2, Table 7
00352     setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
00353     setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
00354     setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
00355     setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
00356 
00357     // Integer to floating-point conversions.
00358     // RTABI chapter 4.1.2, Table 8
00359     setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
00360     setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
00361     setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
00362     setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
00363     setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
00364     setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
00365     setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
00366     setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
00367     setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
00368     setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
00369     setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
00370     setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
00371     setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
00372     setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
00373     setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
00374     setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
00375 
00376     // Long long helper functions
00377     // RTABI chapter 4.2, Table 9
00378     setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
00379     setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
00380     setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
00381     setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
00382     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
00383     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
00384     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
00385     setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
00386     setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
00387     setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
00388 
00389     // Integer division functions
00390     // RTABI chapter 4.3.1
00391     setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
00392     setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
00393     setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
00394     setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
00395     setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
00396     setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
00397     setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
00398     setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
00399     setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
00400     setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
00401     setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
00402     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
00403     setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
00404     setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
00405     setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
00406     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
00407 
00408     // Memory operations
00409     // RTABI chapter 4.3.4
00410     setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
00411     setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
00412     setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
00413     setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
00414     setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
00415     setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
00416   }
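  // Example of the effect of the RTABI tables above (illustrative): a 64-bit
  // unsigned division such as
  //   %q = udiv i64 %a, %b
  // is lowered to a call to __aeabi_uldivmod using the ARM_AAPCS calling
  // convention instead of the default __udivdi3 helper.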
00417 
00418   // Use divmod compiler-rt calls for iOS 5.0 and later.
00419   if (Subtarget->getTargetTriple().isiOS() &&
00420       !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
00421     setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
00422     setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
00423   }
00424 
00425   if (Subtarget->isThumb1Only())
00426     addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
00427   else
00428     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
00429   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
00430       !Subtarget->isThumb1Only()) {
00431     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
00432     if (!Subtarget->isFPOnlySP())
00433       addRegisterClass(MVT::f64, &ARM::DPRRegClass);
00434 
00435     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
00436   }
00437 
00438   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
00439        VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
00440     for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
00441          InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
00442       setTruncStoreAction((MVT::SimpleValueType)VT,
00443                           (MVT::SimpleValueType)InnerVT, Expand);
00444     setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
00445     setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
00446     setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
00447   }
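  // In other words (illustrative note): every vector-to-vector truncating
  // store and extending load defaults to Expand here, so the legalizer emits
  // an explicit truncate/extend plus a plain store/load unless a later, more
  // specific setting (see the NEON extload block below) marks it Legal.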
00448 
00449   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
00450   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
00451 
00452   if (Subtarget->hasNEON()) {
00453     addDRTypeForNEON(MVT::v2f32);
00454     addDRTypeForNEON(MVT::v8i8);
00455     addDRTypeForNEON(MVT::v4i16);
00456     addDRTypeForNEON(MVT::v2i32);
00457     addDRTypeForNEON(MVT::v1i64);
00458 
00459     addQRTypeForNEON(MVT::v4f32);
00460     addQRTypeForNEON(MVT::v2f64);
00461     addQRTypeForNEON(MVT::v16i8);
00462     addQRTypeForNEON(MVT::v8i16);
00463     addQRTypeForNEON(MVT::v4i32);
00464     addQRTypeForNEON(MVT::v2i64);
00465 
00466     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
00467     // neither Neon nor VFP supports any arithmetic operations on it.
00468     // The same goes for v4f32, although vadd, vsub and vmul are natively
00469     // supported for v4f32.
00470     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
00471     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
00472     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
00473     // FIXME: Code duplication: FDIV and FREM are always expanded; see the
00474     // ARMTargetLowering::addTypeForNEON method for details.
00475     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
00476     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
00477     // FIXME: Create unittest.
00478     // In other words, find a way to test the case where "copysign" appears in a
00479     // DAG with vector operands.
00480     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
00481     // FIXME: Code duplication: SETCC has a custom operation action; see the
00482     // ARMTargetLowering::addTypeForNEON method for details.
00483     setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
00484     // FIXME: Create unittest for FNEG and for FABS.
00485     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
00486     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
00487     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
00488     setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
00489     setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
00490     setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
00491     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
00492     setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
00493     setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
00494     setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
00495     setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
00496     setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
00497     // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
00498     setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
00499     setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
00500     setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
00501     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
00502     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
00503     setOperationAction(ISD::FMA, MVT::v2f64, Expand);
00504 
00505     setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
00506     setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
00507     setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
00508     setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
00509     setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
00510     setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
00511     setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
00512     setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
00513     setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
00514     setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
00515     setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
00516     setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
00517     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
00518     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
00519     setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
00520 
00521     // Expand the v2f32 versions of these floating-point operations as well.
00522     setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
00523     setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
00524     setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
00525     setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
00526     setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
00527     setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
00528     setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
00529     setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
00530     setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
00531     setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
00532     setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
00533     setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
00534     setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
00535     setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
00536     setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
00537 
00538     // Neon does not support some operations on v1i64 and v2i64 types.
00539     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
00540     // Custom handling for some quad-vector types to detect VMULL.
00541     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
00542     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
00543     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
00544     // Custom handling for some vector types to avoid expensive expansions
00545     setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
00546     setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
00547     setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
00548     setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
00549     setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
00550     setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
00551     // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
00552     // a destination type that is wider than the source, nor does
00553     // it have an FP_TO_[SU]INT instruction with a narrower destination than
00554     // source.
00555     setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
00556     setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
00557     setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
00558     setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
00559 
00560     setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);
00561     setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);
00562 
00563     // NEON does not have a single-instruction CTPOP for vectors with element
00564     // types wider than 8 bits.  However, custom lowering can leverage the
00565     // v8i8/v16i8 vcnt instruction.
00566     setOperationAction(ISD::CTPOP,      MVT::v2i32, Custom);
00567     setOperationAction(ISD::CTPOP,      MVT::v4i32, Custom);
00568     setOperationAction(ISD::CTPOP,      MVT::v4i16, Custom);
00569     setOperationAction(ISD::CTPOP,      MVT::v8i16, Custom);
00570 
00571     // NEON only has FMA instructions as of VFP4.
00572     if (!Subtarget->hasVFP4()) {
00573       setOperationAction(ISD::FMA, MVT::v2f32, Expand);
00574       setOperationAction(ISD::FMA, MVT::v4f32, Expand);
00575     }
00576 
00577     setTargetDAGCombine(ISD::INTRINSIC_VOID);
00578     setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
00579     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
00580     setTargetDAGCombine(ISD::SHL);
00581     setTargetDAGCombine(ISD::SRL);
00582     setTargetDAGCombine(ISD::SRA);
00583     setTargetDAGCombine(ISD::SIGN_EXTEND);
00584     setTargetDAGCombine(ISD::ZERO_EXTEND);
00585     setTargetDAGCombine(ISD::ANY_EXTEND);
00586     setTargetDAGCombine(ISD::SELECT_CC);
00587     setTargetDAGCombine(ISD::BUILD_VECTOR);
00588     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
00589     setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
00590     setTargetDAGCombine(ISD::STORE);
00591     setTargetDAGCombine(ISD::FP_TO_SINT);
00592     setTargetDAGCombine(ISD::FP_TO_UINT);
00593     setTargetDAGCombine(ISD::FDIV);
00594 
00595     // It is legal to extload from v4i8 to v4i16 or v4i32.
00596     MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
00597                   MVT::v4i16, MVT::v2i16,
00598                   MVT::v2i32};
00599     for (unsigned i = 0; i < 6; ++i) {
00600       setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
00601       setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
00602       setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
00603     }
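    // Illustrative: with these actions a sign-extending load from a v4i8 in
    // memory into a v4i32 register stays a single extload node, which the
    // backend can match to a vld1 followed by vmovl-style widening rather
    // than four scalar loads.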
00604   }
00605 
00606   // ARM and Thumb2 support UMLAL/SMLAL.
00607   if (!Subtarget->isThumb1Only())
00608     setTargetDAGCombine(ISD::ADDC);
00609 
00610 
00611   computeRegisterProperties();
00612 
00613   // ARM does not have f32 extending load.
00614   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
00615 
00616   // ARM does not have i1 sign extending load.
00617   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
00618 
00619   // ARM supports all 4 flavors of integer indexed load / store.
00620   if (!Subtarget->isThumb1Only()) {
00621     for (unsigned im = (unsigned)ISD::PRE_INC;
00622          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
00623       setIndexedLoadAction(im,  MVT::i1,  Legal);
00624       setIndexedLoadAction(im,  MVT::i8,  Legal);
00625       setIndexedLoadAction(im,  MVT::i16, Legal);
00626       setIndexedLoadAction(im,  MVT::i32, Legal);
00627       setIndexedStoreAction(im, MVT::i1,  Legal);
00628       setIndexedStoreAction(im, MVT::i8,  Legal);
00629       setIndexedStoreAction(im, MVT::i16, Legal);
00630       setIndexedStoreAction(im, MVT::i32, Legal);
00631     }
00632   }
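  // Illustrative example: marking these Legal lets the DAG combiner form
  // pre/post-indexed memory operations, which map onto ARM writeback
  // addressing such as
  //   ldr r0, [r1, #4]!   ; pre-indexed
  //   ldr r0, [r1], #4    ; post-indexed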
00633 
00634   // i64 operation support.
00635   setOperationAction(ISD::MUL,     MVT::i64, Expand);
00636   setOperationAction(ISD::MULHU,   MVT::i32, Expand);
00637   if (Subtarget->isThumb1Only()) {
00638     setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
00639     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
00640   }
00641   if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
00642       || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
00643     setOperationAction(ISD::MULHS, MVT::i32, Expand);
00644 
00645   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
00646   setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
00647   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
00648   setOperationAction(ISD::SRL,       MVT::i64, Custom);
00649   setOperationAction(ISD::SRA,       MVT::i64, Custom);
00650 
00651   if (!Subtarget->isThumb1Only()) {
00652     // FIXME: We should do this for Thumb1 as well.
00653     setOperationAction(ISD::ADDC,    MVT::i32, Custom);
00654     setOperationAction(ISD::ADDE,    MVT::i32, Custom);
00655     setOperationAction(ISD::SUBC,    MVT::i32, Custom);
00656     setOperationAction(ISD::SUBE,    MVT::i32, Custom);
00657   }
00658 
00659   // ARM does not have ROTL.
00660   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
00661   setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
00662   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
00663   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
00664     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
00665 
00666   // These just redirect to CTTZ and CTLZ on ARM.
00667   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
00668   setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);
00669 
00670   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
00671 
00672   // Only ARMv6 and later have BSWAP (REV).
00673   if (!Subtarget->hasV6Ops())
00674     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
00675 
00676   if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
00677       !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
00678     // These are expanded into libcalls if the CPU doesn't have a HW divider.
00679     setOperationAction(ISD::SDIV,  MVT::i32, Expand);
00680     setOperationAction(ISD::UDIV,  MVT::i32, Expand);
00681   }
00682 
00683   // FIXME: Also set divmod for SREM on EABI
00684   setOperationAction(ISD::SREM,  MVT::i32, Expand);
00685   setOperationAction(ISD::UREM,  MVT::i32, Expand);
00686   // Register based DivRem for AEABI (RTABI 4.2)
00687   if (Subtarget->isTargetAEABI()) {
00688     setLibcallName(RTLIB::SDIVREM_I8,  "__aeabi_idivmod");
00689     setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
00690     setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
00691     setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
00692     setLibcallName(RTLIB::UDIVREM_I8,  "__aeabi_uidivmod");
00693     setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
00694     setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
00695     setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
00696 
00697     setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
00698     setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
00699     setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
00700     setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
00701     setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
00702     setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
00703     setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
00704     setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
00705 
00706     setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
00707     setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
00708   } else {
00709     setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
00710     setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
00711   }
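  // Illustrative: on AEABI targets a combined quotient/remainder such as
  //   %q = sdiv i32 %a, %b
  //   %r = srem i32 %a, %b
  // becomes a single call to __aeabi_idivmod, which returns the quotient in
  // r0 and the remainder in r1 (RTABI 4.3.1).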
00712 
00713   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
00714   setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
00715   setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
00716   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
00717   setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
00718 
00719   setOperationAction(ISD::TRAP, MVT::Other, Legal);
00720 
00721   // Use the default implementation.
00722   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
00723   setOperationAction(ISD::VAARG,              MVT::Other, Expand);
00724   setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
00725   setOperationAction(ISD::VAEND,              MVT::Other, Expand);
00726   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
00727   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
00728 
00729   if (!Subtarget->isTargetMachO()) {
00730     // Non-MachO platforms may return values in these registers via the
00731     // personality function.
00732     setExceptionPointerRegister(ARM::R0);
00733     setExceptionSelectorRegister(ARM::R1);
00734   }
00735 
00736   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
00737   // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
00738   // the default expansion.
00739   if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
00740     // ATOMIC_FENCE needs custom lowering; the others should have been expanded
00741     // to ldrex/strex loops already.
00742     setOperationAction(ISD::ATOMIC_FENCE,     MVT::Other, Custom);
00743 
00744     // On v8, we have particularly efficient implementations of atomic fences
00745     // if they can be combined with nearby atomic loads and stores.
00746     if (!Subtarget->hasV8Ops()) {
00747       // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
00748       setInsertFencesForAtomic(true);
00749     }
00750   } else {
00751     // If there's anything we can use as a barrier, go through custom lowering
00752     // for ATOMIC_FENCE.
00753     setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other,
00754                        Subtarget->hasAnyDataBarrier() ? Custom : Expand);
00755 
00756     // Set them all for expansion, which will force libcalls.
00757     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
00758     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
00759     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
00760     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
00761     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
00762     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
00763     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
00764     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
00765     setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
00766     setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
00767     setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
00768     setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
00769     // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
00770     // Unordered/Monotonic case.
00771     setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
00772     setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
00773   }
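  // Rough consequence of the Expand settings above (illustrative): on targets
  // with no usable barrier the atomic RMW operations become __sync_* library
  // calls, e.g. an atomicrmw add on i32 turns into __sync_fetch_and_add_4.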
00774 
00775   setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);
00776 
00777   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
00778   if (!Subtarget->hasV6Ops()) {
00779     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
00780     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
00781   }
00782   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
00783 
00784   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
00785       !Subtarget->isThumb1Only()) {
00786     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
00787     // iff target supports vfp2.
00788     setOperationAction(ISD::BITCAST, MVT::i64, Custom);
00789     setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
00790   }
00791 
00792   // We want to custom lower some of our intrinsics.
00793   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
00794   if (Subtarget->isTargetDarwin()) {
00795     setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
00796     setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
00797     setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
00798   }
00799 
00800   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
00801   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
00802   setOperationAction(ISD::SETCC,     MVT::f64, Expand);
00803   setOperationAction(ISD::SELECT,    MVT::i32, Custom);
00804   setOperationAction(ISD::SELECT,    MVT::f32, Custom);
00805   setOperationAction(ISD::SELECT,    MVT::f64, Custom);
00806   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
00807   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
00808   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
00809 
00810   setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
00811   setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
00812   setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
00813   setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
00814   setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
00815 
00816   // We don't support sin/cos/fmod/copysign/pow
00817   setOperationAction(ISD::FSIN,      MVT::f64, Expand);
00818   setOperationAction(ISD::FSIN,      MVT::f32, Expand);
00819   setOperationAction(ISD::FCOS,      MVT::f32, Expand);
00820   setOperationAction(ISD::FCOS,      MVT::f64, Expand);
00821   setOperationAction(ISD::FSINCOS,   MVT::f64, Expand);
00822   setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
00823   setOperationAction(ISD::FREM,      MVT::f64, Expand);
00824   setOperationAction(ISD::FREM,      MVT::f32, Expand);
00825   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
00826       !Subtarget->isThumb1Only()) {
00827     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
00828     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
00829   }
00830   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
00831   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
00832 
00833   if (!Subtarget->hasVFP4()) {
00834     setOperationAction(ISD::FMA, MVT::f64, Expand);
00835     setOperationAction(ISD::FMA, MVT::f32, Expand);
00836   }
00837 
00838   // Various VFP goodness
00839   if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
00840     // int <-> fp are custom expanded into bit_convert + ARMISD ops.
00841     if (Subtarget->hasVFP2()) {
00842       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
00843       setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
00844       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
00845       setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
00846     }
00847     // Special handling for half-precision FP.
00848     if (!Subtarget->hasFP16()) {
00849       setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
00850       setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
00851     }
00852   }
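  // Note (illustrative): when FP16 hardware is absent, the Expand actions
  // above make half<->float conversions go through the usual soft-float
  // helpers (typically __gnu_h2f_ieee / __gnu_f2h_ieee in ARM toolchains).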
00853 
00854   // Combine sin / cos into one node or libcall if possible.
00855   if (Subtarget->hasSinCos()) {
00856     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
00857     setLibcallName(RTLIB::SINCOS_F64, "sincos");
00858     if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
00859       // For iOS, we don't want the normal expansion of a libcall to
00860       // sincos. We want to issue a libcall to __sincos_stret instead.
00861       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
00862       setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
00863     }
00864   }
00865 
00866   // We have target-specific dag combine patterns for the following nodes:
00867   // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
00868   setTargetDAGCombine(ISD::ADD);
00869   setTargetDAGCombine(ISD::SUB);
00870   setTargetDAGCombine(ISD::MUL);
00871   setTargetDAGCombine(ISD::AND);
00872   setTargetDAGCombine(ISD::OR);
00873   setTargetDAGCombine(ISD::XOR);
00874 
00875   if (Subtarget->hasV6Ops())
00876     setTargetDAGCombine(ISD::SRL);
00877 
00878   setStackPointerRegisterToSaveRestore(ARM::SP);
00879 
00880   if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
00881       !Subtarget->hasVFP2())
00882     setSchedulingPreference(Sched::RegPressure);
00883   else
00884     setSchedulingPreference(Sched::Hybrid);
00885 
00886   // Temporary - rewrite interface to use type.
00887   MaxStoresPerMemset = 8;
00888   MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
00889   MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
00890   MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
00891   MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
00892   MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
00893 
00894   // On ARM, arguments smaller than 4 bytes are extended, so all arguments
00895   // are at least 4-byte aligned.
00896   setMinStackArgumentAlignment(4);
00897 
00898   // Prefer likely predicted branches to selects on out-of-order cores.
00899   PredictableSelectIsExpensive = Subtarget->isLikeA9();
00900 
00901   setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
00902 }
00903 
00904 // FIXME: It might make sense to define the representative register class as the
00905 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
00906 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
00907 // SPR's representative would be DPR_VFP2. This should work well if register
00908 // pressure tracking were modified such that a register use would increment the
00909 // pressure of the register class's representative and all of its super
00910 // classes' representatives transitively. We have not implemented this because
00911 // of the difficulty prior to coalescing of modeling operand register classes
00912 // due to the common occurrence of cross class copies and subregister insertions
00913 // and extractions.
00914 std::pair<const TargetRegisterClass*, uint8_t>
00915 ARMTargetLowering::findRepresentativeClass(MVT VT) const {
00916   const TargetRegisterClass *RRC = 0;
00917   uint8_t Cost = 1;
00918   switch (VT.SimpleTy) {
00919   default:
00920     return TargetLowering::findRepresentativeClass(VT);
00921   // Use DPR as the representative register class for all floating-point
00922   // and vector types. Since there are 32 SPR registers and 32 DPR registers,
00923   // the cost is 1 for both f32 and f64.
00924   case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
00925   case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
00926     RRC = &ARM::DPRRegClass;
00927     // When NEON is used for SP, only half of the register file is available
00928     // because operations that define both SP and DP results will be constrained
00929     // to the VFP2 class (D0-D15). We currently model this constraint prior to
00930     // coalescing by double-counting the SP regs. See the FIXME above.
00931     if (Subtarget->useNEONForSinglePrecisionFP())
00932       Cost = 2;
00933     break;
00934   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
00935   case MVT::v4f32: case MVT::v2f64:
00936     RRC = &ARM::DPRRegClass;
00937     Cost = 2;
00938     break;
00939   case MVT::v4i64:
00940     RRC = &ARM::DPRRegClass;
00941     Cost = 4;
00942     break;
00943   case MVT::v8i64:
00944     RRC = &ARM::DPRRegClass;
00945     Cost = 8;
00946     break;
00947   }
00948   return std::make_pair(RRC, Cost);
00949 }
00950 
00951 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
00952   switch (Opcode) {
00953   default: return 0;
00954   case ARMISD::Wrapper:       return "ARMISD::Wrapper";
00955   case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
00956   case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
00957   case ARMISD::CALL:          return "ARMISD::CALL";
00958   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
00959   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
00960   case ARMISD::tCALL:         return "ARMISD::tCALL";
00961   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
00962   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
00963   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
00964   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
00965   case ARMISD::INTRET_FLAG:   return "ARMISD::INTRET_FLAG";
00966   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
00967   case ARMISD::CMP:           return "ARMISD::CMP";
00968   case ARMISD::CMN:           return "ARMISD::CMN";
00969   case ARMISD::CMPZ:          return "ARMISD::CMPZ";
00970   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
00971   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
00972   case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
00973   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
00974 
00975   case ARMISD::CMOV:          return "ARMISD::CMOV";
00976 
00977   case ARMISD::RBIT:          return "ARMISD::RBIT";
00978 
00979   case ARMISD::FTOSI:         return "ARMISD::FTOSI";
00980   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
00981   case ARMISD::SITOF:         return "ARMISD::SITOF";
00982   case ARMISD::UITOF:         return "ARMISD::UITOF";
00983 
00984   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
00985   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
00986   case ARMISD::RRX:           return "ARMISD::RRX";
00987 
00988   case ARMISD::ADDC:          return "ARMISD::ADDC";
00989   case ARMISD::ADDE:          return "ARMISD::ADDE";
00990   case ARMISD::SUBC:          return "ARMISD::SUBC";
00991   case ARMISD::SUBE:          return "ARMISD::SUBE";
00992 
00993   case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
00994   case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
00995 
00996   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
00997   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
00998 
00999   case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
01000 
01001   case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
01002 
01003   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
01004 
01005   case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
01006 
01007   case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
01008 
01009   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
01010   case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
01011   case ARMISD::VCGE:          return "ARMISD::VCGE";
01012   case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
01013   case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
01014   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
01015   case ARMISD::VCGT:          return "ARMISD::VCGT";
01016   case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
01017   case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
01018   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
01019   case ARMISD::VTST:          return "ARMISD::VTST";
01020 
01021   case ARMISD::VSHL:          return "ARMISD::VSHL";
01022   case ARMISD::VSHRs:         return "ARMISD::VSHRs";
01023   case ARMISD::VSHRu:         return "ARMISD::VSHRu";
01024   case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
01025   case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
01026   case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
01027   case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
01028   case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
01029   case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
01030   case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
01031   case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
01032   case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
01033   case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
01034   case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
01035   case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
01036   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
01037   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
01038   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
01039   case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
01040   case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
01041   case ARMISD::VDUP:          return "ARMISD::VDUP";
01042   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
01043   case ARMISD::VEXT:          return "ARMISD::VEXT";
01044   case ARMISD::VREV64:        return "ARMISD::VREV64";
01045   case ARMISD::VREV32:        return "ARMISD::VREV32";
01046   case ARMISD::VREV16:        return "ARMISD::VREV16";
01047   case ARMISD::VZIP:          return "ARMISD::VZIP";
01048   case ARMISD::VUZP:          return "ARMISD::VUZP";
01049   case ARMISD::VTRN:          return "ARMISD::VTRN";
01050   case ARMISD::VTBL1:         return "ARMISD::VTBL1";
01051   case ARMISD::VTBL2:         return "ARMISD::VTBL2";
01052   case ARMISD::VMULLs:        return "ARMISD::VMULLs";
01053   case ARMISD::VMULLu:        return "ARMISD::VMULLu";
01054   case ARMISD::UMLAL:         return "ARMISD::UMLAL";
01055   case ARMISD::SMLAL:         return "ARMISD::SMLAL";
01056   case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
01057   case ARMISD::FMAX:          return "ARMISD::FMAX";
01058   case ARMISD::FMIN:          return "ARMISD::FMIN";
01059   case ARMISD::VMAXNM:        return "ARMISD::VMAXNM";
01060   case ARMISD::VMINNM:        return "ARMISD::VMINNM";
01061   case ARMISD::BFI:           return "ARMISD::BFI";
01062   case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
01063   case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
01064   case ARMISD::VBSL:          return "ARMISD::VBSL";
01065   case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
01066   case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
01067   case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
01068   case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
01069   case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
01070   case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
01071   case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
01072   case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
01073   case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
01074   case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
01075   case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
01076   case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
01077   case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
01078   case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
01079   case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
01080   case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
01081   case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
01082   case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
01083   case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
01084   case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
01085   }
01086 }
01087 
01088 EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
01089   if (!VT.isVector()) return getPointerTy();
01090   return VT.changeVectorElementTypeToInteger();
01091 }
01092 
01093 /// getRegClassFor - Return the register class that should be used for the
01094 /// specified value type.
01095 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
01096   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
01097   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
01098   // load / store 4 to 8 consecutive D registers.
01099   if (Subtarget->hasNEON()) {
01100     if (VT == MVT::v4i64)
01101       return &ARM::QQPRRegClass;
01102     if (VT == MVT::v8i64)
01103       return &ARM::QQQQPRRegClass;
01104   }
01105   return TargetLowering::getRegClassFor(VT);
01106 }
01107 
01108 // Create a fast isel object.
01109 FastISel *
01110 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
01111                                   const TargetLibraryInfo *libInfo) const {
01112   return ARM::createFastISel(funcInfo, libInfo);
01113 }
01114 
01115 /// getMaximalGlobalOffset - Returns the maximal possible offset which can
01116 /// be used for loads / stores from the global.
01117 unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
01118   return (Subtarget->isThumb1Only() ? 127 : 4095);
01119 }
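      // Editorial note: 4095 is the 12-bit immediate offset accepted by ARM-mode
      // LDR/STR (word and byte forms); the much smaller 127 for Thumb1 is
      // presumably a conservative bound for Thumb1's narrow load/store offset
      // encodings.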
01120 
01121 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
01122   unsigned NumVals = N->getNumValues();
01123   if (!NumVals)
01124     return Sched::RegPressure;
01125 
01126   for (unsigned i = 0; i != NumVals; ++i) {
01127     EVT VT = N->getValueType(i);
01128     if (VT == MVT::Glue || VT == MVT::Other)
01129       continue;
01130     if (VT.isFloatingPoint() || VT.isVector())
01131       return Sched::ILP;
01132   }
01133 
01134   if (!N->isMachineOpcode())
01135     return Sched::RegPressure;
01136 
01137   // Loads are scheduled for latency even if the instruction itinerary
01138   // is not available.
01139   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
01140   const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
01141 
01142   if (MCID.getNumDefs() == 0)
01143     return Sched::RegPressure;
01144   if (!Itins->isEmpty() &&
01145       Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
01146     return Sched::ILP;
01147 
01148   return Sched::RegPressure;
01149 }
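      // Illustrative summary of the heuristic above: nodes producing floating-point
      // or vector values prefer ILP scheduling; machine nodes whose first definition
      // has an operand latency greater than 2 cycles in the itinerary (typically
      // loads) also prefer ILP; everything else defaults to register-pressure
      // scheduling.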
01150 
01151 //===----------------------------------------------------------------------===//
01152 // Lowering Code
01153 //===----------------------------------------------------------------------===//
01154 
01155 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
01156 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
01157   switch (CC) {
01158   default: llvm_unreachable("Unknown condition code!");
01159   case ISD::SETNE:  return ARMCC::NE;
01160   case ISD::SETEQ:  return ARMCC::EQ;
01161   case ISD::SETGT:  return ARMCC::GT;
01162   case ISD::SETGE:  return ARMCC::GE;
01163   case ISD::SETLT:  return ARMCC::LT;
01164   case ISD::SETLE:  return ARMCC::LE;
01165   case ISD::SETUGT: return ARMCC::HI;
01166   case ISD::SETUGE: return ARMCC::HS;
01167   case ISD::SETULT: return ARMCC::LO;
01168   case ISD::SETULE: return ARMCC::LS;
01169   }
01170 }
01171 
01172 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
01173 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
01174                         ARMCC::CondCodes &CondCode2) {
01175   CondCode2 = ARMCC::AL;
01176   switch (CC) {
01177   default: llvm_unreachable("Unknown FP condition!");
01178   case ISD::SETEQ:
01179   case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
01180   case ISD::SETGT:
01181   case ISD::SETOGT: CondCode = ARMCC::GT; break;
01182   case ISD::SETGE:
01183   case ISD::SETOGE: CondCode = ARMCC::GE; break;
01184   case ISD::SETOLT: CondCode = ARMCC::MI; break;
01185   case ISD::SETOLE: CondCode = ARMCC::LS; break;
01186   case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
01187   case ISD::SETO:   CondCode = ARMCC::VC; break;
01188   case ISD::SETUO:  CondCode = ARMCC::VS; break;
01189   case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
01190   case ISD::SETUGT: CondCode = ARMCC::HI; break;
01191   case ISD::SETUGE: CondCode = ARMCC::PL; break;
01192   case ISD::SETLT:
01193   case ISD::SETULT: CondCode = ARMCC::LT; break;
01194   case ISD::SETLE:
01195   case ISD::SETULE: CondCode = ARMCC::LE; break;
01196   case ISD::SETNE:
01197   case ISD::SETUNE: CondCode = ARMCC::NE; break;
01198   }
01199 }
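      // Editorial note: several FP conditions have no single ARM condition code and
      // map to a pair above (e.g. SETONE -> MI or GT, SETUEQ -> EQ or VS). When
      // CondCode2 != AL, callers such as the select/branch lowering typically emit a
      // second operation predicated on CondCode2.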
01200 
01201 //===----------------------------------------------------------------------===//
01202 //                      Calling Convention Implementation
01203 //===----------------------------------------------------------------------===//
01204 
01205 #include "ARMGenCallingConv.inc"
01206 
01207 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
01208 /// CallingConvention value.
01209 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
01210                                                  bool Return,
01211                                                  bool isVarArg) const {
01212   switch (CC) {
01213   default:
01214     llvm_unreachable("Unsupported calling convention");
01215   case CallingConv::Fast:
01216     if (Subtarget->hasVFP2() && !isVarArg) {
01217       if (!Subtarget->isAAPCS_ABI())
01218         return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
01219       // For AAPCS ABI targets, just use VFP variant of the calling convention.
01220       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
01221     }
01222     // Fallthrough
01223   case CallingConv::C: {
01224     // Use target triple & subtarget features to do actual dispatch.
01225     if (!Subtarget->isAAPCS_ABI())
01226       return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
01227     else if (Subtarget->hasVFP2() &&
01228              getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
01229              !isVarArg)
01230       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
01231     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
01232   }
01233   case CallingConv::ARM_AAPCS_VFP:
01234     if (!isVarArg)
01235       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
01236     // Fallthrough
01237   case CallingConv::ARM_AAPCS:
01238     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
01239   case CallingConv::ARM_APCS:
01240     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
01241   case CallingConv::GHC:
01242     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
01243   }
01244 }
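      // Editorial example of the dispatch above: a non-variadic CallingConv::C call
      // on an AAPCS target with VFP2 and FloatABI::Hard resolves to
      // CC_ARM_AAPCS_VFP / RetCC_ARM_AAPCS_VFP, while the same call on an APCS
      // target resolves to CC_ARM_APCS / RetCC_ARM_APCS.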
01245 
01246 /// LowerCallResult - Lower the result values of a call into the
01247 /// appropriate copies out of appropriate physical registers.
01248 SDValue
01249 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
01250                                    CallingConv::ID CallConv, bool isVarArg,
01251                                    const SmallVectorImpl<ISD::InputArg> &Ins,
01252                                    SDLoc dl, SelectionDAG &DAG,
01253                                    SmallVectorImpl<SDValue> &InVals,
01254                                    bool isThisReturn, SDValue ThisVal) const {
01255 
01256   // Assign locations to each value returned by this call.
01257   SmallVector<CCValAssign, 16> RVLocs;
01258   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
01259                     getTargetMachine(), RVLocs, *DAG.getContext(), Call);
01260   CCInfo.AnalyzeCallResult(Ins,
01261                            CCAssignFnForNode(CallConv, /* Return*/ true,
01262                                              isVarArg));
01263 
01264   // Copy all of the result registers out of their specified physreg.
01265   for (unsigned i = 0; i != RVLocs.size(); ++i) {
01266     CCValAssign VA = RVLocs[i];
01267 
01268     // Pass 'this' value directly from the argument to return value, to avoid
01269     // reg unit interference
01270     if (i == 0 && isThisReturn) {
01271       assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
01272              "unexpected return calling convention register assignment");
01273       InVals.push_back(ThisVal);
01274       continue;
01275     }
01276 
01277     SDValue Val;
01278     if (VA.needsCustom()) {
01279       // Handle f64 or half of a v2f64.
01280       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
01281                                       InFlag);
01282       Chain = Lo.getValue(1);
01283       InFlag = Lo.getValue(2);
01284       VA = RVLocs[++i]; // skip ahead to next loc
01285       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
01286                                       InFlag);
01287       Chain = Hi.getValue(1);
01288       InFlag = Hi.getValue(2);
01289       Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
01290 
01291       if (VA.getLocVT() == MVT::v2f64) {
01292         SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
01293         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
01294                           DAG.getConstant(0, MVT::i32));
01295 
01296         VA = RVLocs[++i]; // skip ahead to next loc
01297         Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
01298         Chain = Lo.getValue(1);
01299         InFlag = Lo.getValue(2);
01300         VA = RVLocs[++i]; // skip ahead to next loc
01301         Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
01302         Chain = Hi.getValue(1);
01303         InFlag = Hi.getValue(2);
01304         Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
01305         Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
01306                           DAG.getConstant(1, MVT::i32));
01307       }
01308     } else {
01309       Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
01310                                InFlag);
01311       Chain = Val.getValue(1);
01312       InFlag = Val.getValue(2);
01313     }
01314 
01315     switch (VA.getLocInfo()) {
01316     default: llvm_unreachable("Unknown loc info!");
01317     case CCValAssign::Full: break;
01318     case CCValAssign::BCvt:
01319       Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
01320       break;
01321     }
01322 
01323     InVals.push_back(Val);
01324   }
01325 
01326   return Chain;
01327 }
01328 
01329 /// LowerMemOpCallTo - Store the argument to the stack.
01330 SDValue
01331 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
01332                                     SDValue StackPtr, SDValue Arg,
01333                                     SDLoc dl, SelectionDAG &DAG,
01334                                     const CCValAssign &VA,
01335                                     ISD::ArgFlagsTy Flags) const {
01336   unsigned LocMemOffset = VA.getLocMemOffset();
01337   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
01338   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
01339   return DAG.getStore(Chain, dl, Arg, PtrOff,
01340                       MachinePointerInfo::getStack(LocMemOffset),
01341                       false, false, 0);
01342 }
01343 
01344 void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
01345                                          SDValue Chain, SDValue &Arg,
01346                                          RegsToPassVector &RegsToPass,
01347                                          CCValAssign &VA, CCValAssign &NextVA,
01348                                          SDValue &StackPtr,
01349                                          SmallVectorImpl<SDValue> &MemOpChains,
01350                                          ISD::ArgFlagsTy Flags) const {
01351 
01352   SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
01353                               DAG.getVTList(MVT::i32, MVT::i32), Arg);
01354   RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
01355 
01356   if (NextVA.isRegLoc())
01357     RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
01358   else {
01359     assert(NextVA.isMemLoc());
01360     if (StackPtr.getNode() == 0)
01361       StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
01362 
01363     MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
01364                                            dl, DAG, NextVA,
01365                                            Flags));
01366   }
01367 }
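      // Editorial note: the VMOVRRD above splits the f64 argument into two i32
      // halves. The low half always travels in VA's register; the high half goes in
      // NextVA's register if one was assigned, otherwise it is stored to NextVA's
      // stack slot via LowerMemOpCallTo.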
01368 
01369 /// LowerCall - Lower a call into a callseq_start <-
01370 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
01371 /// nodes.
01372 SDValue
01373 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
01374                              SmallVectorImpl<SDValue> &InVals) const {
01375   SelectionDAG &DAG                     = CLI.DAG;
01376   SDLoc &dl                          = CLI.DL;
01377   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
01378   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
01379   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
01380   SDValue Chain                         = CLI.Chain;
01381   SDValue Callee                        = CLI.Callee;
01382   bool &isTailCall                      = CLI.IsTailCall;
01383   CallingConv::ID CallConv              = CLI.CallConv;
01384   bool doesNotRet                       = CLI.DoesNotReturn;
01385   bool isVarArg                         = CLI.IsVarArg;
01386 
01387   MachineFunction &MF = DAG.getMachineFunction();
01388   bool isStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
01389   bool isThisReturn   = false;
01390   bool isSibCall      = false;
01391 
01392   // Disable tail calls if they're not supported.
01393   if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls)
01394     isTailCall = false;
01395 
01396   if (isTailCall) {
01397     // Check if it's really possible to do a tail call.
01398     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
01399                     isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
01400                                                    Outs, OutVals, Ins, DAG);
01401     // We don't support GuaranteedTailCallOpt for ARM, only automatically
01402     // detected sibcalls.
01403     if (isTailCall) {
01404       ++NumTailCalls;
01405       isSibCall = true;
01406     }
01407   }
01408 
01409   // Analyze operands of the call, assigning locations to each operand.
01410   SmallVector<CCValAssign, 16> ArgLocs;
01411   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
01412                  getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
01413   CCInfo.AnalyzeCallOperands(Outs,
01414                              CCAssignFnForNode(CallConv, /* Return*/ false,
01415                                                isVarArg));
01416 
01417   // Get a count of how many bytes are to be pushed on the stack.
01418   unsigned NumBytes = CCInfo.getNextStackOffset();
01419 
01420   // For tail calls, memory operands are available in our caller's stack.
01421   if (isSibCall)
01422     NumBytes = 0;
01423 
01424   // Adjust the stack pointer for the new arguments...
01425   // These operations are automatically eliminated by the prolog/epilog pass
01426   if (!isSibCall)
01427     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
01428                                  dl);
01429 
01430   SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
01431 
01432   RegsToPassVector RegsToPass;
01433   SmallVector<SDValue, 8> MemOpChains;
01434 
01435   // Walk the register/memloc assignments, inserting copies/loads.  In the case
01436   // of tail call optimization, arguments are handled later.
01437   for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
01438        i != e;
01439        ++i, ++realArgIdx) {
01440     CCValAssign &VA = ArgLocs[i];
01441     SDValue Arg = OutVals[realArgIdx];
01442     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
01443     bool isByVal = Flags.isByVal();
01444 
01445     // Promote the value if needed.
01446     switch (VA.getLocInfo()) {
01447     default: llvm_unreachable("Unknown loc info!");
01448     case CCValAssign::Full: break;
01449     case CCValAssign::SExt:
01450       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
01451       break;
01452     case CCValAssign::ZExt:
01453       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
01454       break;
01455     case CCValAssign::AExt:
01456       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
01457       break;
01458     case CCValAssign::BCvt:
01459       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
01460       break;
01461     }
01462 
01463     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
01464     if (VA.needsCustom()) {
01465       if (VA.getLocVT() == MVT::v2f64) {
01466         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
01467                                   DAG.getConstant(0, MVT::i32));
01468         SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
01469                                   DAG.getConstant(1, MVT::i32));
01470 
01471         PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
01472                          VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
01473 
01474         VA = ArgLocs[++i]; // skip ahead to next loc
01475         if (VA.isRegLoc()) {
01476           PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
01477                            VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
01478         } else {
01479           assert(VA.isMemLoc());
01480 
01481           MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
01482                                                  dl, DAG, VA, Flags));
01483         }
01484       } else {
01485         PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
01486                          StackPtr, MemOpChains, Flags);
01487       }
01488     } else if (VA.isRegLoc()) {
01489       if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
01490         assert(VA.getLocVT() == MVT::i32 &&
01491                "unexpected calling convention register assignment");
01492         assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
01493                "unexpected use of 'returned'");
01494         isThisReturn = true;
01495       }
01496       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
01497     } else if (isByVal) {
01498       assert(VA.isMemLoc());
01499       unsigned offset = 0;
01500 
01501       // Check whether this byval aggregate has a register range recorded for
01502       // it, i.e. part or all of it is passed in registers.
01503       unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
01504       unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
01505 
01506       if (CurByValIdx < ByValArgsCount) {
01507 
01508         unsigned RegBegin, RegEnd;
01509         CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
01510 
01511         EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
01512         unsigned int i, j;
01513         for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
01514           SDValue Const = DAG.getConstant(4*i, MVT::i32);
01515           SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
01516           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
01517                                      MachinePointerInfo(),
01518                                      false, false, false,
01519                                      DAG.InferPtrAlignment(AddArg));
01520           MemOpChains.push_back(Load.getValue(1));
01521           RegsToPass.push_back(std::make_pair(j, Load));
01522         }
01523 
01524         // If the parameter size exceeds the register area, the "offset" value
01525         // helps us calculate the stack slot for the remaining part.
01526         offset = RegEnd - RegBegin;
01527 
01528         CCInfo.nextInRegsParam();
01529       }
01530 
01531       if (Flags.getByValSize() > 4*offset) {
01532         unsigned LocMemOffset = VA.getLocMemOffset();
01533         SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
01534         SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
01535                                   StkPtrOff);
01536         SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
01537         SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
01538         SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
01539                                            MVT::i32);
01540         SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
01541 
01542         SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
01543         SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
01544         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
01545                                           Ops, array_lengthof(Ops)));
01546       }
01547     } else if (!isSibCall) {
01548       assert(VA.isMemLoc());
01549 
01550       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
01551                                              dl, DAG, VA, Flags));
01552     }
01553   }
01554 
01555   if (!MemOpChains.empty())
01556     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
01557                         &MemOpChains[0], MemOpChains.size());
01558 
01559   // Build a sequence of copy-to-reg nodes chained together with token chain
01560   // and flag operands which copy the outgoing args into the appropriate regs.
01561   SDValue InFlag;
01562   // Tail call byval lowering might overwrite argument registers so in case of
01563   // tail call optimization the copies to registers are lowered later.
01564   if (!isTailCall)
01565     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
01566       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
01567                                RegsToPass[i].second, InFlag);
01568       InFlag = Chain.getValue(1);
01569     }
01570 
01571   // For tail calls lower the arguments to the 'real' stack slot.
01572   if (isTailCall) {
01573     // Force all the incoming stack arguments to be loaded from the stack
01574     // before any new outgoing arguments are stored to the stack, because the
01575     // outgoing stack slots may alias the incoming argument stack slots, and
01576     // the alias isn't otherwise explicit. This is slightly more conservative
01577     // than necessary, because it means that each store effectively depends
01578     // on every argument instead of just those arguments it would clobber.
01579 
01580     // Do not flag preceding copytoreg stuff together with the following stuff.
01581     InFlag = SDValue();
01582     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
01583       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
01584                                RegsToPass[i].second, InFlag);
01585       InFlag = Chain.getValue(1);
01586     }
01587     InFlag = SDValue();
01588   }
01589 
01590   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
01591   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
01592   // node so that legalize doesn't hack it.
01593   bool isDirect = false;
01594   bool isARMFunc = false;
01595   bool isLocalARMFunc = false;
01596   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
01597 
01598   if (EnableARMLongCalls) {
01599     assert (getTargetMachine().getRelocationModel() == Reloc::Static
01600             && "long-calls with non-static relocation model!");
01601     // Handle a global address or an external symbol. If it's not one of
01602     // those, the target's already in a register, so we don't need to do
01603     // anything extra.
01604     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
01605       const GlobalValue *GV = G->getGlobal();
01606       // Create a constant pool entry for the callee address
01607       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
01608       ARMConstantPoolValue *CPV =
01609         ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
01610 
01611       // Get the address of the callee into a register
01612       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
01613       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
01614       Callee = DAG.getLoad(getPointerTy(), dl,
01615                            DAG.getEntryNode(), CPAddr,
01616                            MachinePointerInfo::getConstantPool(),
01617                            false, false, false, 0);
01618     } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
01619       const char *Sym = S->getSymbol();
01620 
01621       // Create a constant pool entry for the callee address
01622       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
01623       ARMConstantPoolValue *CPV =
01624         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
01625                                       ARMPCLabelIndex, 0);
01626       // Get the address of the callee into a register
01627       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
01628       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
01629       Callee = DAG.getLoad(getPointerTy(), dl,
01630                            DAG.getEntryNode(), CPAddr,
01631                            MachinePointerInfo::getConstantPool(),
01632                            false, false, false, 0);
01633     }
01634   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
01635     const GlobalValue *GV = G->getGlobal();
01636     isDirect = true;
01637     bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
01638     bool isStub = (isExt && Subtarget->isTargetMachO()) &&
01639                    getTargetMachine().getRelocationModel() != Reloc::Static;
01640     isARMFunc = !Subtarget->isThumb() || isStub;
01641     // ARM call to a local ARM function is predicable.
01642     isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
01643     // tBX takes a register source operand.
01644     if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
01645       assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
01646       Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
01647                            DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
01648     } else {
01649       // On ELF targets for PIC code, direct calls should go through the PLT
01650       unsigned OpFlags = 0;
01651       if (Subtarget->isTargetELF() &&
01652           getTargetMachine().getRelocationModel() == Reloc::PIC_)
01653         OpFlags = ARMII::MO_PLT;
01654       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
01655     }
01656   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
01657     isDirect = true;
01658     bool isStub = Subtarget->isTargetMachO() &&
01659                   getTargetMachine().getRelocationModel() != Reloc::Static;
01660     isARMFunc = !Subtarget->isThumb() || isStub;
01661     // tBX takes a register source operand.
01662     const char *Sym = S->getSymbol();
01663     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
01664       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
01665       ARMConstantPoolValue *CPV =
01666         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
01667                                       ARMPCLabelIndex, 4);
01668       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
01669       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
01670       Callee = DAG.getLoad(getPointerTy(), dl,
01671                            DAG.getEntryNode(), CPAddr,
01672                            MachinePointerInfo::getConstantPool(),
01673                            false, false, false, 0);
01674       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
01675       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
01676                            getPointerTy(), Callee, PICLabel);
01677     } else {
01678       unsigned OpFlags = 0;
01679       // On ELF targets for PIC code, direct calls should go through the PLT
01680       if (Subtarget->isTargetELF() &&
01681                   getTargetMachine().getRelocationModel() == Reloc::PIC_)
01682         OpFlags = ARMII::MO_PLT;
01683       Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
01684     }
01685   }
01686 
01687   // FIXME: handle tail calls differently.
01688   unsigned CallOpc;
01689   bool HasMinSizeAttr = Subtarget->isMinSize();
01690   if (Subtarget->isThumb()) {
01691     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
01692       CallOpc = ARMISD::CALL_NOLINK;
01693     else
01694       CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
01695   } else {
01696     if (!isDirect && !Subtarget->hasV5TOps())
01697       CallOpc = ARMISD::CALL_NOLINK;
01698     else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
01699                // Emit regular call when code size is the priority
01700                !HasMinSizeAttr)
01701       // "mov lr, pc; b _foo" to avoid confusing the RSP
01702       CallOpc = ARMISD::CALL_NOLINK;
01703     else
01704       CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
01705   }
01706 
01707   std::vector<SDValue> Ops;
01708   Ops.push_back(Chain);
01709   Ops.push_back(Callee);
01710 
01711   // Add argument registers to the end of the list so that they are known live
01712   // into the call.
01713   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
01714     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
01715                                   RegsToPass[i].second.getValueType()));
01716 
01717   // Add a register mask operand representing the call-preserved registers.
01718   if (!isTailCall) {
01719     const uint32_t *Mask;
01720     const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
01721     const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
01722     if (isThisReturn) {
01723       // For 'this' returns, use the R0-preserving mask if applicable
01724       Mask = ARI->getThisReturnPreservedMask(CallConv);
01725       if (!Mask) {
01726         // Set isThisReturn to false if the calling convention is not one that
01727         // allows 'returned' to be modeled in this way, so LowerCallResult does
01728         // not try to pass 'this' straight through
01729         isThisReturn = false;
01730         Mask = ARI->getCallPreservedMask(CallConv);
01731       }
01732     } else
01733       Mask = ARI->getCallPreservedMask(CallConv);
01734 
01735     assert(Mask && "Missing call preserved mask for calling convention");
01736     Ops.push_back(DAG.getRegisterMask(Mask));
01737   }
01738 
01739   if (InFlag.getNode())
01740     Ops.push_back(InFlag);
01741 
01742   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
01743   if (isTailCall)
01744     return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
01745 
01746   // Returns a chain and a flag for retval copy to use.
01747   Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
01748   InFlag = Chain.getValue(1);
01749 
01750   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
01751                              DAG.getIntPtrConstant(0, true), InFlag, dl);
01752   if (!Ins.empty())
01753     InFlag = Chain.getValue(1);
01754 
01755   // Handle result values, copying them out of physregs into vregs that we
01756   // return.
01757   return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
01758                          InVals, isThisReturn,
01759                          isThisReturn ? OutVals[0] : SDValue());
01760 }
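      // Editorial summary of LowerCall: a normal call is built as CALLSEQ_START,
      // stores/copies of the outgoing arguments, the call node (CALL, CALL_PRED,
      // CALL_NOLINK or tCALL) carrying the argument registers and register mask,
      // CALLSEQ_END, and finally LowerCallResult. A detected sibcall instead emits
      // TC_RETURN and performs no stack adjustment.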
01761 
01762 /// HandleByVal - Every parameter *after* a byval parameter is passed
01763 /// on the stack.  Remember the next parameter register to allocate,
01764 /// and then confiscate the rest of the parameter registers to ensure
01765 /// this.
01766 void
01767 ARMTargetLowering::HandleByVal(
01768     CCState *State, unsigned &size, unsigned Align) const {
01769   unsigned reg = State->AllocateReg(GPRArgRegs, 4);
01770   assert((State->getCallOrPrologue() == Prologue ||
01771           State->getCallOrPrologue() == Call) &&
01772          "unhandled ParmContext");
01773 
01774   if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
01775     if (Subtarget->isAAPCS_ABI() && Align > 4) {
01776       unsigned AlignInRegs = Align / 4;
01777       unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
01778       for (unsigned i = 0; i < Waste; ++i)
01779         reg = State->AllocateReg(GPRArgRegs, 4);
01780     }
01781     if (reg != 0) {
01782       unsigned excess = 4 * (ARM::R4 - reg);
01783 
01784       // Special case when NSAA != SP and the parameter size is greater than
01785       // the size of all remaining GPR regs. In that case we can't split the
01786       // parameter; we must send it to the stack. We also must set NCRN to R4,
01787       // so all remaining registers are wasted.
01788       const unsigned NSAAOffset = State->getNextStackOffset();
01789       if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
01790         while (State->AllocateReg(GPRArgRegs, 4))
01791           ;
01792         return;
01793       }
01794 
01795       // The first register for the byval parameter is the first register that
01796       // wasn't allocated before this method call, i.e. "reg".
01797       // If the parameter is small enough to fit in the range [reg, r4), then
01798       // the end (one past the last) register is reg + param-size-in-regs;
01799       // otherwise the parameter is split between registers and stack, and the
01800       // end register is r4.
01801       unsigned ByValRegBegin = reg;
01802       unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
01803       State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
01804       // Note: the first register was already allocated at the start of this
01805       // function, so allocate only the remaining registers we need.
01806       for (unsigned i = reg+1; i != ByValRegEnd; ++i)
01807         State->AllocateReg(GPRArgRegs, 4);
01808       // A byval parameter that is split between registers and memory needs its
01809       // size truncated here.
01810       // In the case where the entire structure fits in registers, we set the
01811       // size in memory to zero.
01812       if (size < excess)
01813         size = 0;
01814       else
01815         size -= excess;
01816     }
01817   }
01818 }
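      // Worked example for HandleByVal (editorial): if the next free register is r2
      // and a 12-byte byval parameter arrives, excess = 4 * (r4 - r2) = 8, so r2 and
      // r3 are confiscated, the in-regs range [r2, r4) is recorded, and "size" is
      // reduced to 12 - 8 = 4 bytes, which LowerCall later copies onto the stack.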
01819 
01820 /// MatchingStackOffset - Return true if the given stack call argument is
01821 /// already available at the same relative position in the caller's
01822 /// incoming argument stack.
01823 static
01824 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
01825                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
01826                          const TargetInstrInfo *TII) {
01827   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
01828   int FI = INT_MAX;
01829   if (Arg.getOpcode() == ISD::CopyFromReg) {
01830     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
01831     if (!TargetRegisterInfo::isVirtualRegister(VR))
01832       return false;
01833     MachineInstr *Def = MRI->getVRegDef(VR);
01834     if (!Def)
01835       return false;
01836     if (!Flags.isByVal()) {
01837       if (!TII->isLoadFromStackSlot(Def, FI))
01838         return false;
01839     } else {
01840       return false;
01841     }
01842   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
01843     if (Flags.isByVal())
01844       // ByVal argument is passed in as a pointer but it's now being
01845       // dereferenced. e.g.
01846       // define @foo(%struct.X* %A) {
01847       //   tail call @bar(%struct.X* byval %A)
01848       // }
01849       return false;
01850     SDValue Ptr = Ld->getBasePtr();
01851     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
01852     if (!FINode)
01853       return false;
01854     FI = FINode->getIndex();
01855   } else
01856     return false;
01857 
01858   assert(FI != INT_MAX);
01859   if (!MFI->isFixedObjectIndex(FI))
01860     return false;
01861   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
01862 }
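      // Editorial note: the check above accepts two shapes of outgoing argument: a
      // CopyFromReg of a virtual register defined by a load from a fixed stack slot,
      // or a load whose address is a frame index. Either way, the slot must be a
      // fixed object whose offset and size match the caller's incoming slot exactly.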
01863 
01864 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
01865 /// for tail call optimization. Targets which want to do tail call
01866 /// optimization should implement this function.
01867 bool
01868 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
01869                                                      CallingConv::ID CalleeCC,
01870                                                      bool isVarArg,
01871                                                      bool isCalleeStructRet,
01872                                                      bool isCallerStructRet,
01873                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
01874                                     const SmallVectorImpl<SDValue> &OutVals,
01875                                     const SmallVectorImpl<ISD::InputArg> &Ins,
01876                                                      SelectionDAG& DAG) const {
01877   const Function *CallerF = DAG.getMachineFunction().getFunction();
01878   CallingConv::ID CallerCC = CallerF->getCallingConv();
01879   bool CCMatch = CallerCC == CalleeCC;
01880 
01881   // Look for obvious safe cases to perform tail call optimization that do not
01882   // require ABI changes. This is what gcc calls sibcall.
01883 
01884   // Do not sibcall optimize vararg calls unless the call site is not passing
01885   // any arguments.
01886   if (isVarArg && !Outs.empty())
01887     return false;
01888 
01889   // Exception-handling functions need a special set of instructions to indicate
01890   // a return to the hardware. Tail-calling another function would probably
01891   // break this.
01892   if (CallerF->hasFnAttribute("interrupt"))
01893     return false;
01894 
01895   // Also avoid sibcall optimization if either caller or callee uses struct
01896   // return semantics.
01897   if (isCalleeStructRet || isCallerStructRet)
01898     return false;
01899 
01900   // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
01901   // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
01902   // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
01903   // support in the assembler and linker to be used. This would need to be
01904   // fixed to fully support tail calls in Thumb1.
01905   //
01906   // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
01907   // LR.  This means if we need to reload LR, it takes an extra instruction,
01908   // which outweighs the value of the tail call; but here we don't know yet
01909   // whether LR is going to be used.  Probably the right approach is to
01910   // generate the tail call here and turn it back into CALL/RET in
01911   // emitEpilogue if LR is used.
01912 
01913   // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
01914   // but we need to make sure there are enough registers; the only valid
01915   // registers are the 4 used for parameters.  We don't currently do this
01916   // case.
01917   if (Subtarget->isThumb1Only())
01918     return false;
01919 
01920   // If the calling conventions do not match, then we'd better make sure the
01921   // results are returned in the same way as what the caller expects.
01922   if (!CCMatch) {
01923     SmallVector<CCValAssign, 16> RVLocs1;
01924     ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
01925                        getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
01926     CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
01927 
01928     SmallVector<CCValAssign, 16> RVLocs2;
01929     ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
01930                        getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
01931     CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
01932 
01933     if (RVLocs1.size() != RVLocs2.size())
01934       return false;
01935     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
01936       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
01937         return false;
01938       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
01939         return false;
01940       if (RVLocs1[i].isRegLoc()) {
01941         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
01942           return false;
01943       } else {
01944         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
01945           return false;
01946       }
01947     }
01948   }
01949 
01950   // If Caller's vararg or byval argument has been split between registers and
01951   // stack, do not perform tail call, since part of the argument is in caller's
01952   // local frame.
01953   const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
01954                                       getInfo<ARMFunctionInfo>();
01955   if (AFI_Caller->getArgRegsSaveSize())
01956     return false;
01957 
01958   // If the callee takes no arguments then go on to check the results of the
01959   // call.
01960   if (!Outs.empty()) {
01961     // Check if stack adjustment is needed. For now, do not do this if any
01962     // argument is passed on the stack.
01963     SmallVector<CCValAssign, 16> ArgLocs;
01964     ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
01965                       getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
01966     CCInfo.AnalyzeCallOperands(Outs,
01967                                CCAssignFnForNode(CalleeCC, false, isVarArg));
01968     if (CCInfo.getNextStackOffset()) {
01969       MachineFunction &MF = DAG.getMachineFunction();
01970 
01971       // Check if the arguments are already laid out in the right way as
01972       // the caller's fixed stack objects.
01973       MachineFrameInfo *MFI = MF.getFrameInfo();
01974       const MachineRegisterInfo *MRI = &MF.getRegInfo();
01975       const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
01976       for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
01977            i != e;
01978            ++i, ++realArgIdx) {
01979         CCValAssign &VA = ArgLocs[i];
01980         EVT RegVT = VA.getLocVT();
01981         SDValue Arg = OutVals[realArgIdx];
01982         ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
01983         if (VA.getLocInfo() == CCValAssign::Indirect)
01984           return false;
01985         if (VA.needsCustom()) {
01986           // f64 and vector types are split into multiple registers or
01987           // register/stack-slot combinations.  The types will not match
01988           // the registers; give up on memory f64 refs until we figure
01989           // out what to do about this.
01990           if (!VA.isRegLoc())
01991             return false;
01992           if (!ArgLocs[++i].isRegLoc())
01993             return false;
01994           if (RegVT == MVT::v2f64) {
01995             if (!ArgLocs[++i].isRegLoc())
01996               return false;
01997             if (!ArgLocs[++i].isRegLoc())
01998               return false;
01999           }
02000         } else if (!VA.isRegLoc()) {
02001           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
02002                                    MFI, MRI, TII))
02003             return false;
02004         }
02005       }
02006     }
02007   }
02008 
02009   return true;
02010 }
02011 
02012 bool
02013 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
02014                                   MachineFunction &MF, bool isVarArg,
02015                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
02016                                   LLVMContext &Context) const {
02017   SmallVector<CCValAssign, 16> RVLocs;
02018   CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
02019   return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
02020                                                     isVarArg));
02021 }
02022 
02023 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
02024                                     SDLoc DL, SelectionDAG &DAG) {
02025   const MachineFunction &MF = DAG.getMachineFunction();
02026   const Function *F = MF.getFunction();
02027 
02028   StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
02029 
02030   // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
02031   // version of the "preferred return address". These offsets affect the return
02032   // instruction if this is a return from PL1 without hypervisor extensions.
02033   //    IRQ/FIQ: +4     "subs pc, lr, #4"
02034   //    SWI:     0      "subs pc, lr, #0"
02035   //    ABORT:   +4     "subs pc, lr, #4"
02036   //    UNDEF:   +4/+2  "subs pc, lr, #0"
02037   // UNDEF varies depending on whether the exception came from ARM or Thumb
02038   // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
02039 
02040   int64_t LROffset;
02041   if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
02042       IntKind == "ABORT")
02043     LROffset = 4;
02044   else if (IntKind == "SWI" || IntKind == "UNDEF")
02045     LROffset = 0;
02046   else
02047     report_fatal_error("Unsupported interrupt attribute. If present, value "
02048                        "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
02049 
02050   RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
02051 
02052   return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other,
02053                      RetOps.data(), RetOps.size());
02054 }
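      // Editorial example: an IRQ handler lowered here returns with
      // "subs pc, lr, #4", while an SWI handler returns with "subs pc, lr, #0"; the
      // constant inserted above becomes the LR-offset operand of INTRET_FLAG.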
02055 
02056 SDValue
02057 ARMTargetLowering::LowerReturn(SDValue Chain,
02058                                CallingConv::ID CallConv, bool isVarArg,
02059                                const SmallVectorImpl<ISD::OutputArg> &Outs,
02060                                const SmallVectorImpl<SDValue> &OutVals,
02061                                SDLoc dl, SelectionDAG &DAG) const {
02062 
02063   // CCValAssign - represent the assignment of the return value to a location.
02064   SmallVector<CCValAssign, 16> RVLocs;
02065 
02066   // CCState - Info about the registers and stack slots.
02067   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
02068                     getTargetMachine(), RVLocs, *DAG.getContext(), Call);
02069 
02070   // Analyze outgoing return values.
02071   CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
02072                                                isVarArg));
02073 
02074   SDValue Flag;
02075   SmallVector<SDValue, 4> RetOps;
02076   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
02077 
02078   // Copy the result values into the output registers.
02079   for (unsigned i = 0, realRVLocIdx = 0;
02080        i != RVLocs.size();
02081        ++i, ++realRVLocIdx) {
02082     CCValAssign &VA = RVLocs[i];
02083     assert(VA.isRegLoc() && "Can only return in registers!");
02084 
02085     SDValue Arg = OutVals[realRVLocIdx];
02086 
02087     switch (VA.getLocInfo()) {
02088     default: llvm_unreachable("Unknown loc info!");
02089     case CCValAssign::Full: break;
02090     case CCValAssign::BCvt:
02091       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
02092       break;
02093     }
02094 
02095     if (VA.needsCustom()) {
02096       if (VA.getLocVT() == MVT::v2f64) {
02097         // Extract the first half and return it in two registers.
02098         SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
02099                                    DAG.getConstant(0, MVT::i32));
02100         SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
02101                                        DAG.getVTList(MVT::i32, MVT::i32), Half);
02102 
02103         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
02104         Flag = Chain.getValue(1);
02105         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02106         VA = RVLocs[++i]; // skip ahead to next loc
02107         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
02108                                  HalfGPRs.getValue(1), Flag);
02109         Flag = Chain.getValue(1);
02110         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02111         VA = RVLocs[++i]; // skip ahead to next loc
02112 
02113         // Extract the 2nd half and fall through to handle it as an f64 value.
02114         Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
02115                           DAG.getConstant(1, MVT::i32));
02116       }
02117       // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
02118       // available.
02119       SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
02120                                   DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
02121       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
02122       Flag = Chain.getValue(1);
02123       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02124       VA = RVLocs[++i]; // skip ahead to next loc
02125       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
02126                                Flag);
02127     } else
02128       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
02129 
02130     // Guarantee that all emitted copies are glued together so the scheduler
02131     // cannot separate them.
02132     Flag = Chain.getValue(1);
02133     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
02134   }
02135 
02136   // Update chain and glue.
02137   RetOps[0] = Chain;
02138   if (Flag.getNode())
02139     RetOps.push_back(Flag);
02140 
02141   // CPUs which aren't M-class use a special sequence to return from
02142   // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
02143   // though we use "subs pc, lr, #N").
02144   //
02145   // M-class CPUs actually use a normal return sequence with a special
02146   // (hardware-provided) value in LR, so the normal code path works.
02147   if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
02148       !Subtarget->isMClass()) {
02149     if (Subtarget->isThumb1Only())
02150       report_fatal_error("interrupt attribute is not supported in Thumb1");
02151     return LowerInterruptReturn(RetOps, dl, DAG);
02152   }
02153 
02154   return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
02155                      RetOps.data(), RetOps.size());
02156 }
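      // Editorial note: with a soft-float return convention an f64 result is
      // legalized above into two i32 copies produced by VMOVRRD and placed in
      // consecutive return registers; a v2f64 result uses four such copies. The
      // "interrupt" path replaces the normal RET_FLAG with INTRET_FLAG.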
02157 
02158 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
02159   if (N->getNumValues() != 1)
02160     return false;
02161   if (!N->hasNUsesOfValue(1, 0))
02162     return false;
02163 
02164   SDValue TCChain = Chain;
02165   SDNode *Copy = *N->use_begin();
02166   if (Copy->getOpcode() == ISD::CopyToReg) {
02167     // If the copy has a glue operand, we conservatively assume it isn't safe to
02168     // perform a tail call.
02169     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
02170       return false;
02171     TCChain = Copy->getOperand(0);
02172   } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
02173     SDNode *VMov = Copy;
02174     // f64 returned in a pair of GPRs.
02175     SmallPtrSet<SDNode*, 2> Copies;
02176     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
02177          UI != UE; ++UI) {
02178       if (UI->getOpcode() != ISD::CopyToReg)
02179         return false;
02180       Copies.insert(*UI);
02181     }
02182     if (Copies.size() > 2)
02183       return false;
02184 
02185     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
02186          UI != UE; ++UI) {
02187       SDValue UseChain = UI->getOperand(0);
02188       if (Copies.count(UseChain.getNode()))
02189         // Second CopyToReg
02190         Copy = *UI;
02191       else
02192         // First CopyToReg
02193         TCChain = UseChain;
02194     }
02195   } else if (Copy->getOpcode() == ISD::BITCAST) {
02196     // f32 returned in a single GPR.
02197     if (!Copy->hasOneUse())
02198       return false;
02199     Copy = *Copy->use_begin();
02200     if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
02201       return false;
02202     TCChain = Copy->getOperand(0);
02203   } else {
02204     return false;
02205   }
02206 
02207   bool HasRet = false;
02208   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
02209        UI != UE; ++UI) {
02210     if (UI->getOpcode() != ARMISD::RET_FLAG &&
02211         UI->getOpcode() != ARMISD::INTRET_FLAG)
02212       return false;
02213     HasRet = true;
02214   }
02215 
02216   if (!HasRet)
02217     return false;
02218 
02219   Chain = TCChain;
02220   return true;
02221 }
02222 
02223 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
02224   if (!Subtarget->supportsTailCall())
02225     return false;
02226 
02227   if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
02228     return false;
02229 
02230   return !Subtarget->isThumb1Only();
02231 }
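      // Editorial note: mayBeEmittedAsTailCall is the cheap per-call filter (the
      // subtarget must support tail calls, the call must be marked tail, tail calls
      // must not be disabled, and Thumb1 is excluded); the detailed eligibility
      // check is IsEligibleForTailCallOptimization above.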
02232 
02233 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
02234 // their target counterparts wrapped in the ARMISD::Wrapper node. Suppose N is
02235 // one of the above-mentioned nodes. It has to be wrapped because otherwise
02236 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
02237 // be used to form an addressing mode. These wrapped nodes will be selected
02238 // into MOVi.
02239 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
02240   EVT PtrVT = Op.getValueType();
02241   // FIXME there is no actual debug info here
02242   SDLoc dl(Op);
02243   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
02244   SDValue Res;
02245   if (CP->isMachineConstantPoolEntry())
02246     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
02247                                     CP->getAlignment());
02248   else
02249     Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
02250                                     CP->getAlignment());
02251   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
02252 }
02253 
02254 unsigned ARMTargetLowering::getJumpTableEncoding() const {
02255   return MachineJumpTableInfo::EK_Inline;
02256 }
02257 
02258 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
02259                                              SelectionDAG &DAG) const {
02260   MachineFunction &MF = DAG.getMachineFunction();
02261   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02262   unsigned ARMPCLabelIndex = 0;
02263   SDLoc DL(Op);
02264   EVT PtrVT = getPointerTy();
02265   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
02266   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
02267   SDValue CPAddr;
02268   if (RelocM == Reloc::Static) {
02269     CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
02270   } else {
02271     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
02272     ARMPCLabelIndex = AFI->createPICLabelUId();
02273     ARMConstantPoolValue *CPV =
02274       ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
02275                                       ARMCP::CPBlockAddress, PCAdj);
02276     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02277   }
02278   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
02279   SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
02280                                MachinePointerInfo::getConstantPool(),
02281                                false, false, false, 0);
02282   if (RelocM == Reloc::Static)
02283     return Result;
02284   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02285   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
02286 }
02287 
02288 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
02289 SDValue
02290 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
02291                                                  SelectionDAG &DAG) const {
02292   SDLoc dl(GA);
02293   EVT PtrVT = getPointerTy();
02294   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
02295   MachineFunction &MF = DAG.getMachineFunction();
02296   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02297   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02298   ARMConstantPoolValue *CPV =
02299     ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
02300                                     ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
02301   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02302   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
02303   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
02304                          MachinePointerInfo::getConstantPool(),
02305                          false, false, false, 0);
02306   SDValue Chain = Argument.getValue(1);
02307 
02308   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02309   Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
02310 
02311   // call __tls_get_addr.
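  // (Sketch, not emitted verbatim here:) for the PIC general-dynamic case the
  // nodes built above and the call below correspond roughly to
  //
  //     ldr  r0, .LCPI_tlsgd      @ constant-pool word for x(TLSGD)
  //   .LPC:
  //     add  r0, pc, r0           @ ARMISD::PIC_ADD
  //     bl   __tls_get_addr       @ returns the address of x in r0
  //
  // with the exact registers and labels chosen later by selection/allocation.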
02312   ArgListTy Args;
02313   ArgListEntry Entry;
02314   Entry.Node = Argument;
02315   Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
02316   Args.push_back(Entry);
02317   // FIXME: is there useful debug info available here?
02318   TargetLowering::CallLoweringInfo CLI(Chain,
02319                 (Type *) Type::getInt32Ty(*DAG.getContext()),
02320                 false, false, false, false,
02321                 0, CallingConv::C, /*isTailCall=*/false,
02322                 /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
02323                 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
02324   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
02325   return CallResult.first;
02326 }
02327 
02328 // Lower ISD::GlobalTLSAddress using the "initial exec" or
02329 // "local exec" model.
02330 SDValue
02331 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
02332                                         SelectionDAG &DAG,
02333                                         TLSModel::Model model) const {
02334   const GlobalValue *GV = GA->getGlobal();
02335   SDLoc dl(GA);
02336   SDValue Offset;
02337   SDValue Chain = DAG.getEntryNode();
02338   EVT PtrVT = getPointerTy();
02339   // Get the Thread Pointer
02340   SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
02341 
02342   if (model == TLSModel::InitialExec) {
02343     MachineFunction &MF = DAG.getMachineFunction();
02344     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02345     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02346     // Initial exec model.
02347     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
02348     ARMConstantPoolValue *CPV =
02349       ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
02350                                       ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
02351                                       true);
02352     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02353     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
02354     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
02355                          MachinePointerInfo::getConstantPool(),
02356                          false, false, false, 0);
02357     Chain = Offset.getValue(1);
02358 
02359     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02360     Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
02361 
02362     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
02363                          MachinePointerInfo::getConstantPool(),
02364                          false, false, false, 0);
02365   } else {
02366     // local exec model
02367     assert(model == TLSModel::LocalExec);
02368     ARMConstantPoolValue *CPV =
02369       ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
02370     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02371     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
02372     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
02373                          MachinePointerInfo::getConstantPool(),
02374                          false, false, false, 0);
02375   }
02376 
02377   // The address of the thread local variable is the add of the thread
02378   // pointer with the offset of the variable.
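  // (Sketch) In the local exec model the Offset loaded above is the constant
  // TP-relative offset itself, so the result is simply TP + TPOFF(x). In the
  // initial exec model the first load fetches the constant-pool word, the
  // PIC_ADD turns it into the address of the GOT slot, and the second load
  // reads the TP-relative offset out of that slot before the final add.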
02379   return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
02380 }
02381 
02382 SDValue
02383 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
02384   // TODO: implement the "local dynamic" model
02385   assert(Subtarget->isTargetELF() &&
02386          "TLS not implemented for non-ELF targets");
02387   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
02388 
02389   TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
02390 
02391   switch (model) {
02392     case TLSModel::GeneralDynamic:
02393     case TLSModel::LocalDynamic:
02394       return LowerToTLSGeneralDynamicModel(GA, DAG);
02395     case TLSModel::InitialExec:
02396     case TLSModel::LocalExec:
02397       return LowerToTLSExecModels(GA, DAG, model);
02398   }
02399   llvm_unreachable("bogus TLS model");
02400 }
02401 
02402 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
02403                                                  SelectionDAG &DAG) const {
02404   EVT PtrVT = getPointerTy();
02405   SDLoc dl(Op);
02406   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02407   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
02408     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
02409     ARMConstantPoolValue *CPV =
02410       ARMConstantPoolConstant::Create(GV,
02411                                       UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
02412     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02413     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02414     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
02415                                  CPAddr,
02416                                  MachinePointerInfo::getConstantPool(),
02417                                  false, false, false, 0);
02418     SDValue Chain = Result.getValue(1);
02419     SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
02420     Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
02421     if (!UseGOTOFF)
02422       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
02423                            MachinePointerInfo::getGOT(),
02424                            false, false, false, 0);
02425     return Result;
02426   }
02427 
02428   // If we have T2 ops, we can materialize the address directly via movt/movw
02429   // pair. This is always cheaper.
02430   if (Subtarget->useMovt()) {
02431     ++NumMovwMovt;
02432     // FIXME: Once remat is capable of dealing with instructions with register
02433     // operands, expand this into two nodes.
02434     return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
02435                        DAG.getTargetGlobalAddress(GV, dl, PtrVT));
02436   } else {
02437     SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
02438     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02439     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
02440                        MachinePointerInfo::getConstantPool(),
02441                        false, false, false, 0);
02442   }
02443 }
02444 
02445 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
02446                                                     SelectionDAG &DAG) const {
02447   EVT PtrVT = getPointerTy();
02448   SDLoc dl(Op);
02449   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02450   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
02451 
02452   if (Subtarget->useMovt())
02453     ++NumMovwMovt;
02454 
02455   // FIXME: Once remat is capable of dealing with instructions with register
02456   // operands, expand this into multiple nodes.
02457   unsigned Wrapper =
02458       RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
02459 
02460   SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
02461   SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
02462 
02463   if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
02464     Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
02465                          MachinePointerInfo::getGOT(), false, false, false, 0);
02466   return Result;
02467 }
02468 
02469 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
02470                                                     SelectionDAG &DAG) const {
02471   assert(Subtarget->isTargetELF() &&
02472          "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
02473   MachineFunction &MF = DAG.getMachineFunction();
02474   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02475   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02476   EVT PtrVT = getPointerTy();
02477   SDLoc dl(Op);
02478   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
02479   ARMConstantPoolValue *CPV =
02480     ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
02481                                   ARMPCLabelIndex, PCAdj);
02482   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02483   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02484   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
02485                                MachinePointerInfo::getConstantPool(),
02486                                false, false, false, 0);
02487   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02488   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
02489 }
02490 
02491 SDValue
02492 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
02493   SDLoc dl(Op);
02494   SDValue Val = DAG.getConstant(0, MVT::i32);
02495   return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
02496                      DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
02497                      Op.getOperand(1), Val);
02498 }
02499 
02500 SDValue
02501 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
02502   SDLoc dl(Op);
02503   return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
02504                      Op.getOperand(1), DAG.getConstant(0, MVT::i32));
02505 }
02506 
02507 SDValue
02508 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
02509                                           const ARMSubtarget *Subtarget) const {
02510   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
02511   SDLoc dl(Op);
02512   switch (IntNo) {
02513   default: return SDValue();    // Don't custom lower most intrinsics.
02514   case Intrinsic::arm_thread_pointer: {
02515     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02516     return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
02517   }
02518   case Intrinsic::eh_sjlj_lsda: {
02519     MachineFunction &MF = DAG.getMachineFunction();
02520     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02521     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
02522     EVT PtrVT = getPointerTy();
02523     Reloc::Model RelocM = getTargetMachine().getRelocationModel();
02524     SDValue CPAddr;
02525     unsigned PCAdj = (RelocM != Reloc::PIC_)
02526       ? 0 : (Subtarget->isThumb() ? 4 : 8);
02527     ARMConstantPoolValue *CPV =
02528       ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
02529                                       ARMCP::CPLSDA, PCAdj);
02530     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
02531     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
02532     SDValue Result =
02533       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
02534                   MachinePointerInfo::getConstantPool(),
02535                   false, false, false, 0);
02536 
02537     if (RelocM == Reloc::PIC_) {
02538       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
02539       Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
02540     }
02541     return Result;
02542   }
02543   case Intrinsic::arm_neon_vmulls:
02544   case Intrinsic::arm_neon_vmullu: {
02545     unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
02546       ? ARMISD::VMULLs : ARMISD::VMULLu;
02547     return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
02548                        Op.getOperand(1), Op.getOperand(2));
02549   }
02550   }
02551 }
02552 
02553 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
02554                                  const ARMSubtarget *Subtarget) {
02555   // FIXME: handle "fence singlethread" more efficiently.
02556   SDLoc dl(Op);
02557   if (!Subtarget->hasDataBarrier()) {
02558     // Some ARMv6 CPUs can support data barriers with an MCR instruction.
02559     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
02560     // here.
02561     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
02562            "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
02563     return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
02564                        DAG.getConstant(0, MVT::i32));
02565   }
02566 
02567   ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
02568   AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
02569   unsigned Domain = ARM_MB::ISH;
02570   if (Subtarget->isMClass()) {
02571     // Only a full system barrier exists in the M-class architectures.
02572     Domain = ARM_MB::SY;
02573   } else if (Subtarget->isSwift() && Ord == Release) {
02574     // Swift happens to implement ISHST barriers in a way that's compatible with
02575     // Release semantics but weaker than ISH so we'd be fools not to use
02576     // it. Beware: other processors probably don't!
02577     Domain = ARM_MB::ISHST;
02578   }
02579 
02580   return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
02581                      DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
02582                      DAG.getConstant(Domain, MVT::i32));
02583 }
02584 
02585 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
02586                              const ARMSubtarget *Subtarget) {
02587   // ARM pre-v5TE and Thumb1 do not have preload instructions.
02588   if (!(Subtarget->isThumb2() ||
02589         (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
02590     // Just preserve the chain.
02591     return Op.getOperand(0);
02592 
02593   SDLoc dl(Op);
02594   unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
02595   if (!isRead &&
02596       (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
02597     // ARMv7 with MP extension has PLDW.
02598     return Op.getOperand(0);
02599 
02600   unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
02601   if (Subtarget->isThumb()) {
02602     // Invert the bits.
02603     isRead = ~isRead & 1;
02604     isData = ~isData & 1;
02605   }
02606 
02607   return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
02608                      Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
02609                      DAG.getConstant(isData, MVT::i32));
02610 }
02611 
02612 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
02613   MachineFunction &MF = DAG.getMachineFunction();
02614   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
02615 
02616   // vastart just stores the address of the VarArgsFrameIndex slot into the
02617   // memory location argument.
02618   SDLoc dl(Op);
02619   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02620   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
02621   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
02622   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
02623                       MachinePointerInfo(SV), false, false, 0);
02624 }
02625 
02626 SDValue
02627 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
02628                                         SDValue &Root, SelectionDAG &DAG,
02629                                         SDLoc dl) const {
02630   MachineFunction &MF = DAG.getMachineFunction();
02631   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02632 
02633   const TargetRegisterClass *RC;
02634   if (AFI->isThumb1OnlyFunction())
02635     RC = &ARM::tGPRRegClass;
02636   else
02637     RC = &ARM::GPRRegClass;
02638 
02639   // Transform the arguments stored in physical registers into virtual ones.
02640   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02641   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
02642 
02643   SDValue ArgValue2;
02644   if (NextVA.isMemLoc()) {
02645     MachineFrameInfo *MFI = MF.getFrameInfo();
02646     int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
02647 
02648     // Create load node to retrieve arguments from the stack.
02649     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
02650     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
02651                             MachinePointerInfo::getFixedStack(FI),
02652                             false, false, false, 0);
02653   } else {
02654     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
02655     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
02656   }
02657 
02658   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
02659 }
02660 
02661 void
02662 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
02663                                   unsigned InRegsParamRecordIdx,
02664                                   unsigned ArgSize,
02665                                   unsigned &ArgRegsSize,
02666                                   unsigned &ArgRegsSaveSize)
02667   const {
02668   unsigned NumGPRs;
02669   if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
02670     unsigned RBegin, REnd;
02671     CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
02672     NumGPRs = REnd - RBegin;
02673   } else {
02674     unsigned int firstUnalloced;
02675     firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
02676                                                 sizeof(GPRArgRegs) /
02677                                                 sizeof(GPRArgRegs[0]));
02678     NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
02679   }
02680 
02681   unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
02682   ArgRegsSize = NumGPRs * 4;
02683 
02684   // If parameter is split between stack and GPRs...
02685   if (NumGPRs && Align > 4 &&
02686       (ArgRegsSize < ArgSize ||
02687         InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
02688     // Add padding for the part of the parameter recovered from GPRs. For
02689     // example, if Align == 8, its last byte must be at address K*8 - 1.
02690     // We need this because the remaining (stack) part of the parameter is
02691     // stack-aligned, and we need to "attach" the "GPRs head" to it without
02692     // gaps:
02693     // Stack:
02694     // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
02695     // [ [padding] [GPRs head] ] [        Tail passed via stack       ....
02696     //
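    // A small worked example (assuming no regs were saved yet): one GPR holds
    // the head of the parameter, so ArgRegsSize == 4; with Align == 8,
    // OffsetToAlignment(4, 8) == 4, giving ArgRegsSaveSize == 8 so that the
    // register head lines up flush against the 8-byte-aligned stack tail.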
02697     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02698     unsigned Padding =
02699         OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
02700     ArgRegsSaveSize = ArgRegsSize + Padding;
02701   } else
02702     // We don't need to extend the regs save size for byval parameters if
02703     // they are passed via GPRs only.
02704     ArgRegsSaveSize = ArgRegsSize;
02705 }
02706 
02707 // The remaining GPRs hold either the beginning of variable-argument
02708 // data, or the beginning of an aggregate passed by value (usually
02709 // byval).  Either way, we allocate stack slots adjacent to the data
02710 // provided by our caller, and store the unallocated registers there.
02711 // If this is a variadic function, the va_list pointer will begin with
02712 // these values; otherwise, this reassembles a (byval) structure that
02713 // was split between registers and memory.
02714 // Return: the frame index the registers were stored into.
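//
// For example (sketch): a 16-byte byval argument whose first 8 bytes were
// assigned to r2 and r3 gets a fixed stack object directly below the stack
// part of the aggregate; r2 and r3 are stored to [FI] and [FI+4], so the
// register head and the caller-provided tail form one contiguous object.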
02715 int
02716 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
02717                                   SDLoc dl, SDValue &Chain,
02718                                   const Value *OrigArg,
02719                                   unsigned InRegsParamRecordIdx,
02720                                   unsigned OffsetFromOrigArg,
02721                                   unsigned ArgOffset,
02722                                   unsigned ArgSize,
02723                                   bool ForceMutable,
02724                                   unsigned ByValStoreOffset,
02725                                   unsigned TotalArgRegsSaveSize) const {
02726 
02727   // Currently, two use cases are possible:
02728   // Case #1. Non-var-args function, and we meet the first byval parameter.
02729   //          Set up the first unallocated register as the first byval
02730   //          register and eat all remaining registers
02731   //          (these two actions are performed by the HandleByVal method).
02732   //          Then, here, we initialize the stack frame with
02733   //          "store-reg" instructions.
02734   // Case #2. Var-args function that doesn't contain byval parameters.
02735   //          The same: eat all remaining unallocated registers and
02736   //          initialize the stack frame.
02737 
02738   MachineFunction &MF = DAG.getMachineFunction();
02739   MachineFrameInfo *MFI = MF.getFrameInfo();
02740   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02741   unsigned firstRegToSaveIndex, lastRegToSaveIndex;
02742   unsigned RBegin, REnd;
02743   if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
02744     CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
02745     firstRegToSaveIndex = RBegin - ARM::R0;
02746     lastRegToSaveIndex = REnd - ARM::R0;
02747   } else {
02748     firstRegToSaveIndex = CCInfo.getFirstUnallocated
02749       (GPRArgRegs, array_lengthof(GPRArgRegs));
02750     lastRegToSaveIndex = 4;
02751   }
02752 
02753   unsigned ArgRegsSize, ArgRegsSaveSize;
02754   computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
02755                  ArgRegsSize, ArgRegsSaveSize);
02756 
02757   // Store any byval regs to their spots on the stack so that they may be
02758   // loaded by dereferencing the result of the formal parameter pointer or
02759   // va_next. Note: once the stack area for byval/varargs registers has been
02760   // initialized, it can't be initialized again.
02761   if (ArgRegsSaveSize) {
02762     unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
02763 
02764     if (Padding) {
02765       assert(AFI->getStoredByValParamsPadding() == 0 &&
02766              "The only parameter may be padded.");
02767       AFI->setStoredByValParamsPadding(Padding);
02768     }
02769 
02770     int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
02771                                             Padding +
02772                                               ByValStoreOffset -
02773                                               (int64_t)TotalArgRegsSaveSize,
02774                                             false);
02775     SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
02776     if (Padding) {
02777        MFI->CreateFixedObject(Padding,
02778                               ArgOffset + ByValStoreOffset -
02779                                 (int64_t)ArgRegsSaveSize,
02780                               false);
02781     }
02782 
02783     SmallVector<SDValue, 4> MemOps;
02784     for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
02785          ++firstRegToSaveIndex, ++i) {
02786       const TargetRegisterClass *RC;
02787       if (AFI->isThumb1OnlyFunction())
02788         RC = &ARM::tGPRRegClass;
02789       else
02790         RC = &ARM::GPRRegClass;
02791 
02792       unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
02793       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
02794       SDValue Store =
02795         DAG.getStore(Val.getValue(1), dl, Val, FIN,
02796                      MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
02797                      false, false, 0);
02798       MemOps.push_back(Store);
02799       FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
02800                         DAG.getConstant(4, getPointerTy()));
02801     }
02802 
02803     AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
02804 
02805     if (!MemOps.empty())
02806       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
02807                           &MemOps[0], MemOps.size());
02808     return FrameIndex;
02809   } else {
02810     if (ArgSize == 0) {
02811       // We cannot allocate a zero-byte object for the first variadic argument,
02812       // so just make up a size.
02813       ArgSize = 4;
02814     }
02815     // This will point to the next argument passed via stack.
02816     return MFI->CreateFixedObject(
02817       ArgSize, ArgOffset, !ForceMutable);
02818   }
02819 }
02820 
02821 // Set up the stack frame that the va_list pointer will start from.
02822 void
02823 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
02824                                         SDLoc dl, SDValue &Chain,
02825                                         unsigned ArgOffset,
02826                                         unsigned TotalArgRegsSaveSize,
02827                                         bool ForceMutable) const {
02828   MachineFunction &MF = DAG.getMachineFunction();
02829   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02830 
02831   // Try to store any remaining integer argument regs
02832   // to their spots on the stack so that they may be loaded by dereferencing
02833   // the result of va_next.
02834   // If there are no regs to be stored, just point to the address after the
02835   // last argument passed via stack.
02836   int FrameIndex =
02837     StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
02838                    0, ArgOffset, 0, ForceMutable, 0, TotalArgRegsSaveSize);
02839 
02840   AFI->setVarArgsFrameIndex(FrameIndex);
02841 }
02842 
02843 SDValue
02844 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
02845                                         CallingConv::ID CallConv, bool isVarArg,
02846                                         const SmallVectorImpl<ISD::InputArg>
02847                                           &Ins,
02848                                         SDLoc dl, SelectionDAG &DAG,
02849                                         SmallVectorImpl<SDValue> &InVals)
02850                                           const {
02851   MachineFunction &MF = DAG.getMachineFunction();
02852   MachineFrameInfo *MFI = MF.getFrameInfo();
02853 
02854   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
02855 
02856   // Assign locations to all of the incoming arguments.
02857   SmallVector<CCValAssign, 16> ArgLocs;
02858   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
02859                     getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
02860   CCInfo.AnalyzeFormalArguments(Ins,
02861                                 CCAssignFnForNode(CallConv, /* Return*/ false,
02862                                                   isVarArg));
02863 
02864   SmallVector<SDValue, 16> ArgValues;
02865   int lastInsIndex = -1;
02866   SDValue ArgValue;
02867   Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
02868   unsigned CurArgIdx = 0;
02869 
02870   // Initially ArgRegsSaveSize is zero.
02871   // Then we increase this value each time we meet a byval parameter.
02872   // We also increase this value in the case of a varargs function.
02873   AFI->setArgRegsSaveSize(0);
02874 
02875   unsigned ByValStoreOffset = 0;
02876   unsigned TotalArgRegsSaveSize = 0;
02877   unsigned ArgRegsSaveSizeMaxAlign = 4;
02878 
02879   // Calculate the amount of stack space that we need to allocate to store
02880   // byval and variadic arguments that are passed in registers.
02881   // We need to know this before we allocate the first byval or variadic
02882   // argument, as they will be allocated a stack slot below the CFA (Canonical
02883   // Frame Address, the stack pointer at entry to the function).
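  // Roughly (sketch of the resulting layout, higher addresses first):
  //   | caller's outgoing stack args |
  //   |------------------------------|  <- CFA (SP at function entry)
  //   | GPR save area                |     TotalArgRegsSaveSize bytes
  //   |------------------------------|
  //   | rest of the local frame      |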
02884   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02885     CCValAssign &VA = ArgLocs[i];
02886     if (VA.isMemLoc()) {
02887       int index = VA.getValNo();
02888       if (index != lastInsIndex) {
02889         ISD::ArgFlagsTy Flags = Ins[index].Flags;
02890         if (Flags.isByVal()) {
02891           unsigned ExtraArgRegsSize;
02892           unsigned ExtraArgRegsSaveSize;
02893           computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(),
02894                          Flags.getByValSize(),
02895                          ExtraArgRegsSize, ExtraArgRegsSaveSize);
02896 
02897           TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
02898           if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
02899               ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
02900           CCInfo.nextInRegsParam();
02901         }
02902         lastInsIndex = index;
02903       }
02904     }
02905   }
02906   CCInfo.rewindByValRegsInfo();
02907   lastInsIndex = -1;
02908   if (isVarArg) {
02909     unsigned ExtraArgRegsSize;
02910     unsigned ExtraArgRegsSaveSize;
02911     computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
02912                    ExtraArgRegsSize, ExtraArgRegsSaveSize);
02913     TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
02914   }
02915   // If the arg regs save area contains N-byte aligned values, the
02916   // bottom of it must be at least N-byte aligned.
02917   TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
02918   TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
02919 
02920   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02921     CCValAssign &VA = ArgLocs[i];
02922     std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
02923     CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
02924     // Arguments stored in registers.
02925     if (VA.isRegLoc()) {
02926       EVT RegVT = VA.getLocVT();
02927 
02928       if (VA.needsCustom()) {
02929         // f64 and vector types are split up into multiple registers or
02930         // combinations of registers and stack slots.
02931         if (VA.getLocVT() == MVT::v2f64) {
02932           SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
02933                                                    Chain, DAG, dl);
02934           VA = ArgLocs[++i]; // skip ahead to next loc
02935           SDValue ArgValue2;
02936           if (VA.isMemLoc()) {
02937             int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
02938             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
02939             ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
02940                                     MachinePointerInfo::getFixedStack(FI),
02941                                     false, false, false, 0);
02942           } else {
02943             ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
02944                                              Chain, DAG, dl);
02945           }
02946           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
02947           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
02948                                  ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
02949           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
02950                                  ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
02951         } else
02952           ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
02953 
02954       } else {
02955         const TargetRegisterClass *RC;
02956 
02957         if (RegVT == MVT::f32)
02958           RC = &ARM::SPRRegClass;
02959         else if (RegVT == MVT::f64)
02960           RC = &ARM::DPRRegClass;
02961         else if (RegVT == MVT::v2f64)
02962           RC = &ARM::QPRRegClass;
02963         else if (RegVT == MVT::i32)
02964           RC = AFI->isThumb1OnlyFunction() ?
02965             (const TargetRegisterClass*)&ARM::tGPRRegClass :
02966             (const TargetRegisterClass*)&ARM::GPRRegClass;
02967         else
02968           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
02969 
02970         // Transform the arguments in physical registers into virtual ones.
02971         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02972         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
02973       }
02974 
02975       // If this is an 8 or 16-bit value, it is really passed promoted
02976       // to 32 bits.  Insert an assert[sz]ext to capture this, then
02977       // truncate to the right size.
02978       switch (VA.getLocInfo()) {
02979       default: llvm_unreachable("Unknown loc info!");
02980       case CCValAssign::Full: break;
02981       case CCValAssign::BCvt:
02982         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
02983         break;
02984       case CCValAssign::SExt:
02985         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
02986                                DAG.getValueType(VA.getValVT()));
02987         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
02988         break;
02989       case CCValAssign::ZExt:
02990         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
02991                                DAG.getValueType(VA.getValVT()));
02992         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
02993         break;
02994       }
02995 
02996       InVals.push_back(ArgValue);
02997 
02998     } else { // VA.isRegLoc()
02999 
03000       // sanity check
03001       assert(VA.isMemLoc());
03002       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
03003 
03004       int index = ArgLocs[i].getValNo();
03005 
03006       // Some Ins[] entries become multiple ArgLoc[] entries.
03007       // Process them only once.
03008       if (index != lastInsIndex)
03009         {
03010           ISD::ArgFlagsTy Flags = Ins[index].Flags;
03011           // FIXME: For now, all byval parameter objects are marked mutable.
03012           // This can be changed with more analysis.
03013           // In the case of tail call optimization, mark all arguments mutable,
03014           // since they could be overwritten by the lowering of arguments in a
03015           // tail call.
03016           if (Flags.isByVal()) {
03017             unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
03018 
03019             ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
03020             int FrameIndex = StoreByValRegs(
03021                 CCInfo, DAG, dl, Chain, CurOrigArg,
03022                 CurByValIndex,
03023                 Ins[VA.getValNo()].PartOffset,
03024                 VA.getLocMemOffset(),
03025                 Flags.getByValSize(),
03026                 true /*force mutable frames*/,
03027                 ByValStoreOffset,
03028                 TotalArgRegsSaveSize);
03029             ByValStoreOffset += Flags.getByValSize();
03030             ByValStoreOffset = std::min(ByValStoreOffset, 16U);
03031             InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
03032             CCInfo.nextInRegsParam();
03033           } else {
03034             unsigned FIOffset = VA.getLocMemOffset();
03035             int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
03036                                             FIOffset, true);
03037 
03038             // Create load nodes to retrieve arguments from the stack.
03039             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
03040             InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
03041                                          MachinePointerInfo::getFixedStack(FI),
03042                                          false, false, false, 0));
03043           }
03044           lastInsIndex = index;
03045         }
03046     }
03047   }
03048 
03049   // varargs
03050   if (isVarArg)
03051     VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
03052                          CCInfo.getNextStackOffset(),
03053                          TotalArgRegsSaveSize);
03054 
03055   AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
03056 
03057   return Chain;
03058 }
03059 
03060 /// isFloatingPointZero - Return true if this is +0.0.
03061 static bool isFloatingPointZero(SDValue Op) {
03062   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
03063     return CFP->getValueAPF().isPosZero();
03064   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
03065     // Maybe this has already been legalized into the constant pool?
03066     if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
03067       SDValue WrapperOp = Op.getOperand(1).getOperand(0);
03068       if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
03069         if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
03070           return CFP->getValueAPF().isPosZero();
03071     }
03072   }
03073   return false;
03074 }
03075 
03076 /// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
03077 /// the given operands.
03078 SDValue
03079 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
03080                              SDValue &ARMcc, SelectionDAG &DAG,
03081                              SDLoc dl) const {
03082   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
03083     unsigned C = RHSC->getZExtValue();
03084     if (!isLegalICmpImmediate(C)) {
03085       // Constant does not fit, try adjusting it by one?
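      // For example (ARM mode sketch): 0x1000001 is not a valid modified
      // immediate, but 0x1000000 is, so "x >= 0x1000001" (SETGE) can be
      // rewritten as "x > 0x1000000" (SETGT) below.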
03086       switch (CC) {
03087       default: break;
03088       case ISD::SETLT:
03089       case ISD::SETGE:
03090         if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
03091           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
03092           RHS = DAG.getConstant(C-1, MVT::i32);
03093         }
03094         break;
03095       case ISD::SETULT:
03096       case ISD::SETUGE:
03097         if (C != 0 && isLegalICmpImmediate(C-1)) {
03098           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
03099           RHS = DAG.getConstant(C-1, MVT::i32);
03100         }
03101         break;
03102       case ISD::SETLE:
03103       case ISD::SETGT:
03104         if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
03105           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
03106           RHS = DAG.getConstant(C+1, MVT::i32);
03107         }
03108         break;
03109       case ISD::SETULE:
03110       case ISD::SETUGT:
03111         if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
03112           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
03113           RHS = DAG.getConstant(C+1, MVT::i32);
03114         }
03115         break;
03116       }
03117     }
03118   }
03119 
03120   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
03121   ARMISD::NodeType CompareType;
03122   switch (CondCode) {
03123   default:
03124     CompareType = ARMISD::CMP;
03125     break;
03126   case ARMCC::EQ:
03127   case ARMCC::NE:
03128     // Uses only Z Flag
03129     CompareType = ARMISD::CMPZ;
03130     break;
03131   }
03132   ARMcc = DAG.getConstant(CondCode, MVT::i32);
03133   return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
03134 }
03135 
03136 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
03137 SDValue
03138 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
03139                              SDLoc dl) const {
03140   SDValue Cmp;
03141   if (!isFloatingPointZero(RHS))
03142     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
03143   else
03144     Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
03145   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
03146 }
03147 
03148 /// duplicateCmp - Glue values can have only one use, so this function
03149 /// duplicates a comparison node.
03150 SDValue
03151 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
03152   unsigned Opc = Cmp.getOpcode();
03153   SDLoc DL(Cmp);
03154   if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
03155     return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
03156 
03157   assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
03158   Cmp = Cmp.getOperand(0);
03159   Opc = Cmp.getOpcode();
03160   if (Opc == ARMISD::CMPFP)
03161     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
03162   else {
03163     assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
03164     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
03165   }
03166   return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
03167 }
03168 
03169 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
03170   SDValue Cond = Op.getOperand(0);
03171   SDValue SelectTrue = Op.getOperand(1);
03172   SDValue SelectFalse = Op.getOperand(2);
03173   SDLoc dl(Op);
03174 
03175   // Convert:
03176   //
03177   //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
03178   //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
03179   //
03180   if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
03181     const ConstantSDNode *CMOVTrue =
03182       dyn_cast<ConstantSDNode>(Cond.getOperand(0));
03183     const ConstantSDNode *CMOVFalse =
03184       dyn_cast<ConstantSDNode>(Cond.getOperand(1));
03185 
03186     if (CMOVTrue && CMOVFalse) {
03187       unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
03188       unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
03189 
03190       SDValue True;
03191       SDValue False;
03192       if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
03193         True = SelectTrue;
03194         False = SelectFalse;
03195       } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
03196         True = SelectFalse;
03197         False = SelectTrue;
03198       }
03199 
03200       if (True.getNode() && False.getNode()) {
03201         EVT VT = Op.getValueType();
03202         SDValue ARMcc = Cond.getOperand(2);
03203         SDValue CCR = Cond.getOperand(3);
03204         SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
03205         assert(True.getValueType() == VT);
03206         return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
03207       }
03208     }
03209   }
03210 
03211   // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
03212   // undefined bits before doing a full-word comparison with zero.
03213   Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
03214                      DAG.getConstant(1, Cond.getValueType()));
03215 
03216   return DAG.getSelectCC(dl, Cond,
03217                          DAG.getConstant(0, Cond.getValueType()),
03218                          SelectTrue, SelectFalse, ISD::SETNE);
03219 }
03220 
03221 static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
03222   if (CC == ISD::SETNE)
03223     return ISD::SETEQ;
03224   return ISD::getSetCCInverse(CC, true);
03225 }
03226 
03227 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
03228                                  bool &swpCmpOps, bool &swpVselOps) {
03229   // Start by selecting the GE condition code for opcodes that return true for
03230   // 'equality'
03231   if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
03232       CC == ISD::SETULE)
03233     CondCode = ARMCC::GE;
03234 
03235   // and GT for opcodes that return false for 'equality'.
03236   else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
03237            CC == ISD::SETULT)
03238     CondCode = ARMCC::GT;
03239 
03240   // Since we are constrained to GE/GT, if the opcode contains 'less', we need
03241   // to swap the compare operands.
03242   if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
03243       CC == ISD::SETULT)
03244     swpCmpOps = true;
03245 
03246   // Both GT and GE are ordered comparisons, and return false for 'unordered'.
03247   // If we have an unordered opcode, we need to swap the operands to the VSEL
03248   // instruction (effectively negating the condition).
03249   //
03250   // This also has the effect of swapping which one of 'less' or 'greater'
03251   // returns true, so we also swap the compare operands. It also switches
03252   // whether we return true for 'equality', so we compensate by picking the
03253   // opposite condition code to our original choice.
03254   if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
03255       CC == ISD::SETUGT) {
03256     swpCmpOps = !swpCmpOps;
03257     swpVselOps = !swpVselOps;
03258     CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
03259   }
03260 
03261   // 'ordered' is 'anything but unordered', so use the VS condition code and
03262   // swap the VSEL operands.
03263   if (CC == ISD::SETO) {
03264     CondCode = ARMCC::VS;
03265     swpVselOps = true;
03266   }
03267 
03268   // 'unordered or not equal' is 'anything but equal', so use the EQ condition
03269   // code and swap the VSEL operands.
03270   if (CC == ISD::SETUNE) {
03271     CondCode = ARMCC::EQ;
03272     swpVselOps = true;
03273   }
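  // Worked example (sketch): for SETULT the steps above pick CondCode == GT,
  // set swpCmpOps for the 'less', then the 'unordered' fixup toggles both
  // flags and flips to GE; the net result (GE, cmp operands unswapped, VSEL
  // operands swapped) selects the original 'true' value exactly when the
  // compare is unordered or less-than.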
03274 }
03275 
03276 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
03277   EVT VT = Op.getValueType();
03278   SDValue LHS = Op.getOperand(0);
03279   SDValue RHS = Op.getOperand(1);
03280   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
03281   SDValue TrueVal = Op.getOperand(2);
03282   SDValue FalseVal = Op.getOperand(3);
03283   SDLoc dl(Op);
03284 
03285   if (LHS.getValueType() == MVT::i32) {
03286     // Try to generate VSEL on ARMv8.
03287     // The VSEL instruction can't use all the usual ARM condition
03288     // codes: it only has two bits to select the condition code, so it's
03289     // constrained to use only GE, GT, VS and EQ.
03290     //
03291     // To implement all the various ISD::SETXXX opcodes, we sometimes need to
03292     // swap the operands of the previous compare instruction (effectively
03293     // inverting the compare condition, swapping 'less' and 'greater') and
03294     // sometimes need to swap the operands to the VSEL (which inverts the
03295     // condition in the sense of firing whenever the previous condition didn't)
03296     if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
03297                                       TrueVal.getValueType() == MVT::f64)) {
03298       ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
03299       if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
03300           CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
03301         CC = getInverseCCForVSEL(CC);
03302         std::swap(TrueVal, FalseVal);
03303       }
03304     }
03305 
03306     SDValue ARMcc;
03307     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03308     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
03309     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
03310                        Cmp);
03311   }
03312 
03313   ARMCC::CondCodes CondCode, CondCode2;
03314   FPCCToARMCC(CC, CondCode, CondCode2);
03315 
03316   // Try to generate VSEL on ARMv8.
03317   if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
03318                                     TrueVal.getValueType() == MVT::f64)) {
03319     // We can select VMAXNM/VMINNM from a compare followed by a select with the
03320     // same operands, as follows:
03321     //   c = fcmp [ogt, olt, ugt, ult] a, b
03322     //   select c, a, b
03323     // We only do this in unsafe-fp-math, because signed zeros and NaNs are
03324     // handled differently than the original code sequence.
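    // For instance, "c = fcmp ogt a, b; select c, a, b" becomes a single
    // ARMISD::VMAXNM node (and the olt/ult forms become VMINNM) in the code
    // below.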
03325     if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
03326         RHS == FalseVal) {
03327       if (CC == ISD::SETOGT || CC == ISD::SETUGT)
03328         return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
03329       if (CC == ISD::SETOLT || CC == ISD::SETULT)
03330         return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
03331     }
03332 
03333     bool swpCmpOps = false;
03334     bool swpVselOps = false;
03335     checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
03336 
03337     if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
03338         CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
03339       if (swpCmpOps)
03340         std::swap(LHS, RHS);
03341       if (swpVselOps)
03342         std::swap(TrueVal, FalseVal);
03343     }
03344   }
03345 
03346   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
03347   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
03348   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03349   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
03350                                ARMcc, CCR, Cmp);
03351   if (CondCode2 != ARMCC::AL) {
03352     SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
03353     // FIXME: Needs another CMP because flag can have but one use.
03354     SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
03355     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
03356                          Result, TrueVal, ARMcc2, CCR, Cmp2);
03357   }
03358   return Result;
03359 }
03360 
03361 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
03362 /// to morph to an integer compare sequence.
03363 static bool canChangeToInt(SDValue Op, bool &SeenZero,
03364                            const ARMSubtarget *Subtarget) {
03365   SDNode *N = Op.getNode();
03366   if (!N->hasOneUse())
03367     // Otherwise it requires moving the value from fp to integer registers.
03368     return false;
03369   if (!N->getNumValues())
03370     return false;
03371   EVT VT = Op.getValueType();
03372   if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
03373     // The f32 case is generally profitable. The f64 case only makes sense when
03374     // vcmpe + vmrs are very slow, e.g. on Cortex-A8.
03375     return false;
03376 
03377   if (isFloatingPointZero(Op)) {
03378     SeenZero = true;
03379     return true;
03380   }
03381   return ISD::isNormalLoad(N);
03382 }
03383 
03384 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
03385   if (isFloatingPointZero(Op))
03386     return DAG.getConstant(0, MVT::i32);
03387 
03388   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
03389     return DAG.getLoad(MVT::i32, SDLoc(Op),
03390                        Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
03391                        Ld->isVolatile(), Ld->isNonTemporal(),
03392                        Ld->isInvariant(), Ld->getAlignment());
03393 
03394   llvm_unreachable("Unknown VFP cmp argument!");
03395 }
03396 
03397 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
03398                            SDValue &RetVal1, SDValue &RetVal2) {
03399   if (isFloatingPointZero(Op)) {
03400     RetVal1 = DAG.getConstant(0, MVT::i32);
03401     RetVal2 = DAG.getConstant(0, MVT::i32);
03402     return;
03403   }
03404 
03405   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
03406     SDValue Ptr = Ld->getBasePtr();
03407     RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op),
03408                           Ld->getChain(), Ptr,
03409                           Ld->getPointerInfo(),
03410                           Ld->isVolatile(), Ld->isNonTemporal(),
03411                           Ld->isInvariant(), Ld->getAlignment());
03412 
03413     EVT PtrType = Ptr.getValueType();
03414     unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
03415     SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op),
03416                                  PtrType, Ptr, DAG.getConstant(4, PtrType));
03417     RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op),
03418                           Ld->getChain(), NewPtr,
03419                           Ld->getPointerInfo().getWithOffset(4),
03420                           Ld->isVolatile(), Ld->isNonTemporal(),
03421                           Ld->isInvariant(), NewAlign);
03422     return;
03423   }
03424 
03425   llvm_unreachable("Unknown VFP cmp argument!");
03426 }
03427 
03428 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
03429 /// f32 and even f64 comparisons to integer ones.
03430 SDValue
03431 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
03432   SDValue Chain = Op.getOperand(0);
03433   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
03434   SDValue LHS = Op.getOperand(2);
03435   SDValue RHS = Op.getOperand(3);
03436   SDValue Dest = Op.getOperand(4);
03437   SDLoc dl(Op);
03438 
03439   bool LHSSeenZero = false;
03440   bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
03441   bool RHSSeenZero = false;
03442   bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
03443   if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
03444     // If unsafe fp math optimization is enabled and there are no other uses of
03445     // the CMP operands, and the condition code is EQ or NE, we can optimize it
03446     // to an integer comparison.
03447     if (CC == ISD::SETOEQ)
03448       CC = ISD::SETEQ;
03449     else if (CC == ISD::SETUNE)
03450       CC = ISD::SETNE;
03451 
03452     SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
03453     SDValue ARMcc;
03454     if (LHS.getValueType() == MVT::f32) {
03455       LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
03456                         bitcastf32Toi32(LHS, DAG), Mask);
03457       RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
03458                         bitcastf32Toi32(RHS, DAG), Mask);
03459       SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
03460       SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03461       return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
03462                          Chain, Dest, ARMcc, CCR, Cmp);
03463     }
03464 
03465     SDValue LHS1, LHS2;
03466     SDValue RHS1, RHS2;
03467     expandf64Toi32(LHS, DAG, LHS1, LHS2);
03468     expandf64Toi32(RHS, DAG, RHS1, RHS2);
03469     LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
03470     RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
03471     ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
03472     ARMcc = DAG.getConstant(CondCode, MVT::i32);
03473     SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
03474     SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
03475     return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
03476   }
03477 
03478   return SDValue();
03479 }
03480 
03481 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
03482   SDValue Chain = Op.getOperand(0);
03483   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
03484   SDValue LHS = Op.getOperand(2);
03485   SDValue RHS = Op.getOperand(3);
03486   SDValue Dest = Op.getOperand(4);
03487   SDLoc dl(Op);
03488 
03489   if (LHS.getValueType() == MVT::i32) {
03490     SDValue ARMcc;
03491     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
03492     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03493     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
03494                        Chain, Dest, ARMcc, CCR, Cmp);
03495   }
03496 
03497   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
03498 
03499   if (getTargetMachine().Options.UnsafeFPMath &&
03500       (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
03501        CC == ISD::SETNE || CC == ISD::SETUNE)) {
03502     SDValue Result = OptimizeVFPBrcond(Op, DAG);
03503     if (Result.getNode())
03504       return Result;
03505   }
03506 
03507   ARMCC::CondCodes CondCode, CondCode2;
03508   FPCCToARMCC(CC, CondCode, CondCode2);
03509 
03510   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
03511   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
03512   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03513   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
03514   SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
03515   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
03516   if (CondCode2 != ARMCC::AL) {
03517     ARMcc = DAG.getConstant(CondCode2, MVT::i32);
03518     SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
03519     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
03520   }
03521   return Res;
03522 }
03523 
03524 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
03525   SDValue Chain = Op.getOperand(0);
03526   SDValue Table = Op.getOperand(1);
03527   SDValue Index = Op.getOperand(2);
03528   SDLoc dl(Op);
03529 
03530   EVT PTy = getPointerTy();
03531   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
03532   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
03533   SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
03534   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
03535   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
03536   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
03537   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
03538   if (Subtarget->isThumb2()) {
03539     // Thumb2 uses a two-level jump. That is, it jumps into the jump table
03540     // which does another jump to the destination. This also makes it easier
03541     // to translate it to TBB / TBH later.
03542     // FIXME: This might not work if the function is extremely large.
03543     return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
03544                        Addr, Op.getOperand(2), JTI, UId);
03545   }
03546   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
03547     Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
03548                        MachinePointerInfo::getJumpTable(),
03549                        false, false, false, 0);
03550     Chain = Addr.getValue(1);
03551     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
03552     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
03553   } else {
03554     Addr = DAG.getLoad(PTy, dl, Chain, Addr,
03555                        MachinePointerInfo::getJumpTable(),
03556                        false, false, false, 0);
03557     Chain = Addr.getValue(1);
03558     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
03559   }
03560 }
03561 
03562 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
03563   EVT VT = Op.getValueType();
03564   SDLoc dl(Op);
03565 
03566   if (Op.getValueType().getVectorElementType() == MVT::i32) {
03567     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
03568       return Op;
03569     return DAG.UnrollVectorOp(Op.getNode());
03570   }
03571 
03572   assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
03573          "Invalid type for custom lowering!");
03574   if (VT != MVT::v4i16)
03575     return DAG.UnrollVectorOp(Op.getNode());
03576 
03577   Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
03578   return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
03579 }
03580 
03581 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
03582   EVT VT = Op.getValueType();
03583   if (VT.isVector())
03584     return LowerVectorFP_TO_INT(Op, DAG);
03585 
03586   SDLoc dl(Op);
03587   unsigned Opc;
03588 
03589   switch (Op.getOpcode()) {
03590   default: llvm_unreachable("Invalid opcode!");
03591   case ISD::FP_TO_SINT:
03592     Opc = ARMISD::FTOSI;
03593     break;
03594   case ISD::FP_TO_UINT:
03595     Opc = ARMISD::FTOUI;
03596     break;
03597   }
03598   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
03599   return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
03600 }
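
// Illustrative note: for scalar conversions, FTOSI/FTOUI leave the integer
// result in an FP register (hence the f32-typed node), and the trailing
// BITCAST to i32 models the move to a core register.  A plain
// "fptosi float %x to i32" is thus expected to select to roughly
//   vcvt.s32.f32 s0, s0
//   vmov         r0, s0
// (registers here are only illustrative).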
03601 
03602 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
03603   EVT VT = Op.getValueType();
03604   SDLoc dl(Op);
03605 
03606   if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
03607     if (VT.getVectorElementType() == MVT::f32)
03608       return Op;
03609     return DAG.UnrollVectorOp(Op.getNode());
03610   }
03611 
03612   assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
03613          "Invalid type for custom lowering!");
03614   if (VT != MVT::v4f32)
03615     return DAG.UnrollVectorOp(Op.getNode());
03616 
03617   unsigned CastOpc;
03618   unsigned Opc;
03619   switch (Op.getOpcode()) {
03620   default: llvm_unreachable("Invalid opcode!");
03621   case ISD::SINT_TO_FP:
03622     CastOpc = ISD::SIGN_EXTEND;
03623     Opc = ISD::SINT_TO_FP;
03624     break;
03625   case ISD::UINT_TO_FP:
03626     CastOpc = ISD::ZERO_EXTEND;
03627     Opc = ISD::UINT_TO_FP;
03628     break;
03629   }
03630 
03631   Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
03632   return DAG.getNode(Opc, dl, VT, Op);
03633 }
03634 
03635 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
03636   EVT VT = Op.getValueType();
03637   if (VT.isVector())
03638     return LowerVectorINT_TO_FP(Op, DAG);
03639 
03640   SDLoc dl(Op);
03641   unsigned Opc;
03642 
03643   switch (Op.getOpcode()) {
03644   default: llvm_unreachable("Invalid opcode!");
03645   case ISD::SINT_TO_FP:
03646     Opc = ARMISD::SITOF;
03647     break;
03648   case ISD::UINT_TO_FP:
03649     Opc = ARMISD::UITOF;
03650     break;
03651   }
03652 
03653   Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
03654   return DAG.getNode(Opc, dl, VT, Op);
03655 }
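
// Illustrative note: this is the mirror image of LowerFP_TO_INT -- the
// BITCAST moves the integer into an FP register and SITOF/UITOF converts it
// there, so "sitofp i32 %x to float" is expected to select to roughly
//   vmov         s0, r0
//   vcvt.f32.s32 s0, s0
// (registers here are only illustrative).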
03656 
03657 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
03658   // Implement fcopysign by combining Tmp1's sign bit with Tmp0's magnitude bits.
03659   SDValue Tmp0 = Op.getOperand(0);
03660   SDValue Tmp1 = Op.getOperand(1);
03661   SDLoc dl(Op);
03662   EVT VT = Op.getValueType();
03663   EVT SrcVT = Tmp1.getValueType();
03664   bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
03665     Tmp0.getOpcode() == ARMISD::VMOVDRR;
03666   bool UseNEON = !InGPR && Subtarget->hasNEON();
03667 
03668   if (UseNEON) {
03669     // Use VBSL to copy the sign bit.
03670     unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
03671     SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
03672                                DAG.getTargetConstant(EncodedVal, MVT::i32));
03673     EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
03674     if (VT == MVT::f64)
03675       Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
03676                          DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
03677                          DAG.getConstant(32, MVT::i32));
03678     else /*if (VT == MVT::f32)*/
03679       Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
03680     if (SrcVT == MVT::f32) {
03681       Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
03682       if (VT == MVT::f64)
03683         Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
03684                            DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
03685                            DAG.getConstant(32, MVT::i32));
03686     } else if (VT == MVT::f32)
03687       Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
03688                          DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
03689                          DAG.getConstant(32, MVT::i32));
03690     Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
03691     Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
03692 
03693     SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
03694                                             MVT::i32);
03695     AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
03696     SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
03697                                   DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
03698 
03699     SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
03700                               DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
03701                               DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
03702     if (VT == MVT::f32) {
03703       Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
03704       Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
03705                         DAG.getConstant(0, MVT::i32));
03706     } else {
03707       Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
03708     }
03709 
03710     return Res;
03711   }
03712 
03713   // Bitcast operand 1 to i32.
03714   if (SrcVT == MVT::f64)
03715     Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
03716                        &Tmp1, 1).getValue(1);
03717   Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
03718 
03719   // Or in the signbit with integer operations.
03720   SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
03721   SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
03722   Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
03723   if (VT == MVT::f32) {
03724     Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
03725                        DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
03726     return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
03727                        DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
03728   }
03729 
03730   // f64: Or the high part with signbit and then combine two parts.
03731   Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
03732                      &Tmp0, 1);
03733   SDValue Lo = Tmp0.getValue(0);
03734   SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
03735   Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
03736   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
03737 }
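
// Illustrative example for the non-NEON f32 path above: the result is simply
//   bitcast<f32>((bitcast<i32>(Tmp0) & 0x7fffffff) |
//                (bitcast<i32>(Tmp1) & 0x80000000))
// so, e.g., copysign(1.0f, -2.0f) keeps the magnitude bits of 1.0f and takes
// the sign bit of -2.0f, yielding -1.0f.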
03738 
03739 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
03740   MachineFunction &MF = DAG.getMachineFunction();
03741   MachineFrameInfo *MFI = MF.getFrameInfo();
03742   MFI->setReturnAddressIsTaken(true);
03743 
03744   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
03745     return SDValue();
03746 
03747   EVT VT = Op.getValueType();
03748   SDLoc dl(Op);
03749   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
03750   if (Depth) {
03751     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
03752     SDValue Offset = DAG.getConstant(4, MVT::i32);
03753     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
03754                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
03755                        MachinePointerInfo(), false, false, false, 0);
03756   }
03757 
03758   // Return LR, which contains the return address. Mark it an implicit live-in.
03759   unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
03760   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
03761 }
03762 
03763 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
03764   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
03765   MFI->setFrameAddressIsTaken(true);
03766 
03767   EVT VT = Op.getValueType();
03768   SDLoc dl(Op);  // FIXME probably not meaningful
03769   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
03770   unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetMachO())
03771     ? ARM::R7 : ARM::R11;
03772   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
03773   while (Depth--)
03774     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
03775                             MachinePointerInfo(),
03776                             false, false, false, 0);
03777   return FrameAddr;
03778 }
03779 
03780 /// ExpandBITCAST - If the target supports VFP, this function is called to
03781 /// expand a bit convert where either the source or destination type is i64 to
03782 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
03783 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
03784 /// vectors), since the legalizer won't know what to do with that.
03785 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
03786   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
03787   SDLoc dl(N);
03788   SDValue Op = N->getOperand(0);
03789 
03790   // This function is only supposed to be called for i64 types, either as the
03791   // source or destination of the bit convert.
03792   EVT SrcVT = Op.getValueType();
03793   EVT DstVT = N->getValueType(0);
03794   assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
03795          "ExpandBITCAST called for non-i64 type");
03796 
03797   // Turn i64->f64 into VMOVDRR.
03798   if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
03799     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
03800                              DAG.getConstant(0, MVT::i32));
03801     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
03802                              DAG.getConstant(1, MVT::i32));
03803     return DAG.getNode(ISD::BITCAST, dl, DstVT,
03804                        DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
03805   }
03806 
03807   // Turn f64->i64 into VMOVRRD.
03808   if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
03809     SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
03810                               DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
03811     // Merge the pieces into a single i64 value.
03812     return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
03813   }
03814 
03815   return SDValue();
03816 }
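
// Illustrative example: "bitcast i64 %x to double" becomes
// VMOVDRR(lo32(%x), hi32(%x)), i.e. a single move of the two i32 halves into
// a D register, while the f64 -> i64 direction becomes VMOVRRD followed by a
// BUILD_PAIR of the two resulting i32 values.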
03817 
03818 /// getZeroVector - Returns a vector of specified type with all zero elements.
03819 /// Zero vectors are used to represent vector negation and in those cases
03820 /// will be implemented with the NEON VNEG instruction.  However, VNEG does
03821 /// not support i64 elements, so sometimes the zero vectors will need to be
03822 /// explicitly constructed.  Regardless, use a canonical VMOV to create the
03823 /// zero vector.
03824 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
03825   assert(VT.isVector() && "Expected a vector type");
03826   // The canonical modified immediate encoding of a zero vector is....0!
03827   SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
03828   EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
03829   SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
03830   return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
03831 }
03832 
03833 /// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
03834 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
03835 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
03836                                                 SelectionDAG &DAG) const {
03837   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
03838   EVT VT = Op.getValueType();
03839   unsigned VTBits = VT.getSizeInBits();
03840   SDLoc dl(Op);
03841   SDValue ShOpLo = Op.getOperand(0);
03842   SDValue ShOpHi = Op.getOperand(1);
03843   SDValue ShAmt  = Op.getOperand(2);
03844   SDValue ARMcc;
03845   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
03846 
03847   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
03848 
03849   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
03850                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
03851   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
03852   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
03853                                    DAG.getConstant(VTBits, MVT::i32));
03854   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
03855   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
03856   SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
03857 
03858   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03859   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
03860                           ARMcc, DAG, dl);
03861   SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
03862   SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
03863                            CCR, Cmp);
03864 
03865   SDValue Ops[2] = { Lo, Hi };
03866   return DAG.getMergeValues(Ops, 2, dl);
03867 }
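
// Illustrative sketch of the DAG built above, for a 64-bit value split as
// (ShOpLo, ShOpHi) and a shift amount n:
//   FalseVal = (ShOpLo >> n) | (ShOpHi << (32 - n))   // the n < 32 case
//   TrueVal  = ShOpHi >> (n - 32)                      // the n >= 32 case
//   Lo       = CMOV(FalseVal, TrueVal) on (n - 32) >= 0
//   Hi       = ShOpHi >> n
// where ">>" is arithmetic for SRA_PARTS and logical for SRL_PARTS.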
03868 
03869 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
03870 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
03871 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
03872                                                SelectionDAG &DAG) const {
03873   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
03874   EVT VT = Op.getValueType();
03875   unsigned VTBits = VT.getSizeInBits();
03876   SDLoc dl(Op);
03877   SDValue ShOpLo = Op.getOperand(0);
03878   SDValue ShOpHi = Op.getOperand(1);
03879   SDValue ShAmt  = Op.getOperand(2);
03880   SDValue ARMcc;
03881 
03882   assert(Op.getOpcode() == ISD::SHL_PARTS);
03883   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
03884                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
03885   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
03886   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
03887                                    DAG.getConstant(VTBits, MVT::i32));
03888   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
03889   SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
03890 
03891   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
03892   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
03893   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
03894                           ARMcc, DAG, dl);
03895   SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
03896   SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
03897                            CCR, Cmp);
03898 
03899   SDValue Ops[2] = { Lo, Hi };
03900   return DAG.getMergeValues(Ops, 2, dl);
03901 }
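
// Illustrative sketch of the DAG built above, analogous to
// LowerShiftRightParts:
//   FalseVal = (ShOpLo >> (32 - n)) | (ShOpHi << n)   // the n < 32 case
//   Tmp3     = ShOpLo << (n - 32)                      // the n >= 32 case
//   Hi       = CMOV(FalseVal, Tmp3) on (n - 32) >= 0
//   Lo       = ShOpLo << n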
03902 
03903 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
03904                                             SelectionDAG &DAG) const {
03905   // The rounding mode is in bits 23:22 of the FPSCR.
03906   // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
03907   // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
03908   // so that the shift + and get folded into a bitfield extract.
03909   SDLoc dl(Op);
03910   SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
03911                               DAG.getConstant(Intrinsic::arm_get_fpscr,
03912                                               MVT::i32));
03913   SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
03914                                   DAG.getConstant(1U << 22, MVT::i32));
03915   SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
03916                               DAG.getConstant(22, MVT::i32));
03917   return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
03918                      DAG.getConstant(3, MVT::i32));
03919 }
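
// Illustrative check of the formula above: if FPSCR[23:22] holds the rounding
// mode m, then ((FPSCR + (1 << 22)) >> 22) & 3 == (m + 1) & 3, which realizes
// the 0->1, 1->2, 2->3, 3->0 mapping (assuming the usual ARM RMode encoding of
// nearest/+inf/-inf/zero as 0/1/2/3).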
03920 
03921 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
03922                          const ARMSubtarget *ST) {
03923   EVT VT = N->getValueType(0);
03924   SDLoc dl(N);
03925 
03926   if (!ST->hasV6T2Ops())
03927     return SDValue();
03928 
03929   SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
03930   return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
03931 }
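
// Illustrative example: cttz(x) == ctlz(rbit(x)) on v6T2+.  If the lowest set
// bit of x is at position 2, RBIT moves it to position 29 and CLZ then
// returns 2.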
03932 
03933 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
03934 /// for each 16-bit element from operand, repeated.  The basic idea is to
03935 /// leverage vcnt to get the 8-bit counts, gather and add the results.
03936 ///
03937 /// Trace for v4i16:
03938 /// input    = [v0    v1    v2    v3   ] (vi 16-bit element)
03939 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
03940 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
03941 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
03942 ///            [b0 b1 b2 b3 b4 b5 b6 b7]
03943 ///           +[b1 b0 b3 b2 b5 b4 b7 b6]
03944 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
03945 /// vuzp:    = [k0 k1 k2 k3 k0 k1 k2 k3]  each ki is 8-bits)
03946 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
03947   EVT VT = N->getValueType(0);
03948   SDLoc DL(N);
03949 
03950   EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
03951   SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
03952   SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
03953   SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
03954   SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
03955   return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
03956 }
03957 
03958 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
03959 /// bit-count for each 16-bit element from the operand.  We need slightly
03960 /// different sequencing for v4i16 and v8i16 to stay within NEON's available
03961 /// 64/128-bit registers.
03962 ///
03963 /// Trace for v4i16:
03964 /// input           = [v0    v1    v2    v3    ] (vi 16-bit element)
03965 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
03966 /// v8i16:Extended  = [k0    k1    k2    k3    k0    k1    k2    k3    ]
03967 /// v4i16:Extracted = [k0    k1    k2    k3    ]
03968 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
03969   EVT VT = N->getValueType(0);
03970   SDLoc DL(N);
03971 
03972   SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
03973   if (VT.is64BitVector()) {
03974     SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
03975     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
03976                        DAG.getIntPtrConstant(0));
03977   } else {
03978     SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
03979                                     BitCounts, DAG.getIntPtrConstant(0));
03980     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
03981   }
03982 }
03983 
03984 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
03985 /// bit-count for each 32-bit element from the operand.  The idea here is
03986 /// to split the vector into 16-bit elements, leverage the 16-bit count
03987 /// routine, and then combine the results.
03988 ///
03989 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
03990 /// input    = [v0    v1    ] (vi: 32-bit elements)
03991 /// Bitcast  = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
03992 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
03993 /// vrev: N0 = [k1 k0 k3 k2 ]
03994 ///            [k0 k1 k2 k3 ]
03995 ///       N1 =+[k1 k0 k3 k2 ]
03996 ///            [k0 k2 k1 k3 ]
03997 ///       N2 =+[k1 k3 k0 k2 ]
03998 ///            [k0    k2    k1    k3    ]
03999 /// Extended =+[k1    k3    k0    k2    ]
04000 ///            [k0    k2    ]
04001 /// Extracted=+[k1    k3    ]
04002 ///
04003 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
04004   EVT VT = N->getValueType(0);
04005   SDLoc DL(N);
04006 
04007   EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
04008 
04009   SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
04010   SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
04011   SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
04012   SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
04013   SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
04014 
04015   if (VT.is64BitVector()) {
04016     SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
04017     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
04018                        DAG.getIntPtrConstant(0));
04019   } else {
04020     SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
04021                                     DAG.getIntPtrConstant(0));
04022     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
04023   }
04024 }
04025 
04026 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
04027                           const ARMSubtarget *ST) {
04028   EVT VT = N->getValueType(0);
04029 
04030   assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
04031   assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
04032           VT == MVT::v4i16 || VT == MVT::v8i16) &&
04033          "Unexpected type for custom ctpop lowering");
04034 
04035   if (VT.getVectorElementType() == MVT::i32)
04036     return lowerCTPOP32BitElements(N, DAG);
04037   else
04038     return lowerCTPOP16BitElements(N, DAG);
04039 }
04040 
04041 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
04042                           const ARMSubtarget *ST) {
04043   EVT VT = N->getValueType(0);
04044   SDLoc dl(N);
04045 
04046   if (!VT.isVector())
04047     return SDValue();
04048 
04049   // Lower vector shifts on NEON to use VSHL.
04050   assert(ST->hasNEON() && "unexpected vector shift");
04051 
04052   // Left shifts translate directly to the vshiftu intrinsic.
04053   if (N->getOpcode() == ISD::SHL)
04054     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
04055                        DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
04056                        N->getOperand(0), N->getOperand(1));
04057 
04058   assert((N->getOpcode() == ISD::SRA ||
04059           N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
04060 
04061   // NEON uses the same intrinsics for both left and right shifts.  For
04062   // right shifts, the shift amounts are negative, so negate the vector of
04063   // shift amounts.
04064   EVT ShiftVT = N->getOperand(1).getValueType();
04065   SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
04066                                      getZeroVector(ShiftVT, DAG, dl),
04067                                      N->getOperand(1));
04068   Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
04069                              Intrinsic::arm_neon_vshifts :
04070                              Intrinsic::arm_neon_vshiftu);
04071   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
04072                      DAG.getConstant(vshiftInt, MVT::i32),
04073                      N->getOperand(0), NegatedCount);
04074 }
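
// Illustrative example: NEON's VSHL shifts right when given negative per-lane
// counts, so a variable "lshr <4 x i32> %v, %amts" is lowered above as
// vshiftu(%v, 0 - %amts), and "ashr" uses vshifts in the same way.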
04075 
04076 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
04077                                 const ARMSubtarget *ST) {
04078   EVT VT = N->getValueType(0);
04079   SDLoc dl(N);
04080 
04081   // We can get here for a node like i32 = ISD::SHL i32, i64
04082   if (VT != MVT::i64)
04083     return SDValue();
04084 
04085   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
04086          "Unknown shift to lower!");
04087 
04088   // We only lower SRA, SRL of 1 here, all others use generic lowering.
04089   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
04090       cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
04091     return SDValue();
04092 
04093   // If we are in thumb mode, we don't have RRX.
04094   if (ST->isThumb1Only()) return SDValue();
04095 
04096   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
04097   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
04098                            DAG.getConstant(0, MVT::i32));
04099   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
04100                            DAG.getConstant(1, MVT::i32));
04101 
04102   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
04103   // captures the result into a carry flag.
04104   unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
04105   Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
04106 
04107   // The low part is an ARMISD::RRX operand, which shifts the carry in.
04108   Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
04109 
04110   // Merge the pieces into a single i64 value.
04111   return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
04112 }
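
// Illustrative example: "lshr i64 %x, 1" is expected to select to roughly
//   lsrs r1, r1, #1   @ high half; the shifted-out bit lands in the carry flag
//   rrx  r0, r0       @ low half; the carry is rotated into bit 31
// (registers here are only illustrative; the SRA case uses asrs instead).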
04113 
04114 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
04115   SDValue TmpOp0, TmpOp1;
04116   bool Invert = false;
04117   bool Swap = false;
04118   unsigned Opc = 0;
04119 
04120   SDValue Op0 = Op.getOperand(0);
04121   SDValue Op1 = Op.getOperand(1);
04122   SDValue CC = Op.getOperand(2);
04123   EVT VT = Op.getValueType();
04124   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
04125   SDLoc dl(Op);
04126 
04127   if (Op.getOperand(1).getValueType().isFloatingPoint()) {
04128     switch (SetCCOpcode) {
04129     default: llvm_unreachable("Illegal FP comparison");
04130     case ISD::SETUNE:
04131     case ISD::SETNE:  Invert = true; // Fallthrough
04132     case ISD::SETOEQ:
04133     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
04134     case ISD::SETOLT:
04135     case ISD::SETLT: Swap = true; // Fallthrough
04136     case ISD::SETOGT:
04137     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
04138     case ISD::SETOLE:
04139     case ISD::SETLE:  Swap = true; // Fallthrough
04140     case ISD::SETOGE:
04141     case ISD::SETGE: Opc = ARMISD::VCGE; break;
04142     case ISD::SETUGE: Swap = true; // Fallthrough
04143     case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
04144     case ISD::SETUGT: Swap = true; // Fallthrough
04145     case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
04146     case ISD::SETUEQ: Invert = true; // Fallthrough
04147     case ISD::SETONE:
04148       // Expand this to (OLT | OGT).
04149       TmpOp0 = Op0;
04150       TmpOp1 = Op1;
04151       Opc = ISD::OR;
04152       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
04153       Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
04154       break;
04155     case ISD::SETUO: Invert = true; // Fallthrough
04156     case ISD::SETO:
04157       // Expand this to (OLT | OGE).
04158       TmpOp0 = Op0;
04159       TmpOp1 = Op1;
04160       Opc = ISD::OR;
04161       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
04162       Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
04163       break;
04164     }
04165   } else {
04166     // Integer comparisons.
04167     switch (SetCCOpcode) {
04168     default: llvm_unreachable("Illegal integer comparison");
04169     case ISD::SETNE:  Invert = true;
04170     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
04171     case ISD::SETLT:  Swap = true;
04172     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
04173     case ISD::SETLE:  Swap = true;
04174     case ISD::SETGE:  Opc = ARMISD::VCGE; break;
04175     case ISD::SETULT: Swap = true;
04176     case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
04177     case ISD::SETULE: Swap = true;
04178     case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
04179     }
04180 
04181     // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
04182     if (Opc == ARMISD::VCEQ) {
04183 
04184       SDValue AndOp;
04185       if (ISD::isBuildVectorAllZeros(Op1.getNode()))
04186         AndOp = Op0;
04187       else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
04188         AndOp = Op1;
04189 
04190       // Ignore bitconvert.
04191       if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
04192         AndOp = AndOp.getOperand(0);
04193 
04194       if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
04195         Opc = ARMISD::VTST;
04196         Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
04197         Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
04198         Invert = !Invert;
04199       }
04200     }
04201   }
04202 
04203   if (Swap)
04204     std::swap(Op0, Op1);
04205 
04206   // If one of the operands is a constant vector zero, attempt to fold the
04207   // comparison to a specialized compare-against-zero form.
04208   SDValue SingleOp;
04209   if (ISD::isBuildVectorAllZeros(Op1.getNode()))
04210     SingleOp = Op0;
04211   else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
04212     if (Opc == ARMISD::VCGE)
04213       Opc = ARMISD::VCLEZ;
04214     else if (Opc == ARMISD::VCGT)
04215       Opc = ARMISD::VCLTZ;
04216     SingleOp = Op1;
04217   }
04218 
04219   SDValue Result;
04220   if (SingleOp.getNode()) {
04221     switch (Opc) {
04222     case ARMISD::VCEQ:
04223       Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
04224     case ARMISD::VCGE:
04225       Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
04226     case ARMISD::VCLEZ:
04227       Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
04228     case ARMISD::VCGT:
04229       Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
04230     case ARMISD::VCLTZ:
04231       Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
04232     default:
04233       Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
04234     }
04235   } else {
04236      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
04237   }
04238 
04239   if (Invert)
04240     Result = DAG.getNOT(dl, Result, VT);
04241 
04242   return Result;
04243 }
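
// Illustrative examples: NEON only provides EQ/GE/GT-style compares, so the
// code above lowers e.g. SETNE as NOT(VCEQ(a, b)), SETLT as VCGT with the
// operands swapped, and a compare against an all-zeros build_vector folds to
// the specialized VCEQZ/VCGEZ/VCGTZ/VCLEZ/VCLTZ forms.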
04244 
04245 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
04246 /// valid vector constant for a NEON instruction with a "modified immediate"
04247 /// operand (e.g., VMOV).  If so, return the encoded value.
04248 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
04249                                  unsigned SplatBitSize, SelectionDAG &DAG,
04250                                  EVT &VT, bool is128Bits, NEONModImmType type) {
04251   unsigned OpCmode, Imm;
04252 
04253   // SplatBitSize is set to the smallest size that splats the vector, so a
04254   // zero vector will always have SplatBitSize == 8.  However, NEON modified
04255   // immediate instructions other than VMOV do not support the 8-bit encoding
04256   // of a zero vector, and the default encoding of zero is supposed to be the
04257   // 32-bit version.
04258   if (SplatBits == 0)
04259     SplatBitSize = 32;
04260 
04261   switch (SplatBitSize) {
04262   case 8:
04263     if (type != VMOVModImm)
04264       return SDValue();
04265     // Any 1-byte value is OK.  Op=0, Cmode=1110.
04266     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
04267     OpCmode = 0xe;
04268     Imm = SplatBits;
04269     VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
04270     break;
04271 
04272   case 16:
04273     // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
04274     VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
04275     if ((SplatBits & ~0xff) == 0) {
04276       // Value = 0x00nn: Op=x, Cmode=100x.
04277       OpCmode = 0x8;
04278       Imm = SplatBits;
04279       break;
04280     }
04281     if ((SplatBits & ~0xff00) == 0) {
04282       // Value = 0xnn00: Op=x, Cmode=101x.
04283       OpCmode = 0xa;
04284       Imm = SplatBits >> 8;
04285       break;
04286     }
04287     return SDValue();
04288 
04289   case 32:
04290     // NEON's 32-bit VMOV supports splat values where:
04291     // * only one byte is nonzero, or
04292     // * the least significant byte is 0xff and the second byte is nonzero, or
04293     // * the least significant 2 bytes are 0xff and the third is nonzero.
04294     VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
04295     if ((SplatBits & ~0xff) == 0) {
04296       // Value = 0x000000nn: Op=x, Cmode=000x.
04297       OpCmode = 0;
04298       Imm = SplatBits;
04299       break;
04300     }
04301     if ((SplatBits & ~0xff00) == 0) {
04302       // Value = 0x0000nn00: Op=x, Cmode=001x.
04303       OpCmode = 0x2;
04304       Imm = SplatBits >> 8;
04305       break;
04306     }
04307     if ((SplatBits & ~0xff0000) == 0) {
04308       // Value = 0x00nn0000: Op=x, Cmode=010x.
04309       OpCmode = 0x4;
04310       Imm = SplatBits >> 16;
04311       break;
04312     }
04313     if ((SplatBits & ~0xff000000) == 0) {
04314       // Value = 0xnn000000: Op=x, Cmode=011x.
04315       OpCmode = 0x6;
04316       Imm = SplatBits >> 24;
04317       break;
04318     }
04319 
04320     // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
04321     if (type == OtherModImm) return SDValue();
04322 
04323     if ((SplatBits & ~0xffff) == 0 &&
04324         ((SplatBits | SplatUndef) & 0xff) == 0xff) {
04325       // Value = 0x0000nnff: Op=x, Cmode=1100.
04326       OpCmode = 0xc;
04327       Imm = SplatBits >> 8;
04328       break;
04329     }
04330 
04331     if ((SplatBits & ~0xffffff) == 0 &&
04332         ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
04333       // Value = 0x00nnffff: Op=x, Cmode=1101.
04334       OpCmode = 0xd;
04335       Imm = SplatBits >> 16;
04336       break;
04337     }
04338 
04339     // Note: there are a few 32-bit splat values (specifically: 00ffff00,
04340     // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
04341     // VMOV.I32.  A (very) minor optimization would be to replicate the value
04342     // and fall through here to test for a valid 64-bit splat.  But, then the
04343     // caller would also need to check and handle the change in size.
04344     return SDValue();
04345 
04346   case 64: {
04347     if (type != VMOVModImm)
04348       return SDValue();
04349     // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
04350     uint64_t BitMask = 0xff;
04351     uint64_t Val = 0;
04352     unsigned ImmMask = 1;
04353     Imm = 0;
04354     for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
04355       if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
04356         Val |= BitMask;
04357         Imm |= ImmMask;
04358       } else if ((SplatBits & BitMask) != 0) {
04359         return SDValue();
04360       }
04361       BitMask <<= 8;
04362       ImmMask <<= 1;
04363     }
04364     // Op=1, Cmode=1110.
04365     OpCmode = 0x1e;
04366     VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
04367     break;
04368   }
04369 
04370   default:
04371     llvm_unreachable("unexpected size for isNEONModifiedImm");
04372   }
04373 
04374   unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
04375   return DAG.getTargetConstant(EncodedVal, MVT::i32);
04376 }
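
// Illustrative encodings per the cases above: a 32-bit splat of 0x00000041
// uses Cmode=000x with Imm=0x41; a 16-bit splat of 0x4100 uses Cmode=101x with
// Imm=0x41; and 0x000041ff uses the Cmode=1100 "byte of ones" form with
// Imm=0x41, which is only accepted for VMOV/VMVN (not for VORR/VBIC).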
04377 
04378 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
04379                                            const ARMSubtarget *ST) const {
04380   if (!ST->hasVFP3())
04381     return SDValue();
04382 
04383   bool IsDouble = Op.getValueType() == MVT::f64;
04384   ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
04385 
04386   // Try splatting with a VMOV.f32...
04387   APFloat FPVal = CFP->getValueAPF();
04388   int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
04389 
04390   if (ImmVal != -1) {
04391     if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
04392       // We have code in place to select a valid ConstantFP already, no need to
04393       // do any mangling.
04394       return Op;
04395     }
04396 
04397     // It's a float and we are trying to use NEON operations where
04398     // possible. Lower it to a splat followed by an extract.
04399     SDLoc DL(Op);
04400     SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
04401     SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
04402                                       NewVal);
04403     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
04404                        DAG.getConstant(0, MVT::i32));
04405   }
04406 
04407   // The rest of our options are NEON-only; make sure that's allowed before
04408   // proceeding.
04409   if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
04410     return SDValue();
04411 
04412   EVT VMovVT;
04413   uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
04414 
04415   // It wouldn't really be worth bothering for doubles except for one very
04416   // important value, which does happen to match: 0.0. So make sure we don't do
04417   // anything stupid.
04418   if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
04419     return SDValue();
04420 
04421   // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
04422   SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
04423                                      false, VMOVModImm);
04424   if (NewVal != SDValue()) {
04425     SDLoc DL(Op);
04426     SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
04427                                       NewVal);
04428     if (IsDouble)
04429       return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
04430 
04431     // It's a float: cast and extract a vector element.
04432     SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
04433                                        VecConstant);
04434     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
04435                        DAG.getConstant(0, MVT::i32));
04436   }
04437 
04438   // Finally, try a VMVN.i32
04439   NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
04440                              false, VMVNModImm);
04441   if (NewVal != SDValue()) {
04442     SDLoc DL(Op);
04443     SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
04444 
04445     if (IsDouble)
04446       return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
04447 
04448     // It's a float: cast and extract a vector element.
04449     SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
04450                                        VecConstant);
04451     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
04452                        DAG.getConstant(0, MVT::i32));
04453   }
04454 
04455   return SDValue();
04456 }
04457 
04458 // Check if a VEXT instruction can handle the shuffle mask when the
04459 // vector sources of the shuffle are the same.
04460 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
04461   unsigned NumElts = VT.getVectorNumElements();
04462 
04463   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
04464   if (M[0] < 0)
04465     return false;
04466 
04467   Imm = M[0];
04468 
04469   // If this is a VEXT shuffle, the immediate value is the index of the first
04470   // element.  The other shuffle indices must be the successive elements after
04471   // the first one.
04472   unsigned ExpectedElt = Imm;
04473   for (unsigned i = 1; i < NumElts; ++i) {
04474     // Increment the expected index.  If it wraps around, just follow it
04475     // back to index zero and keep going.
04476     ++ExpectedElt;
04477     if (ExpectedElt == NumElts)
04478       ExpectedElt = 0;
04479 
04480     if (M[i] < 0) continue; // ignore UNDEF indices
04481     if (ExpectedElt != static_cast<unsigned>(M[i]))
04482       return false;
04483   }
04484 
04485   return true;
04486 }
04487 
04488 
04489 static bool isVEXTMask(ArrayRef<int> M, EVT VT,
04490                        bool &ReverseVEXT, unsigned &Imm) {
04491   unsigned NumElts = VT.getVectorNumElements();
04492   ReverseVEXT = false;
04493 
04494   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
04495   if (M[0] < 0)
04496     return false;
04497 
04498   Imm = M[0];
04499 
04500   // If this is a VEXT shuffle, the immediate value is the index of the first
04501   // element.  The other shuffle indices must be the successive elements after
04502   // the first one.
04503   unsigned ExpectedElt = Imm;
04504   for (unsigned i = 1; i < NumElts; ++i) {
04505     // Increment the expected index.  If it wraps around, it may still be
04506     // a VEXT but the source vectors must be swapped.
04507     ExpectedElt += 1;
04508     if (ExpectedElt == NumElts * 2) {
04509       ExpectedElt = 0;
04510       ReverseVEXT = true;
04511     }
04512 
04513     if (M[i] < 0) continue; // ignore UNDEF indices
04514     if (ExpectedElt != static_cast<unsigned>(M[i]))
04515       return false;
04516   }
04517 
04518   // Adjust the index value if the source operands will be swapped.
04519   if (ReverseVEXT)
04520     Imm -= NumElts;
04521 
04522   return true;
04523 }
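
// Illustrative examples: for v8i8 the mask <3,4,5,6,7,8,9,10> is a VEXT of
// (V1, V2) with Imm = 3, while <13,14,15,0,1,2,3,4> wraps past the end, so
// ReverseVEXT is set (the sources are swapped) and Imm becomes 13 - 8 = 5.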
04524 
04525 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
04526 /// instruction with the specified blocksize.  (The order of the elements
04527 /// within each block of the vector is reversed.)
04528 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
04529   assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
04530          "Only possible block sizes for VREV are: 16, 32, 64");
04531 
04532   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04533   if (EltSz == 64)
04534     return false;
04535 
04536   unsigned NumElts = VT.getVectorNumElements();
04537   unsigned BlockElts = M[0] + 1;
04538   // If the first shuffle index is UNDEF, be optimistic.
04539   if (M[0] < 0)
04540     BlockElts = BlockSize / EltSz;
04541 
04542   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
04543     return false;
04544 
04545   for (unsigned i = 0; i < NumElts; ++i) {
04546     if (M[i] < 0) continue; // ignore UNDEF indices
04547     if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
04548       return false;
04549   }
04550 
04551   return true;
04552 }
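
// Illustrative example: for v8i8 with BlockSize == 32, the mask
// <3,2,1,0,7,6,5,4> reverses each 32-bit block and therefore matches VREV32.8.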
04553 
04554 static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
04555   // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
04556   // range, then 0 is placed into the resulting vector. So pretty much any mask
04557   // of 8 elements can work here.
04558   return VT == MVT::v8i8 && M.size() == 8;
04559 }
04560 
04561 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
04562   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04563   if (EltSz == 64)
04564     return false;
04565 
04566   unsigned NumElts = VT.getVectorNumElements();
04567   WhichResult = (M[0] == 0 ? 0 : 1);
04568   for (unsigned i = 0; i < NumElts; i += 2) {
04569     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
04570         (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
04571       return false;
04572   }
04573   return true;
04574 }
04575 
04576 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
04577 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
04578 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
04579 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
04580   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04581   if (EltSz == 64)
04582     return false;
04583 
04584   unsigned NumElts = VT.getVectorNumElements();
04585   WhichResult = (M[0] == 0 ? 0 : 1);
04586   for (unsigned i = 0; i < NumElts; i += 2) {
04587     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
04588         (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
04589       return false;
04590   }
04591   return true;
04592 }
04593 
04594 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
04595   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04596   if (EltSz == 64)
04597     return false;
04598 
04599   unsigned NumElts = VT.getVectorNumElements();
04600   WhichResult = (M[0] == 0 ? 0 : 1);
04601   for (unsigned i = 0; i != NumElts; ++i) {
04602     if (M[i] < 0) continue; // ignore UNDEF indices
04603     if ((unsigned) M[i] != 2 * i + WhichResult)
04604       return false;
04605   }
04606 
04607   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
04608   if (VT.is64BitVector() && EltSz == 32)
04609     return false;
04610 
04611   return true;
04612 }
04613 
04614 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
04615 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
04616 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
04617 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
04618   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04619   if (EltSz == 64)
04620     return false;
04621 
04622   unsigned Half = VT.getVectorNumElements() / 2;
04623   WhichResult = (M[0] == 0 ? 0 : 1);
04624   for (unsigned j = 0; j != 2; ++j) {
04625     unsigned Idx = WhichResult;
04626     for (unsigned i = 0; i != Half; ++i) {
04627       int MIdx = M[i + j * Half];
04628       if (MIdx >= 0 && (unsigned) MIdx != Idx)
04629         return false;
04630       Idx += 2;
04631     }
04632   }
04633 
04634   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
04635   if (VT.is64BitVector() && EltSz == 32)
04636     return false;
04637 
04638   return true;
04639 }
04640 
04641 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
04642   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04643   if (EltSz == 64)
04644     return false;
04645 
04646   unsigned NumElts = VT.getVectorNumElements();
04647   WhichResult = (M[0] == 0 ? 0 : 1);
04648   unsigned Idx = WhichResult * NumElts / 2;
04649   for (unsigned i = 0; i != NumElts; i += 2) {
04650     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
04651         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
04652       return false;
04653     Idx += 1;
04654   }
04655 
04656   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
04657   if (VT.is64BitVector() && EltSz == 32)
04658     return false;
04659 
04660   return true;
04661 }
04662 
04663 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
04664 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
04665 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
04666 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
04667   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
04668   if (EltSz == 64)
04669     return false;
04670 
04671   unsigned NumElts = VT.getVectorNumElements();
04672   WhichResult = (M[0] == 0 ? 0 : 1);
04673   unsigned Idx = WhichResult * NumElts / 2;
04674   for (unsigned i = 0; i != NumElts; i += 2) {
04675     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
04676         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
04677       return false;
04678     Idx += 1;
04679   }
04680 
04681   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
04682   if (VT.is64BitVector() && EltSz == 32)
04683     return false;
04684 
04685   return true;
04686 }
04687 
04688 /// \return true if this is a reverse operation on a vector.
04689 static bool isReverseMask(ArrayRef<int> M, EVT VT) {
04690   unsigned NumElts = VT.getVectorNumElements();
04691   // Make sure the mask has the right size.
04692   if (NumElts != M.size())
04693       return false;
04694 
04695   // Look for <15, ..., 3, -1, 1, 0>.
04696   for (unsigned i = 0; i != NumElts; ++i)
04697     if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
04698       return false;
04699 
04700   return true;
04701 }
04702 
04703 // If N is an integer constant that can be moved into a register in one
04704 // instruction, return an SDValue of such a constant (will become a MOV
04705 // instruction).  Otherwise return null.
04706 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
04707                                      const ARMSubtarget *ST, SDLoc dl) {
04708   uint64_t Val;
04709   if (!isa<ConstantSDNode>(N))
04710     return SDValue();
04711   Val = cast<ConstantSDNode>(N)->getZExtValue();
04712 
04713   if (ST->isThumb1Only()) {
04714     if (Val <= 255 || ~Val <= 255)
04715       return DAG.getConstant(Val, MVT::i32);
04716   } else {
04717     if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
04718       return DAG.getConstant(Val, MVT::i32);
04719   }
04720   return SDValue();
04721 }
04722 
04723 // If this is a case we can't handle, return null and let the default
04724 // expansion code take care of it.
04725 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
04726                                              const ARMSubtarget *ST) const {
04727   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
04728   SDLoc dl(Op);
04729   EVT VT = Op.getValueType();
04730 
04731   APInt SplatBits, SplatUndef;
04732   unsigned SplatBitSize;
04733   bool HasAnyUndefs;
04734   if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
04735     if (SplatBitSize <= 64) {
04736       // Check if an immediate VMOV works.
04737       EVT VmovVT;
04738       SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
04739                                       SplatUndef.getZExtValue(), SplatBitSize,
04740                                       DAG, VmovVT, VT.is128BitVector(),
04741                                       VMOVModImm);
04742       if (Val.getNode()) {
04743         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
04744         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
04745       }
04746 
04747       // Try an immediate VMVN.
04748       uint64_t NegatedImm = (~SplatBits).getZExtValue();
04749       Val = isNEONModifiedImm(NegatedImm,
04750                                       SplatUndef.getZExtValue(), SplatBitSize,
04751                                       DAG, VmovVT, VT.is128BitVector(),
04752                                       VMVNModImm);
04753       if (Val.getNode()) {
04754         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
04755         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
04756       }
04757 
04758       // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
04759       if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
04760         int ImmVal = ARM_AM::getFP32Imm(SplatBits);
04761         if (ImmVal != -1) {
04762           SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
04763           return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
04764         }
04765       }
04766     }
04767   }
04768 
04769   // Scan through the operands to see if only one value is used.
04770   //
04771   // As an optimisation, even if more than one value is used it may be more
04772   // profitable to splat with one value and then change some lanes.
04773   //
04774   // Heuristically we decide to do this if the vector has a "dominant" value,
04775   // defined as splatted to more than half of the lanes.
04776   unsigned NumElts = VT.getVectorNumElements();
04777   bool isOnlyLowElement = true;
04778   bool usesOnlyOneValue = true;
04779   bool hasDominantValue = false;
04780   bool isConstant = true;
04781 
04782   // Map of the number of times a particular SDValue appears in the
04783   // element list.
04784   DenseMap<SDValue, unsigned> ValueCounts;
04785   SDValue Value;
04786   for (unsigned i = 0; i < NumElts; ++i) {
04787     SDValue V = Op.getOperand(i);
04788     if (V.getOpcode() == ISD::UNDEF)
04789       continue;
04790     if (i > 0)
04791       isOnlyLowElement = false;
04792     if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
04793       isConstant = false;
04794 
04795     ValueCounts.insert(std::make_pair(V, 0));
04796     unsigned &Count = ValueCounts[V];
04797 
04798     // Is this value dominant? (takes up more than half of the lanes)
04799     if (++Count > (NumElts / 2)) {
04800       hasDominantValue = true;
04801       Value = V;
04802     }
04803   }
04804   if (ValueCounts.size() != 1)
04805     usesOnlyOneValue = false;
04806   if (!Value.getNode() && ValueCounts.size() > 0)
04807     Value = ValueCounts.begin()->first;
04808 
04809   if (ValueCounts.size() == 0)
04810     return DAG.getUNDEF(VT);
04811 
04812   // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
04813   // Keep going (skip the early SCALAR_TO_VECTOR) when that element is a load.
04814   if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
04815     return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
04816 
04817   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
04818 
04819   // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
04820   // i32 and try again.
04821   if (hasDominantValue && EltSize <= 32) {
04822     if (!isConstant) {
04823       SDValue N;
04824 
04825       // If we are VDUPing a value that comes directly from a vector, that will
04826       // cause an unnecessary move to and from a GPR, where instead we could
04827       // just use VDUPLANE. We can only do this if the lane being extracted
04828       // is at a constant index, as the VDUP from lane instructions only have
04829       // constant-index forms.
04830       if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
04831           isa<ConstantSDNode>(Value->getOperand(1))) {
04832         // We need to create a new undef vector to use for the VDUPLANE if the
04833         // size of the vector from which we get the value is different than the
04834         // size of the vector that we need to create. We will insert the element
04835         // such that the register coalescer will remove unnecessary copies.
04836         if (VT != Value->getOperand(0).getValueType()) {
04837           ConstantSDNode *constIndex;
04838           constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
04839           assert(constIndex && "The index is not a constant!");
04840           unsigned index = constIndex->getAPIntValue().getLimitedValue() %
04841                              VT.getVectorNumElements();
04842           N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
04843                  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
04844                         Value, DAG.getConstant(index, MVT::i32)),
04845                            DAG.getConstant(index, MVT::i32));
04846         } else
04847           N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
04848                         Value->getOperand(0), Value->getOperand(1));
04849       } else
04850         N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
04851 
04852       if (!usesOnlyOneValue) {
04853         // The dominant value was splatted as 'N', but we now have to insert
04854         // all differing elements.
04855         for (unsigned I = 0; I < NumElts; ++I) {
04856           if (Op.getOperand(I) == Value)
04857             continue;
04858           SmallVector<SDValue, 3> Ops;
04859           Ops.push_back(N);
04860           Ops.push_back(Op.getOperand(I));
04861           Ops.push_back(DAG.getConstant(I, MVT::i32));
04862           N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
04863         }
04864       }
04865       return N;
04866     }
04867     if (VT.getVectorElementType().isFloatingPoint()) {
04868       SmallVector<SDValue, 8> Ops;
04869       for (unsigned i = 0; i < NumElts; ++i)
04870         Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
04871                                   Op.getOperand(i)));
04872       EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
04873       SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
04874       Val = LowerBUILD_VECTOR(Val, DAG, ST);
04875       if (Val.getNode())
04876         return DAG.getNode(ISD::BITCAST, dl, VT, Val);
04877     }
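    // Illustrative reading of the FP branch above: a vector such as
    // <1.0f, 1.0f, 1.0f, 0.0f> has each element bitcast to i32 and the node
    // re-lowered as an integer BUILD_VECTOR, giving the integer paths (VMOV
    // immediates, VDUP of the dominant value) a chance to apply; any result
    // is bitcast back to the original FP type.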
04878     if (usesOnlyOneValue) {
04879       SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
04880       if (isConstant && Val.getNode())
04881         return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
04882     }
04883   }
04884 
04885   // If all elements are constants and the case above didn't get hit, fall back
04886   // to the default expansion, which will generate a load from the constant
04887   // pool.
04888   if (isConstant)
04889     return SDValue();
04890 
04891   // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
04892   if (NumElts >= 4) {
04893     SDValue shuffle = ReconstructShuffle(Op, DAG);
04894     if (shuffle != SDValue())
04895       return shuffle;
04896   }
04897 
04898   // Vectors with 32- or 64-bit elements can be built by directly assigning
04899   // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
04900   // will be legalized.
04901   if (EltSize >= 32) {
04902     // Do the expansion with floating-point types, since that is what the VFP
04903     // registers are defined to use, and since i64 is not legal.
04904     EVT EltVT = EVT::getFloatingPointVT(EltSize);
04905     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
04906     SmallVector<SDValue, 8> Ops;
04907     for (unsigned i = 0; i < NumElts; ++i)
04908       Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
04909     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
04910     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
04911   }
04912 
04913   // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
04914   // know the default expansion would otherwise fall back on something even
04915   // worse. For a vector with one or two non-undef values, that's
04916   // scalar_to_vector for the elements followed by a shuffle (provided the
04917   // shuffle is valid for the target) and materialization element by element
04918   // on the stack followed by a load for everything else.
04919   if (!isConstant && !usesOnlyOneValue) {
04920     SDValue Vec = DAG.getUNDEF(VT);
04921     for (unsigned i = 0 ; i < NumElts; ++i) {
04922       SDValue V = Op.getOperand(i);
04923       if (V.getOpcode() == ISD::UNDEF)
04924         continue;
04925       SDValue LaneIdx = DAG.getConstant(i, MVT::i32);
04926       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
04927     }
04928     return Vec;
04929   }
04930 
04931   return SDValue();
04932 }
04933 
04934 // Gather data to see if the operation can be modelled as a
04935 // shuffle in combination with VEXTs.
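// Illustrative case: a v4i16 built from extract_vector_elt of two v8i16
// sources, say <A[4], A[5], B[0], B[1]>, can be rebuilt as a shuffle of the
// high half of A and the low half of B with mask <0, 1, 4, 5>, instead of
// four scalar extracts and inserts, provided the resulting mask is legal.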
04936 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
04937                                               SelectionDAG &DAG) const {
04938   SDLoc dl(Op);
04939   EVT VT = Op.getValueType();
04940   unsigned NumElts = VT.getVectorNumElements();
04941 
04942   SmallVector<SDValue, 2> SourceVecs;
04943   SmallVector<unsigned, 2> MinElts;
04944   SmallVector<unsigned, 2> MaxElts;
04945 
04946   for (unsigned i = 0; i < NumElts; ++i) {
04947     SDValue V = Op.getOperand(i);
04948     if (V.getOpcode() == ISD::UNDEF)
04949       continue;
04950     else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
04951       // A shuffle can only come from building a vector from various
04952       // elements of other vectors.
04953       return SDValue();
04954     } else if (V.getOperand(0).getValueType().getVectorElementType() !=
04955                VT.getVectorElementType()) {
04956       // This code doesn't know how to handle shuffles where the vector
04957       // element types do not match (this happens because type legalization
04958       // promotes the return type of EXTRACT_VECTOR_ELT).
04959       // FIXME: It might be appropriate to extend this code to handle
04960       // mismatched types.
04961       return SDValue();
04962     }
04963 
04964     // Record this extraction against the appropriate vector if possible...
04965     SDValue SourceVec = V.getOperand(0);
04966     // If the element number isn't a constant, we can't effectively
04967     // analyze what's going on.
04968     if (!isa<ConstantSDNode>(V.getOperand(1)))
04969       return SDValue();
04970     unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
04971     bool FoundSource = false;
04972     for (unsigned j = 0; j < SourceVecs.size(); ++j) {
04973       if (SourceVecs[j] == SourceVec) {
04974         if (MinElts[j] > EltNo)
04975           MinElts[j] = EltNo;
04976         if (MaxElts[j] < EltNo)
04977           MaxElts[j] = EltNo;
04978         FoundSource = true;
04979         break;
04980       }
04981     }
04982 
04983     // Or record a new source if not...
04984     if (!FoundSource) {
04985       SourceVecs.push_back(SourceVec);
04986       MinElts.push_back(EltNo);
04987       MaxElts.push_back(EltNo);
04988     }
04989   }
04990 
04991   // Currently only do something sane when at most two source vectors
04992   // are involved.
04993   if (SourceVecs.size() > 2)
04994     return SDValue();
04995 
04996   SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
04997   int VEXTOffsets[2] = {0, 0};
04998 
04999   // This loop extracts the usage patterns of the source vectors
05000   // and prepares appropriate SDValues for a shuffle if possible.
05001   for (unsigned i = 0; i < SourceVecs.size(); ++i) {
05002     if (SourceVecs[i].getValueType() == VT) {
05003       // No VEXT necessary
05004       ShuffleSrcs[i] = SourceVecs[i];
05005       VEXTOffsets[i] = 0;
05006       continue;
05007     } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
05008       // It probably isn't worth padding out a smaller vector just to
05009       // break it down again in a shuffle.
05010       return SDValue();
05011     }
05012 
05013     // Since only 64-bit and 128-bit vectors are legal on ARM and
05014     // we've eliminated the other cases...
05015     assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
05016            "unexpected vector sizes in ReconstructShuffle");
05017 
05018     if (MaxElts[i] - MinElts[i] >= NumElts) {
05019       // Span is too large for a VEXT to cope with.
05020       return SDValue();
05021     }
05022 
05023     if (MinElts[i] >= NumElts) {
05024       // The extraction can just take the second half
05025       VEXTOffsets[i] = NumElts;
05026       ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
05027                                    SourceVecs[i],
05028                                    DAG.getIntPtrConstant(NumElts));
05029     } else if (MaxElts[i] < NumElts) {
05030       // The extraction can just take the first half
05031       VEXTOffsets[i] = 0;
05032       ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
05033                                    SourceVecs[i],
05034                                    DAG.getIntPtrConstant(0));
05035     } else {
05036       // An actual VEXT is needed
05037       VEXTOffsets[i] = MinElts[i];
05038       SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
05039                                      SourceVecs[i],
05040                                      DAG.getIntPtrConstant(0));
05041       SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
05042                                      SourceVecs[i],
05043                                      DAG.getIntPtrConstant(NumElts));
05044       ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
05045                                    DAG.getConstant(VEXTOffsets[i], MVT::i32));
05046     }
05047   }
05048 
05049   SmallVector<int, 8> Mask;
05050 
05051   for (unsigned i = 0; i < NumElts; ++i) {
05052     SDValue Entry = Op.getOperand(i);
05053     if (Entry.getOpcode() == ISD::UNDEF) {
05054       Mask.push_back(-1);
05055       continue;
05056     }
05057 
05058     SDValue ExtractVec = Entry.getOperand(0);
05059     int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
05060                                           .getOperand(1))->getSExtValue();
05061     if (ExtractVec == SourceVecs[0]) {
05062       Mask.push_back(ExtractElt - VEXTOffsets[0]);
05063     } else {
05064       Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
05065     }
05066   }
05067 
05068   // Final check before we try to produce nonsense...
05069   if (isShuffleMaskLegal(Mask, VT))
05070     return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
05071                                 &Mask[0]);
05072 
05073   return SDValue();
05074 }
05075 
05076 /// isShuffleMaskLegal - Targets can use this to indicate that they only
05077 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
05078 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
05079 /// are assumed to be legal.
05080 bool
05081 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
05082                                       EVT VT) const {
05083   if (VT.getVectorNumElements() == 4 &&
05084       (VT.is128BitVector() || VT.is64BitVector())) {
05085     unsigned PFIndexes[4];
05086     for (unsigned i = 0; i != 4; ++i) {
05087       if (M[i] < 0)
05088         PFIndexes[i] = 8;
05089       else
05090         PFIndexes[i] = M[i];
05091     }
05092 
05093     // Compute the index in the perfect shuffle table.
05094     unsigned PFTableIndex =
05095       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
05096     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
05097     unsigned Cost = (PFEntry >> 30);
05098 
05099     if (Cost <= 4)
05100       return true;
05101   }
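  // Worked example of the indexing above (illustrative): each lane index is a
  // base-9 digit (0-7 for a concrete element, 8 for undef), so the mask
  // <0, 2, 4, 6> maps to PFTableIndex = 0*729 + 2*81 + 4*9 + 6 = 204, and the
  // top two bits of the looked-up entry hold its cost.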
05102 
05103   bool ReverseVEXT;
05104   unsigned Imm, WhichResult;
05105 
05106   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
05107   return (EltSize >= 32 ||
05108           ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
05109           isVREVMask(M, VT, 64) ||
05110           isVREVMask(M, VT, 32) ||
05111           isVREVMask(M, VT, 16) ||
05112           isVEXTMask(M, VT, ReverseVEXT, Imm) ||
05113           isVTBLMask(M, VT) ||
05114           isVTRNMask(M, VT, WhichResult) ||
05115           isVUZPMask(M, VT, WhichResult) ||
05116           isVZIPMask(M, VT, WhichResult) ||
05117           isVTRN_v_undef_Mask(M, VT, WhichResult) ||
05118           isVUZP_v_undef_Mask(M, VT, WhichResult) ||
05119           isVZIP_v_undef_Mask(M, VT, WhichResult) ||
05120           ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
05121 }
05122 
05123 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
05124 /// the specified operations to build the shuffle.
05125 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
05126                                       SDValue RHS, SelectionDAG &DAG,
05127                                       SDLoc dl) {
05128   unsigned OpNum = (PFEntry >> 26) & 0x0F;
05129   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
05130   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
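  // Layout implied by the shifts above: bits [31:30] hold the cost (checked
  // by the callers), [29:26] the opcode from the enum below, [25:13] the
  // table index used to recursively build the LHS operand, and [12:0] the
  // same for the RHS.  Those indices are base-9 encodings of four lane
  // numbers, e.g. (1*9+2)*9+3 encodes <0,1,2,3> (an unmodified LHS), as the
  // OP_COPY check below shows.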
05131 
05132   enum {
05133     OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
05134     OP_VREV,
05135     OP_VDUP0,
05136     OP_VDUP1,
05137     OP_VDUP2,
05138     OP_VDUP3,
05139     OP_VEXT1,
05140     OP_VEXT2,
05141     OP_VEXT3,
05142     OP_VUZPL, // VUZP, left result
05143     OP_VUZPR, // VUZP, right result
05144     OP_VZIPL, // VZIP, left result
05145     OP_VZIPR, // VZIP, right result
05146     OP_VTRNL, // VTRN, left result
05147     OP_VTRNR  // VTRN, right result
05148   };
05149 
05150   if (OpNum == OP_COPY) {
05151     if (LHSID == (1*9+2)*9+3) return LHS;
05152     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
05153     return RHS;
05154   }
05155 
05156   SDValue OpLHS, OpRHS;
05157   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
05158   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
05159   EVT VT = OpLHS.getValueType();
05160 
05161   switch (OpNum) {
05162   default: llvm_unreachable("Unknown shuffle opcode!");
05163   case OP_VREV:
05164     // VREV divides the vector in half and swaps within the half.
05165     if (VT.getVectorElementType() == MVT::i32 ||
05166         VT.getVectorElementType() == MVT::f32)
05167       return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
05168     // vrev <4 x i16> -> VREV32
05169     if (VT.getVectorElementType() == MVT::i16)
05170       return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
05171     // vrev <4 x i8> -> VREV16
05172     assert(VT.getVectorElementType() == MVT::i8);
05173     return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
05174   case OP_VDUP0:
05175   case OP_VDUP1:
05176   case OP_VDUP2:
05177   case OP_VDUP3:
05178     return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
05179                        OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
05180   case OP_VEXT1:
05181   case OP_VEXT2:
05182   case OP_VEXT3:
05183     return DAG.getNode(ARMISD::VEXT, dl, VT,
05184                        OpLHS, OpRHS,
05185                        DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
05186   case OP_VUZPL:
05187   case OP_VUZPR:
05188     return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
05189                        OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
05190   case OP_VZIPL:
05191   case OP_VZIPR:
05192     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
05193                        OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
05194   case OP_VTRNL:
05195   case OP_VTRNR:
05196     return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
05197                        OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
05198   }
05199 }
05200 
05201 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
05202                                        ArrayRef<int> ShuffleMask,
05203                                        SelectionDAG &DAG) {
05204   // Check to see if we can use the VTBL instruction.
05205   SDValue V1 = Op.getOperand(0);
05206   SDValue V2 = Op.getOperand(1);
05207   SDLoc DL(Op);
05208 
05209   SmallVector<SDValue, 8> VTBLMask;
05210   for (ArrayRef<int>::iterator
05211          I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
05212     VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
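  // For example (illustrative), a full byte reverse of V1 with V2 undef has
  // ShuffleMask <7,6,5,4,3,2,1,0>; the loop above turns that into the
  // constant index vector fed to VTBL1 below, which looks each result byte
  // up in V1.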
05213 
05214   if (V2.getNode()->getOpcode() == ISD::UNDEF)
05215     return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
05216                        DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
05217                                    &VTBLMask[0], 8));
05218 
05219   return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
05220                      DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
05221                                  &VTBLMask[0], 8));
05222 }
05223 
05224 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
05225                                                       SelectionDAG &DAG) {
05226   SDLoc DL(Op);
05227   SDValue OpLHS = Op.getOperand(0);
05228   EVT VT = OpLHS.getValueType();
05229 
05230   assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
05231          "Expect an v8i16/v16i8 type");
05232   OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
05233   // For a v16i8 type: after the VREV64, we have <7, ..., 0, 15, ..., 8>. Now,
05234   // extract the first 8 bytes into the top double word and the last 8 bytes
05235   // into the bottom double word. The v8i16 case is similar.
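  // Sketch of the v8i16 case (illustrative): starting from <e0, ..., e7>,
  // the VREV64 gives <e3,e2,e1,e0,e7,e6,e5,e4>, and the VEXT below with
  // ExtractNum == 4 rotates the halves, yielding the fully reversed
  // <e7,e6,e5,e4,e3,e2,e1,e0>.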
05236   unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
05237   return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
05238                      DAG.getConstant(ExtractNum, MVT::i32));
05239 }
05240 
05241 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
05242   SDValue V1 = Op.getOperand(0);
05243   SDValue V2 = Op.getOperand(1);
05244   SDLoc dl(Op);
05245   EVT VT = Op.getValueType();
05246   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
05247 
05248   // Convert shuffles that are directly supported on NEON to target-specific
05249   // DAG nodes, instead of keeping them as shuffles and matching them again
05250   // during code selection.  This is more efficient and avoids the possibility
05251   // of inconsistencies between legalization and selection.
05252   // FIXME: floating-point vectors should be canonicalized to integer vectors
05253   // of the same size so that they get CSEd properly.
05254   ArrayRef<int> ShuffleMask = SVN->getMask();
05255 
05256   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
05257   if (EltSize <= 32) {
05258     if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
05259       int Lane = SVN->getSplatIndex();
05260       // If this is an undef splat, generate it via "just" vdup, if possible.
05261       if (Lane == -1) Lane = 0;
05262 
05263       // Test if V1 is a SCALAR_TO_VECTOR.
05264       if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
05265         return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
05266       }
05267       // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
05268       // (and probably will turn into a SCALAR_TO_VECTOR once legalization
05269       // reaches it).
05270       if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
05271           !isa<ConstantSDNode>(V1.getOperand(0))) {
05272         bool IsScalarToVector = true;
05273         for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
05274           if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
05275             IsScalarToVector = false;
05276             break;
05277           }
05278         if (IsScalarToVector)
05279           return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
05280       }
05281       return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
05282                          DAG.getConstant(Lane, MVT::i32));
05283     }
05284 
05285     bool ReverseVEXT;
05286     unsigned Imm;
05287     if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
05288       if (ReverseVEXT)
05289         std::swap(V1, V2);
05290       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
05291                          DAG.getConstant(Imm, MVT::i32));
05292     }
05293 
05294     if (isVREVMask(ShuffleMask, VT, 64))
05295       return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
05296     if (isVREVMask(ShuffleMask, VT, 32))
05297       return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
05298     if (isVREVMask(ShuffleMask, VT, 16))
05299       return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
05300 
05301     if (V2->getOpcode() == ISD::UNDEF &&
05302         isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
05303       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
05304                          DAG.getConstant(Imm, MVT::i32));
05305     }
05306 
05307     // Check for Neon shuffles that modify both input vectors in place.
05308     // If both results are used, i.e., if there are two shuffles with the same
05309     // source operands and with masks corresponding to both results of one of
05310     // these operations, DAG memoization will ensure that a single node is
05311     // used for both shuffles.
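    // For example (illustrative), with v4i32 inputs A and B, VTRN produces
    // <A0,B0,A2,B2> and <A1,B1,A3,B3>; the masks <0,4,2,6> and <1,5,3,7>
    // select results 0 and 1 of the same ARMISD::VTRN node, so two such
    // shuffles over the same operands share a single instruction.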
05312     unsigned WhichResult;
05313     if (isVTRNMask(ShuffleMask, VT, WhichResult))
05314       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
05315                          V1, V2).getValue(WhichResult);
05316     if (isVUZPMask(ShuffleMask, VT, WhichResult))
05317       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
05318                          V1, V2).getValue(WhichResult);
05319     if (isVZIPMask(ShuffleMask, VT, WhichResult))
05320       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
05321                          V1, V2).getValue(WhichResult);
05322 
05323     if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
05324       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
05325                          V1, V1).getValue(WhichResult);
05326     if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
05327       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
05328                          V1, V1).getValue(WhichResult);
05329     if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
05330       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
05331                          V1, V1).getValue(WhichResult);
05332   }
05333 
05334   // If the shuffle is not directly supported and it has 4 elements, use
05335   // the PerfectShuffle-generated table to synthesize it from other shuffles.
05336   unsigned NumElts = VT.getVectorNumElements();
05337   if (NumElts == 4) {
05338     unsigned PFIndexes[4];
05339     for (unsigned i = 0; i != 4; ++i) {
05340       if (ShuffleMask[i] < 0)
05341         PFIndexes[i] = 8;
05342       else
05343         PFIndexes[i] = ShuffleMask[i];
05344     }
05345 
05346     // Compute the index in the perfect shuffle table.
05347     unsigned PFTableIndex =
05348       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
05349     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
05350     unsigned Cost = (PFEntry >> 30);
05351 
05352     if (Cost <= 4)
05353       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
05354   }
05355 
05356   // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
05357   if (EltSize >= 32) {
05358     // Do the expansion with floating-point types, since that is what the VFP
05359     // registers are defined to use, and since i64 is not legal.
05360     EVT EltVT = EVT::getFloatingPointVT(EltSize);
05361     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
05362     V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
05363     V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
05364     SmallVector<SDValue, 8> Ops;
05365     for (unsigned i = 0; i < NumElts; ++i) {
05366       if (ShuffleMask[i] < 0)
05367         Ops.push_back(DAG.getUNDEF(EltVT));
05368       else
05369         Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
05370                                   ShuffleMask[i] < (int)NumElts ? V1 : V2,
05371                                   DAG.getConstant(ShuffleMask[i] & (NumElts-1),
05372                                                   MVT::i32)));
05373     }
05374     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
05375     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
05376   }
05377 
05378   if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
05379     return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
05380 
05381   if (VT == MVT::v8i8) {
05382     SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
05383     if (NewOp.getNode())
05384       return NewOp;
05385   }
05386 
05387   return SDValue();
05388 }
05389 
05390 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
05391   // INSERT_VECTOR_ELT is legal only for immediate indexes.
05392   SDValue Lane = Op.getOperand(2);
05393   if (!isa<ConstantSDNode>(Lane))
05394     return SDValue();
05395 
05396   return Op;
05397 }
05398 
05399 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
05400   // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
05401   SDValue Lane = Op.getOperand(1);
05402   if (!isa<ConstantSDNode>(Lane))
05403     return SDValue();
05404 
05405   SDValue Vec = Op.getOperand(0);
05406   if (Op.getValueType() == MVT::i32 &&
05407       Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
05408     SDLoc dl(Op);
05409     return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
05410   }
05411 
05412   return Op;
05413 }
05414 
05415 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
05416   // The only time a CONCAT_VECTORS operation can have legal types is when
05417   // two 64-bit vectors are concatenated to a 128-bit vector.
05418   assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
05419          "unexpected CONCAT_VECTORS");
05420   SDLoc dl(Op);
05421   SDValue Val = DAG.getUNDEF(MVT::v2f64);
05422   SDValue Op0 = Op.getOperand(0);
05423   SDValue Op1 = Op.getOperand(1);
05424   if (Op0.getOpcode() != ISD::UNDEF)
05425     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
05426                       DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
05427                       DAG.getIntPtrConstant(0));
05428   if (Op1.getOpcode() != ISD::UNDEF)
05429     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
05430                       DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
05431                       DAG.getIntPtrConstant(1));
05432   return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
05433 }
05434 
05435 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
05436 /// element has been zero/sign-extended, depending on the isSigned parameter,
05437 /// from an integer type half its size.
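// For instance (illustrative), a v4i16 BUILD_VECTOR of the constants
// <0, 100, 200, 255> passes the unsigned check below, since every element
// fits in 8 bits; with isSigned the elements would instead have to fit in
// the signed 8-bit range.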
05438 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
05439                                    bool isSigned) {
05440   // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
05441   EVT VT = N->getValueType(0);
05442   if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
05443     SDNode *BVN = N->getOperand(0).getNode();
05444     if (BVN->getValueType(0) != MVT::v4i32 ||
05445         BVN->getOpcode() != ISD::BUILD_VECTOR)
05446       return false;
05447     unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
05448     unsigned HiElt = 1 - LoElt;
05449     ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
05450     ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
05451     ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
05452     ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
05453     if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
05454       return false;
05455     if (isSigned) {
05456       if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
05457           Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
05458         return true;
05459     } else {
05460       if (Hi0->isNullValue() && Hi1->isNullValue())
05461         return true;
05462     }
05463     return false;
05464   }
05465 
05466   if (N->getOpcode() != ISD::BUILD_VECTOR)
05467     return false;
05468 
05469   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
05470     SDNode *Elt = N->getOperand(i).getNode();
05471     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
05472       unsigned EltSize = VT.getVectorElementType().getSizeInBits();
05473       unsigned HalfSize = EltSize / 2;
05474       if (isSigned) {
05475         if (!isIntN(HalfSize, C->getSExtValue()))
05476           return false;
05477       } else {
05478         if (!isUIntN(HalfSize, C->getZExtValue()))
05479           return false;
05480       }
05481       continue;
05482     }
05483     return false;
05484   }
05485 
05486   return true;
05487 }
05488 
05489 /// isSignExtended - Check if a node is a vector value that is sign-extended
05490 /// or a constant BUILD_VECTOR with sign-extended elements.
05491 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
05492   if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
05493     return true;
05494   if (isExtendedBUILD_VECTOR(N, DAG, true))
05495     return true;
05496   return false;
05497 }
05498 
05499 /// isZeroExtended - Check if a node is a vector value that is zero-extended
05500 /// or a constant BUILD_VECTOR with zero-extended elements.
05501 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
05502   if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
05503     return true;
05504   if (isExtendedBUILD_VECTOR(N, DAG, false))
05505     return true;
05506   return false;
05507 }
05508 
05509 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
05510   if (OrigVT.getSizeInBits() >= 64)
05511     return OrigVT;
05512 
05513   assert(OrigVT.isSimple() && "Expecting a simple value type");
05514 
05515   MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
05516   switch (OrigSimpleTy) {
05517   default: llvm_unreachable("Unexpected Vector Type");
05518   case MVT::v2i8:
05519   case MVT::v2i16:
05520     return MVT::v2i32;
05521   case MVT::v4i8:
05522     return MVT::v4i16;
05523   }
05524 }
05525 
05526 /