LLVM API Documentation
00001 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines the interfaces that ARM uses to lower LLVM code into a 00011 // selection DAG. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #define DEBUG_TYPE "arm-isel" 00016 #include "ARMISelLowering.h" 00017 #include "ARM.h" 00018 #include "ARMCallingConv.h" 00019 #include "ARMConstantPoolValue.h" 00020 #include "ARMMachineFunctionInfo.h" 00021 #include "ARMPerfectShuffle.h" 00022 #include "ARMSubtarget.h" 00023 #include "ARMTargetMachine.h" 00024 #include "ARMTargetObjectFile.h" 00025 #include "MCTargetDesc/ARMAddressingModes.h" 00026 #include "llvm/ADT/Statistic.h" 00027 #include "llvm/ADT/StringExtras.h" 00028 #include "llvm/CodeGen/CallingConvLower.h" 00029 #include "llvm/CodeGen/IntrinsicLowering.h" 00030 #include "llvm/CodeGen/MachineBasicBlock.h" 00031 #include "llvm/CodeGen/MachineFrameInfo.h" 00032 #include "llvm/CodeGen/MachineFunction.h" 00033 #include "llvm/CodeGen/MachineInstrBuilder.h" 00034 #include "llvm/CodeGen/MachineModuleInfo.h" 00035 #include "llvm/CodeGen/MachineRegisterInfo.h" 00036 #include "llvm/CodeGen/SelectionDAG.h" 00037 #include "llvm/IR/CallingConv.h" 00038 #include "llvm/IR/Constants.h" 00039 #include "llvm/IR/Function.h" 00040 #include "llvm/IR/GlobalValue.h" 00041 #include "llvm/IR/Instruction.h" 00042 #include "llvm/IR/Instructions.h" 00043 #include "llvm/IR/Intrinsics.h" 00044 #include "llvm/IR/Type.h" 00045 #include "llvm/MC/MCSectionMachO.h" 00046 #include "llvm/Support/CommandLine.h" 00047 #include "llvm/Support/ErrorHandling.h" 00048 #include "llvm/Support/MathExtras.h" 00049 
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");

// This option should go away when tail calls fully work.
// Hidden flag "arm-tail-calls"; defaults to off.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  cl::init(false));

// Hidden flag "arm-long-calls"; defaults to off.
// NOTE(review): unlike the neighbouring options this one is not 'static', so
// it has external linkage -- presumably it is referenced from another
// translation unit; confirm before changing its linkage.
cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

// Hidden flag "arm-interworking"; defaults to on.
static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

namespace {
  /// CCState wrapper that additionally records whether the calling-convention
  /// analysis is being run for a call site or for prologue generation.
  class ARMCCState : public CCState {
  public:
    /// Forwards \p CC, \p isVarArg, \p MF, \p TM, \p locs and \p C to the
    /// CCState constructor and stores \p PC in CallOrPrologue.
    /// \p PC must be either Call or Prologue; any other value trips the
    /// assertion below.
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
               LLVMContext &C, ParmContext PC)
        : CCState(CC, isVarArg, MF, TM, locs, C) {
      // Adjacent string literals are concatenated verbatim, so the separating
      // space must be spelled out inside one of them.
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}

// The APCS parameter registers.
00089 static const uint16_t GPRArgRegs[] = { 00090 ARM::R0, ARM::R1, ARM::R2, ARM::R3 00091 }; 00092 00093 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, 00094 MVT PromotedBitwiseVT) { 00095 if (VT != PromotedLdStVT) { 00096 setOperationAction(ISD::LOAD, VT, Promote); 00097 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); 00098 00099 setOperationAction(ISD::STORE, VT, Promote); 00100 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); 00101 } 00102 00103 MVT ElemTy = VT.getVectorElementType(); 00104 if (ElemTy != MVT::i64 && ElemTy != MVT::f64) 00105 setOperationAction(ISD::SETCC, VT, Custom); 00106 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 00107 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 00108 if (ElemTy == MVT::i32) { 00109 setOperationAction(ISD::SINT_TO_FP, VT, Custom); 00110 setOperationAction(ISD::UINT_TO_FP, VT, Custom); 00111 setOperationAction(ISD::FP_TO_SINT, VT, Custom); 00112 setOperationAction(ISD::FP_TO_UINT, VT, Custom); 00113 } else { 00114 setOperationAction(ISD::SINT_TO_FP, VT, Expand); 00115 setOperationAction(ISD::UINT_TO_FP, VT, Expand); 00116 setOperationAction(ISD::FP_TO_SINT, VT, Expand); 00117 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 00118 } 00119 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 00120 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 00121 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); 00122 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); 00123 setOperationAction(ISD::SELECT, VT, Expand); 00124 setOperationAction(ISD::SELECT_CC, VT, Expand); 00125 setOperationAction(ISD::VSELECT, VT, Expand); 00126 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 00127 if (VT.isInteger()) { 00128 setOperationAction(ISD::SHL, VT, Custom); 00129 setOperationAction(ISD::SRA, VT, Custom); 00130 setOperationAction(ISD::SRL, VT, Custom); 00131 } 00132 00133 // Promote all bit-wise operations. 
00134 if (VT.isInteger() && VT != PromotedBitwiseVT) { 00135 setOperationAction(ISD::AND, VT, Promote); 00136 AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); 00137 setOperationAction(ISD::OR, VT, Promote); 00138 AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); 00139 setOperationAction(ISD::XOR, VT, Promote); 00140 AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); 00141 } 00142 00143 // Neon does not support vector divide/remainder operations. 00144 setOperationAction(ISD::SDIV, VT, Expand); 00145 setOperationAction(ISD::UDIV, VT, Expand); 00146 setOperationAction(ISD::FDIV, VT, Expand); 00147 setOperationAction(ISD::SREM, VT, Expand); 00148 setOperationAction(ISD::UREM, VT, Expand); 00149 setOperationAction(ISD::FREM, VT, Expand); 00150 } 00151 00152 void ARMTargetLowering::addDRTypeForNEON(MVT VT) { 00153 addRegisterClass(VT, &ARM::DPRRegClass); 00154 addTypeForNEON(VT, MVT::f64, MVT::v2i32); 00155 } 00156 00157 void ARMTargetLowering::addQRTypeForNEON(MVT VT) { 00158 addRegisterClass(VT, &ARM::QPRRegClass); 00159 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); 00160 } 00161 00162 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { 00163 if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin()) 00164 return new TargetLoweringObjectFileMachO(); 00165 00166 return new ARMElfTargetObjectFile(); 00167 } 00168 00169 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) 00170 : TargetLowering(TM, createTLOF(TM)) { 00171 Subtarget = &TM.getSubtarget<ARMSubtarget>(); 00172 RegInfo = TM.getRegisterInfo(); 00173 Itins = TM.getInstrItineraryData(); 00174 00175 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 00176 00177 if (Subtarget->isTargetDarwin()) { 00178 // Uses VFP for Thumb libfuncs if available. 00179 if (Subtarget->isThumb() && Subtarget->hasVFP2()) { 00180 // Single-precision floating-point arithmetic. 
00181 setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); 00182 setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); 00183 setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp"); 00184 setLibcallName(RTLIB::DIV_F32, "__divsf3vfp"); 00185 00186 // Double-precision floating-point arithmetic. 00187 setLibcallName(RTLIB::ADD_F64, "__adddf3vfp"); 00188 setLibcallName(RTLIB::SUB_F64, "__subdf3vfp"); 00189 setLibcallName(RTLIB::MUL_F64, "__muldf3vfp"); 00190 setLibcallName(RTLIB::DIV_F64, "__divdf3vfp"); 00191 00192 // Single-precision comparisons. 00193 setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp"); 00194 setLibcallName(RTLIB::UNE_F32, "__nesf2vfp"); 00195 setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp"); 00196 setLibcallName(RTLIB::OLE_F32, "__lesf2vfp"); 00197 setLibcallName(RTLIB::OGE_F32, "__gesf2vfp"); 00198 setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp"); 00199 setLibcallName(RTLIB::UO_F32, "__unordsf2vfp"); 00200 setLibcallName(RTLIB::O_F32, "__unordsf2vfp"); 00201 00202 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 00203 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE); 00204 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 00205 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 00206 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 00207 setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 00208 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 00209 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 00210 00211 // Double-precision comparisons. 
00212 setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp"); 00213 setLibcallName(RTLIB::UNE_F64, "__nedf2vfp"); 00214 setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp"); 00215 setLibcallName(RTLIB::OLE_F64, "__ledf2vfp"); 00216 setLibcallName(RTLIB::OGE_F64, "__gedf2vfp"); 00217 setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp"); 00218 setLibcallName(RTLIB::UO_F64, "__unorddf2vfp"); 00219 setLibcallName(RTLIB::O_F64, "__unorddf2vfp"); 00220 00221 setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); 00222 setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE); 00223 setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); 00224 setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); 00225 setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); 00226 setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); 00227 setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); 00228 setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); 00229 00230 // Floating-point to integer conversions. 00231 // i64 conversions are done via library routines even when generating VFP 00232 // instructions, so use the same ones. 00233 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp"); 00234 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp"); 00235 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp"); 00236 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp"); 00237 00238 // Conversions between floating types. 00239 setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp"); 00240 setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp"); 00241 00242 // Integer to floating-point conversions. 00243 // i64 conversions are done via library routines even when generating VFP 00244 // instructions, so use the same ones. 00245 // FIXME: There appears to be some naming inconsistency in ARM libgcc: 00246 // e.g., __floatunsidf vs. __floatunssidfvfp. 
00247 setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp"); 00248 setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp"); 00249 setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp"); 00250 setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp"); 00251 } 00252 } 00253 00254 // These libcalls are not available in 32-bit. 00255 setLibcallName(RTLIB::SHL_I128, 0); 00256 setLibcallName(RTLIB::SRL_I128, 0); 00257 setLibcallName(RTLIB::SRA_I128, 0); 00258 00259 if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { 00260 // Double-precision floating-point arithmetic helper functions 00261 // RTABI chapter 4.1.2, Table 2 00262 setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); 00263 setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); 00264 setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); 00265 setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); 00266 setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); 00267 setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); 00268 setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); 00269 setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); 00270 00271 // Double-precision floating-point comparison helper functions 00272 // RTABI chapter 4.1.2, Table 3 00273 setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); 00274 setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); 00275 setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); 00276 setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); 00277 setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); 00278 setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); 00279 setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); 00280 setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); 00281 setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); 00282 setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); 00283 setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); 00284 setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); 00285 setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); 00286 setCmpLibcallCC(RTLIB::UO_F64, 
ISD::SETNE); 00287 setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); 00288 setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); 00289 setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); 00290 setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); 00291 setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); 00292 setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); 00293 setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); 00294 setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); 00295 setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); 00296 setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); 00297 00298 // Single-precision floating-point arithmetic helper functions 00299 // RTABI chapter 4.1.2, Table 4 00300 setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); 00301 setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv"); 00302 setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); 00303 setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); 00304 setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); 00305 setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); 00306 setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); 00307 setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); 00308 00309 // Single-precision floating-point comparison helper functions 00310 // RTABI chapter 4.1.2, Table 5 00311 setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); 00312 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 00313 setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); 00314 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); 00315 setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); 00316 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 00317 setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); 00318 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 00319 setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); 00320 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 00321 setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); 00322 
setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 00323 setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); 00324 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 00325 setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); 00326 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 00327 setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); 00328 setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); 00329 setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); 00330 setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); 00331 setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); 00332 setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); 00333 setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); 00334 setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); 00335 00336 // Floating-point to integer conversions. 00337 // RTABI chapter 4.1.2, Table 6 00338 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); 00339 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); 00340 setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); 00341 setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); 00342 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); 00343 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); 00344 setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); 00345 setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); 00346 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); 00347 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); 00348 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); 00349 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); 00350 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); 00351 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); 00352 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); 00353 
setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); 00354 00355 // Conversions between floating types. 00356 // RTABI chapter 4.1.2, Table 7 00357 setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); 00358 setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); 00359 setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); 00360 setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); 00361 00362 // Integer to floating-point conversions. 00363 // RTABI chapter 4.1.2, Table 8 00364 setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); 00365 setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); 00366 setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); 00367 setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); 00368 setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); 00369 setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); 00370 setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); 00371 setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); 00372 setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); 00373 setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); 00374 setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); 00375 setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); 00376 setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); 00377 setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); 00378 setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); 00379 setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); 00380 00381 // Long long helper functions 00382 // RTABI chapter 4.2, Table 9 00383 setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); 00384 setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); 00385 setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); 00386 setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); 00387 setLibcallCallingConv(RTLIB::MUL_I64, 
CallingConv::ARM_AAPCS); 00388 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); 00389 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); 00390 setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); 00391 setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); 00392 setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); 00393 00394 // Integer division functions 00395 // RTABI chapter 4.3.1 00396 setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); 00397 setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); 00398 setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); 00399 setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); 00400 setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); 00401 setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); 00402 setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); 00403 setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); 00404 setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); 00405 setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); 00406 setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); 00407 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); 00408 setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); 00409 setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); 00410 setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); 00411 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); 00412 00413 // Memory operations 00414 // RTABI chapter 4.3.4 00415 setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); 00416 setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); 00417 setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); 00418 setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); 00419 setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); 00420 setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); 00421 } 00422 00423 // Use divmod compiler-rt calls for iOS 5.0 and later. 
00424 if (Subtarget->getTargetTriple().getOS() == Triple::IOS && 00425 !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) { 00426 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); 00427 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); 00428 } 00429 00430 if (Subtarget->isThumb1Only()) 00431 addRegisterClass(MVT::i32, &ARM::tGPRRegClass); 00432 else 00433 addRegisterClass(MVT::i32, &ARM::GPRRegClass); 00434 if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && 00435 !Subtarget->isThumb1Only()) { 00436 addRegisterClass(MVT::f32, &ARM::SPRRegClass); 00437 if (!Subtarget->isFPOnlySP()) 00438 addRegisterClass(MVT::f64, &ARM::DPRRegClass); 00439 00440 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 00441 } 00442 00443 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 00444 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { 00445 for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 00446 InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) 00447 setTruncStoreAction((MVT::SimpleValueType)VT, 00448 (MVT::SimpleValueType)InnerVT, Expand); 00449 setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); 00450 setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); 00451 setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); 00452 } 00453 00454 setOperationAction(ISD::ConstantFP, MVT::f32, Custom); 00455 00456 if (Subtarget->hasNEON()) { 00457 addDRTypeForNEON(MVT::v2f32); 00458 addDRTypeForNEON(MVT::v8i8); 00459 addDRTypeForNEON(MVT::v4i16); 00460 addDRTypeForNEON(MVT::v2i32); 00461 addDRTypeForNEON(MVT::v1i64); 00462 00463 addQRTypeForNEON(MVT::v4f32); 00464 addQRTypeForNEON(MVT::v2f64); 00465 addQRTypeForNEON(MVT::v16i8); 00466 addQRTypeForNEON(MVT::v8i16); 00467 addQRTypeForNEON(MVT::v4i32); 00468 addQRTypeForNEON(MVT::v2i64); 00469 00470 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but 00471 // neither Neon nor VFP support any arithmetic operations on it. 00472 // The same with v4f32. 
But keep in mind that vadd, vsub, vmul are natively 00473 // supported for v4f32. 00474 setOperationAction(ISD::FADD, MVT::v2f64, Expand); 00475 setOperationAction(ISD::FSUB, MVT::v2f64, Expand); 00476 setOperationAction(ISD::FMUL, MVT::v2f64, Expand); 00477 // FIXME: Code duplication: FDIV and FREM are expanded always, see 00478 // ARMTargetLowering::addTypeForNEON method for details. 00479 setOperationAction(ISD::FDIV, MVT::v2f64, Expand); 00480 setOperationAction(ISD::FREM, MVT::v2f64, Expand); 00481 // FIXME: Create unittest. 00482 // In another words, find a way when "copysign" appears in DAG with vector 00483 // operands. 00484 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); 00485 // FIXME: Code duplication: SETCC has custom operation action, see 00486 // ARMTargetLowering::addTypeForNEON method for details. 00487 setOperationAction(ISD::SETCC, MVT::v2f64, Expand); 00488 // FIXME: Create unittest for FNEG and for FABS. 00489 setOperationAction(ISD::FNEG, MVT::v2f64, Expand); 00490 setOperationAction(ISD::FABS, MVT::v2f64, Expand); 00491 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); 00492 setOperationAction(ISD::FSIN, MVT::v2f64, Expand); 00493 setOperationAction(ISD::FCOS, MVT::v2f64, Expand); 00494 setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); 00495 setOperationAction(ISD::FPOW, MVT::v2f64, Expand); 00496 setOperationAction(ISD::FLOG, MVT::v2f64, Expand); 00497 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); 00498 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); 00499 setOperationAction(ISD::FEXP, MVT::v2f64, Expand); 00500 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); 00501 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. 
00502 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); 00503 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); 00504 setOperationAction(ISD::FRINT, MVT::v2f64, Expand); 00505 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); 00506 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); 00507 setOperationAction(ISD::FMA, MVT::v2f64, Expand); 00508 00509 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); 00510 setOperationAction(ISD::FSIN, MVT::v4f32, Expand); 00511 setOperationAction(ISD::FCOS, MVT::v4f32, Expand); 00512 setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); 00513 setOperationAction(ISD::FPOW, MVT::v4f32, Expand); 00514 setOperationAction(ISD::FLOG, MVT::v4f32, Expand); 00515 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); 00516 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); 00517 setOperationAction(ISD::FEXP, MVT::v4f32, Expand); 00518 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); 00519 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); 00520 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); 00521 setOperationAction(ISD::FRINT, MVT::v4f32, Expand); 00522 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); 00523 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); 00524 00525 // Mark v2f32 intrinsics. 
00526 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); 00527 setOperationAction(ISD::FSIN, MVT::v2f32, Expand); 00528 setOperationAction(ISD::FCOS, MVT::v2f32, Expand); 00529 setOperationAction(ISD::FPOWI, MVT::v2f32, Expand); 00530 setOperationAction(ISD::FPOW, MVT::v2f32, Expand); 00531 setOperationAction(ISD::FLOG, MVT::v2f32, Expand); 00532 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); 00533 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); 00534 setOperationAction(ISD::FEXP, MVT::v2f32, Expand); 00535 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); 00536 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); 00537 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); 00538 setOperationAction(ISD::FRINT, MVT::v2f32, Expand); 00539 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); 00540 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); 00541 00542 // Neon does not support some operations on v1i64 and v2i64 types. 00543 setOperationAction(ISD::MUL, MVT::v1i64, Expand); 00544 // Custom handling for some quad-vector types to detect VMULL. 00545 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 00546 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 00547 setOperationAction(ISD::MUL, MVT::v2i64, Custom); 00548 // Custom handling for some vector types to avoid expensive expansions 00549 setOperationAction(ISD::SDIV, MVT::v4i16, Custom); 00550 setOperationAction(ISD::SDIV, MVT::v8i8, Custom); 00551 setOperationAction(ISD::UDIV, MVT::v4i16, Custom); 00552 setOperationAction(ISD::UDIV, MVT::v8i8, Custom); 00553 setOperationAction(ISD::SETCC, MVT::v1i64, Expand); 00554 setOperationAction(ISD::SETCC, MVT::v2i64, Expand); 00555 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with 00556 // a destination type that is wider than the source, and nor does 00557 // it have a FP_TO_[SU]INT instruction with a narrower destination than 00558 // source. 
00559 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); 00560 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); 00561 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); 00562 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); 00563 00564 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); 00565 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); 00566 00567 // Custom expand long extensions to vectors. 00568 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); 00569 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); 00570 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); 00571 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); 00572 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); 00573 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); 00574 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); 00575 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); 00576 00577 // NEON does not have single instruction CTPOP for vectors with element 00578 // types wider than 8-bits. However, custom lowering can leverage the 00579 // v8i8/v16i8 vcnt instruction. 00580 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom); 00581 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); 00582 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); 00583 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); 00584 00585 // NEON only has FMA instructions as of VFP4. 
00586 if (!Subtarget->hasVFP4()) { 00587 setOperationAction(ISD::FMA, MVT::v2f32, Expand); 00588 setOperationAction(ISD::FMA, MVT::v4f32, Expand); 00589 } 00590 00591 setTargetDAGCombine(ISD::INTRINSIC_VOID); 00592 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); 00593 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 00594 setTargetDAGCombine(ISD::SHL); 00595 setTargetDAGCombine(ISD::SRL); 00596 setTargetDAGCombine(ISD::SRA); 00597 setTargetDAGCombine(ISD::SIGN_EXTEND); 00598 setTargetDAGCombine(ISD::ZERO_EXTEND); 00599 setTargetDAGCombine(ISD::ANY_EXTEND); 00600 setTargetDAGCombine(ISD::SELECT_CC); 00601 setTargetDAGCombine(ISD::BUILD_VECTOR); 00602 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 00603 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); 00604 setTargetDAGCombine(ISD::STORE); 00605 setTargetDAGCombine(ISD::FP_TO_SINT); 00606 setTargetDAGCombine(ISD::FP_TO_UINT); 00607 setTargetDAGCombine(ISD::FDIV); 00608 00609 // It is legal to extload from v4i8 to v4i16 or v4i32. 00610 MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, 00611 MVT::v4i16, MVT::v2i16, 00612 MVT::v2i32}; 00613 for (unsigned i = 0; i < 6; ++i) { 00614 setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal); 00615 setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal); 00616 setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal); 00617 } 00618 } 00619 00620 // ARM and Thumb2 support UMLAL/SMLAL. 00621 if (!Subtarget->isThumb1Only()) 00622 setTargetDAGCombine(ISD::ADDC); 00623 00624 00625 computeRegisterProperties(); 00626 00627 // ARM does not have f32 extending load. 00628 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); 00629 00630 // ARM does not have i1 sign extending load. 00631 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 00632 00633 // ARM supports all 4 flavors of integer indexed load / store. 
00634 if (!Subtarget->isThumb1Only()) { 00635 for (unsigned im = (unsigned)ISD::PRE_INC; 00636 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { 00637 setIndexedLoadAction(im, MVT::i1, Legal); 00638 setIndexedLoadAction(im, MVT::i8, Legal); 00639 setIndexedLoadAction(im, MVT::i16, Legal); 00640 setIndexedLoadAction(im, MVT::i32, Legal); 00641 setIndexedStoreAction(im, MVT::i1, Legal); 00642 setIndexedStoreAction(im, MVT::i8, Legal); 00643 setIndexedStoreAction(im, MVT::i16, Legal); 00644 setIndexedStoreAction(im, MVT::i32, Legal); 00645 } 00646 } 00647 00648 // i64 operation support. 00649 setOperationAction(ISD::MUL, MVT::i64, Expand); 00650 setOperationAction(ISD::MULHU, MVT::i32, Expand); 00651 if (Subtarget->isThumb1Only()) { 00652 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 00653 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 00654 } 00655 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() 00656 || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP())) 00657 setOperationAction(ISD::MULHS, MVT::i32, Expand); 00658 00659 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 00660 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 00661 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 00662 setOperationAction(ISD::SRL, MVT::i64, Custom); 00663 setOperationAction(ISD::SRA, MVT::i64, Custom); 00664 00665 if (!Subtarget->isThumb1Only()) { 00666 // FIXME: We should do this for Thumb1 as well. 00667 setOperationAction(ISD::ADDC, MVT::i32, Custom); 00668 setOperationAction(ISD::ADDE, MVT::i32, Custom); 00669 setOperationAction(ISD::SUBC, MVT::i32, Custom); 00670 setOperationAction(ISD::SUBE, MVT::i32, Custom); 00671 } 00672 00673 // ARM does not have ROTL. 
00674 setOperationAction(ISD::ROTL, MVT::i32, Expand); 00675 setOperationAction(ISD::CTTZ, MVT::i32, Custom); 00676 setOperationAction(ISD::CTPOP, MVT::i32, Expand); 00677 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) 00678 setOperationAction(ISD::CTLZ, MVT::i32, Expand); 00679 00680 // These just redirect to CTTZ and CTLZ on ARM. 00681 setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); 00682 setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); 00683 00684 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); 00685 00686 // Only ARMv6 has BSWAP. 00687 if (!Subtarget->hasV6Ops()) 00688 setOperationAction(ISD::BSWAP, MVT::i32, Expand); 00689 00690 if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) && 00691 !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) { 00692 // These are expanded into libcalls if the cpu doesn't have HW divider. 00693 setOperationAction(ISD::SDIV, MVT::i32, Expand); 00694 setOperationAction(ISD::UDIV, MVT::i32, Expand); 00695 } 00696 setOperationAction(ISD::SREM, MVT::i32, Expand); 00697 setOperationAction(ISD::UREM, MVT::i32, Expand); 00698 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 00699 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 00700 00701 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 00702 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 00703 setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); 00704 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 00705 setOperationAction(ISD::BlockAddress, MVT::i32, Custom); 00706 00707 setOperationAction(ISD::TRAP, MVT::Other, Legal); 00708 00709 // Use the default implementation. 
00710 setOperationAction(ISD::VASTART, MVT::Other, Custom); 00711 setOperationAction(ISD::VAARG, MVT::Other, Expand); 00712 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 00713 setOperationAction(ISD::VAEND, MVT::Other, Expand); 00714 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 00715 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 00716 00717 if (!Subtarget->isTargetDarwin()) { 00718 // Non-Darwin platforms may return values in these registers via the 00719 // personality function. 00720 setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); 00721 setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); 00722 setExceptionPointerRegister(ARM::R0); 00723 setExceptionSelectorRegister(ARM::R1); 00724 } 00725 00726 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); 00727 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use 00728 // the default expansion. 00729 // FIXME: This should be checking for v6k, not just v6. 00730 if (Subtarget->hasDataBarrier() || 00731 (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { 00732 // membarrier needs custom lowering; the rest are legal and handled 00733 // normally. 
00734 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 00735 // Custom lowering for 64-bit ops 00736 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); 00737 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); 00738 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); 00739 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); 00740 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); 00741 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); 00742 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); 00743 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); 00744 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); 00745 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); 00746 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); 00747 // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. 00748 setInsertFencesForAtomic(true); 00749 } else { 00750 // Set them all for expansion, which will force libcalls. 00751 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); 00752 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); 00753 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); 00754 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); 00755 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); 00756 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); 00757 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); 00758 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); 00759 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); 00760 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); 00761 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); 00762 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); 00763 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); 00764 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the 00765 // Unordered/Monotonic case. 
00766 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); 00767 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); 00768 } 00769 00770 setOperationAction(ISD::PREFETCH, MVT::Other, Custom); 00771 00772 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. 00773 if (!Subtarget->hasV6Ops()) { 00774 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 00775 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 00776 } 00777 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 00778 00779 if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && 00780 !Subtarget->isThumb1Only()) { 00781 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR 00782 // iff target supports vfp2. 00783 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 00784 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); 00785 } 00786 00787 // We want to custom lower some of our intrinsics. 00788 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 00789 if (Subtarget->isTargetDarwin()) { 00790 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 00791 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 00792 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); 00793 } 00794 00795 setOperationAction(ISD::SETCC, MVT::i32, Expand); 00796 setOperationAction(ISD::SETCC, MVT::f32, Expand); 00797 setOperationAction(ISD::SETCC, MVT::f64, Expand); 00798 setOperationAction(ISD::SELECT, MVT::i32, Custom); 00799 setOperationAction(ISD::SELECT, MVT::f32, Custom); 00800 setOperationAction(ISD::SELECT, MVT::f64, Custom); 00801 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 00802 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 00803 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 00804 00805 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 00806 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 00807 setOperationAction(ISD::BR_CC, MVT::f32, Custom); 00808 setOperationAction(ISD::BR_CC, MVT::f64, Custom); 
00809 setOperationAction(ISD::BR_JT, MVT::Other, Custom); 00810 00811 // We don't support sin/cos/fmod/copysign/pow 00812 setOperationAction(ISD::FSIN, MVT::f64, Expand); 00813 setOperationAction(ISD::FSIN, MVT::f32, Expand); 00814 setOperationAction(ISD::FCOS, MVT::f32, Expand); 00815 setOperationAction(ISD::FCOS, MVT::f64, Expand); 00816 setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 00817 setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 00818 setOperationAction(ISD::FREM, MVT::f64, Expand); 00819 setOperationAction(ISD::FREM, MVT::f32, Expand); 00820 if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && 00821 !Subtarget->isThumb1Only()) { 00822 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); 00823 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 00824 } 00825 setOperationAction(ISD::FPOW, MVT::f64, Expand); 00826 setOperationAction(ISD::FPOW, MVT::f32, Expand); 00827 00828 if (!Subtarget->hasVFP4()) { 00829 setOperationAction(ISD::FMA, MVT::f64, Expand); 00830 setOperationAction(ISD::FMA, MVT::f32, Expand); 00831 } 00832 00833 // Various VFP goodness 00834 if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { 00835 // int <-> fp are custom expanded into bit_convert + ARMISD ops. 00836 if (Subtarget->hasVFP2()) { 00837 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 00838 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 00839 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 00840 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 00841 } 00842 // Special handling for half-precision FP. 
00843 if (!Subtarget->hasFP16()) { 00844 setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); 00845 setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); 00846 } 00847 } 00848 00849 // We have target-specific dag combine patterns for the following nodes: 00850 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine 00851 setTargetDAGCombine(ISD::ADD); 00852 setTargetDAGCombine(ISD::SUB); 00853 setTargetDAGCombine(ISD::MUL); 00854 setTargetDAGCombine(ISD::AND); 00855 setTargetDAGCombine(ISD::OR); 00856 setTargetDAGCombine(ISD::XOR); 00857 00858 if (Subtarget->hasV6Ops()) 00859 setTargetDAGCombine(ISD::SRL); 00860 00861 setStackPointerRegisterToSaveRestore(ARM::SP); 00862 00863 if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() || 00864 !Subtarget->hasVFP2()) 00865 setSchedulingPreference(Sched::RegPressure); 00866 else 00867 setSchedulingPreference(Sched::Hybrid); 00868 00869 //// temporary - rewrite interface to use type 00870 MaxStoresPerMemset = 8; 00871 MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; 00872 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores 00873 MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; 00874 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores 00875 MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; 00876 00877 // On ARM arguments smaller than 4 bytes are extended, so all arguments 00878 // are at least 4 bytes aligned. 00879 setMinStackArgumentAlignment(4); 00880 00881 // Prefer likely predicted branches to selects on out-of-order cores. 00882 PredictableSelectIsExpensive = Subtarget->isLikeA9(); 00883 00884 setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); 00885 } 00886 00887 // FIXME: It might make sense to define the representative register class as the 00888 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is 00889 // a super-register of SPR, and DPR is a superset if DPR_VFP2. 
Consequently, 00890 // SPR's representative would be DPR_VFP2. This should work well if register 00891 // pressure tracking were modified such that a register use would increment the 00892 // pressure of the register class's representative and all of it's super 00893 // classes' representatives transitively. We have not implemented this because 00894 // of the difficulty prior to coalescing of modeling operand register classes 00895 // due to the common occurrence of cross class copies and subregister insertions 00896 // and extractions. 00897 std::pair<const TargetRegisterClass*, uint8_t> 00898 ARMTargetLowering::findRepresentativeClass(MVT VT) const{ 00899 const TargetRegisterClass *RRC = 0; 00900 uint8_t Cost = 1; 00901 switch (VT.SimpleTy) { 00902 default: 00903 return TargetLowering::findRepresentativeClass(VT); 00904 // Use DPR as representative register class for all floating point 00905 // and vector types. Since there are 32 SPR registers and 32 DPR registers so 00906 // the cost is 1 for both f32 and f64. 00907 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: 00908 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: 00909 RRC = &ARM::DPRRegClass; 00910 // When NEON is used for SP, only half of the register file is available 00911 // because operations that define both SP and DP results will be constrained 00912 // to the VFP2 class (D0-D15). We currently model this constraint prior to 00913 // coalescing by double-counting the SP regs. See the FIXME above. 
00914 if (Subtarget->useNEONForSinglePrecisionFP()) 00915 Cost = 2; 00916 break; 00917 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: 00918 case MVT::v4f32: case MVT::v2f64: 00919 RRC = &ARM::DPRRegClass; 00920 Cost = 2; 00921 break; 00922 case MVT::v4i64: 00923 RRC = &ARM::DPRRegClass; 00924 Cost = 4; 00925 break; 00926 case MVT::v8i64: 00927 RRC = &ARM::DPRRegClass; 00928 Cost = 8; 00929 break; 00930 } 00931 return std::make_pair(RRC, Cost); 00932 } 00933 00934 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { 00935 switch (Opcode) { 00936 default: return 0; 00937 case ARMISD::Wrapper: return "ARMISD::Wrapper"; 00938 case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; 00939 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; 00940 case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; 00941 case ARMISD::CALL: return "ARMISD::CALL"; 00942 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; 00943 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; 00944 case ARMISD::tCALL: return "ARMISD::tCALL"; 00945 case ARMISD::BRCOND: return "ARMISD::BRCOND"; 00946 case ARMISD::BR_JT: return "ARMISD::BR_JT"; 00947 case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; 00948 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; 00949 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; 00950 case ARMISD::CMP: return "ARMISD::CMP"; 00951 case ARMISD::CMN: return "ARMISD::CMN"; 00952 case ARMISD::CMPZ: return "ARMISD::CMPZ"; 00953 case ARMISD::CMPFP: return "ARMISD::CMPFP"; 00954 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; 00955 case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; 00956 case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; 00957 00958 case ARMISD::CMOV: return "ARMISD::CMOV"; 00959 00960 case ARMISD::RBIT: return "ARMISD::RBIT"; 00961 00962 case ARMISD::FTOSI: return "ARMISD::FTOSI"; 00963 case ARMISD::FTOUI: return "ARMISD::FTOUI"; 00964 case ARMISD::SITOF: return "ARMISD::SITOF"; 00965 case ARMISD::UITOF: return "ARMISD::UITOF"; 00966 
00967 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; 00968 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; 00969 case ARMISD::RRX: return "ARMISD::RRX"; 00970 00971 case ARMISD::ADDC: return "ARMISD::ADDC"; 00972 case ARMISD::ADDE: return "ARMISD::ADDE"; 00973 case ARMISD::SUBC: return "ARMISD::SUBC"; 00974 case ARMISD::SUBE: return "ARMISD::SUBE"; 00975 00976 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; 00977 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; 00978 00979 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; 00980 case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; 00981 00982 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; 00983 00984 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; 00985 00986 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; 00987 00988 case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; 00989 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; 00990 00991 case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; 00992 00993 case ARMISD::VCEQ: return "ARMISD::VCEQ"; 00994 case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; 00995 case ARMISD::VCGE: return "ARMISD::VCGE"; 00996 case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; 00997 case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; 00998 case ARMISD::VCGEU: return "ARMISD::VCGEU"; 00999 case ARMISD::VCGT: return "ARMISD::VCGT"; 01000 case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; 01001 case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; 01002 case ARMISD::VCGTU: return "ARMISD::VCGTU"; 01003 case ARMISD::VTST: return "ARMISD::VTST"; 01004 01005 case ARMISD::VSHL: return "ARMISD::VSHL"; 01006 case ARMISD::VSHRs: return "ARMISD::VSHRs"; 01007 case ARMISD::VSHRu: return "ARMISD::VSHRu"; 01008 case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; 01009 case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; 01010 case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; 01011 case ARMISD::VSHRN: return "ARMISD::VSHRN"; 01012 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 01013 case ARMISD::VRSHRu: 
return "ARMISD::VRSHRu"; 01014 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 01015 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 01016 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 01017 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 01018 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; 01019 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 01020 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 01021 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 01022 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 01023 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 01024 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 01025 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 01026 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 01027 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 01028 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; 01029 case ARMISD::VDUP: return "ARMISD::VDUP"; 01030 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 01031 case ARMISD::VEXT: return "ARMISD::VEXT"; 01032 case ARMISD::VREV64: return "ARMISD::VREV64"; 01033 case ARMISD::VREV32: return "ARMISD::VREV32"; 01034 case ARMISD::VREV16: return "ARMISD::VREV16"; 01035 case ARMISD::VZIP: return "ARMISD::VZIP"; 01036 case ARMISD::VUZP: return "ARMISD::VUZP"; 01037 case ARMISD::VTRN: return "ARMISD::VTRN"; 01038 case ARMISD::VTBL1: return "ARMISD::VTBL1"; 01039 case ARMISD::VTBL2: return "ARMISD::VTBL2"; 01040 case ARMISD::VMULLs: return "ARMISD::VMULLs"; 01041 case ARMISD::VMULLu: return "ARMISD::VMULLu"; 01042 case ARMISD::UMLAL: return "ARMISD::UMLAL"; 01043 case ARMISD::SMLAL: return "ARMISD::SMLAL"; 01044 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 01045 case ARMISD::FMAX: return "ARMISD::FMAX"; 01046 case ARMISD::FMIN: return "ARMISD::FMIN"; 01047 case ARMISD::BFI: return "ARMISD::BFI"; 01048 case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; 01049 case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; 01050 case ARMISD::VBSL: return "ARMISD::VBSL"; 01051 case ARMISD::VLD2DUP: 
return "ARMISD::VLD2DUP"; 01052 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; 01053 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; 01054 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; 01055 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; 01056 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; 01057 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; 01058 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; 01059 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; 01060 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; 01061 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; 01062 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; 01063 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; 01064 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; 01065 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; 01066 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; 01067 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; 01068 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; 01069 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; 01070 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; 01071 } 01072 } 01073 01074 EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { 01075 if (!VT.isVector()) return getPointerTy(); 01076 return VT.changeVectorElementTypeToInteger(); 01077 } 01078 01079 /// getRegClassFor - Return the register class that should be used for the 01080 /// specified value type. 01081 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const { 01082 // Map v4i64 to QQ registers but do not make the type legal. Similarly map 01083 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to 01084 // load / store 4 to 8 consecutive D registers. 
01085 if (Subtarget->hasNEON()) { 01086 if (VT == MVT::v4i64) 01087 return &ARM::QQPRRegClass; 01088 if (VT == MVT::v8i64) 01089 return &ARM::QQQQPRRegClass; 01090 } 01091 return TargetLowering::getRegClassFor(VT); 01092 } 01093 01094 // Create a fast isel object. 01095 FastISel * 01096 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, 01097 const TargetLibraryInfo *libInfo) const { 01098 return ARM::createFastISel(funcInfo, libInfo); 01099 } 01100 01101 /// getMaximalGlobalOffset - Returns the maximal possible offset which can 01102 /// be used for loads / stores from the global. 01103 unsigned ARMTargetLowering::getMaximalGlobalOffset() const { 01104 return (Subtarget->isThumb1Only() ? 127 : 4095); 01105 } 01106 01107 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { 01108 unsigned NumVals = N->getNumValues(); 01109 if (!NumVals) 01110 return Sched::RegPressure; 01111 01112 for (unsigned i = 0; i != NumVals; ++i) { 01113 EVT VT = N->getValueType(i); 01114 if (VT == MVT::Glue || VT == MVT::Other) 01115 continue; 01116 if (VT.isFloatingPoint() || VT.isVector()) 01117 return Sched::ILP; 01118 } 01119 01120 if (!N->isMachineOpcode()) 01121 return Sched::RegPressure; 01122 01123 // Load are scheduled for latency even if there instruction itinerary 01124 // is not available. 
01125 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 01126 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 01127 01128 if (MCID.getNumDefs() == 0) 01129 return Sched::RegPressure; 01130 if (!Itins->isEmpty() && 01131 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) 01132 return Sched::ILP; 01133 01134 return Sched::RegPressure; 01135 } 01136 01137 //===----------------------------------------------------------------------===// 01138 // Lowering Code 01139 //===----------------------------------------------------------------------===// 01140 01141 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC 01142 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { 01143 switch (CC) { 01144 default: llvm_unreachable("Unknown condition code!"); 01145 case ISD::SETNE: return ARMCC::NE; 01146 case ISD::SETEQ: return ARMCC::EQ; 01147 case ISD::SETGT: return ARMCC::GT; 01148 case ISD::SETGE: return ARMCC::GE; 01149 case ISD::SETLT: return ARMCC::LT; 01150 case ISD::SETLE: return ARMCC::LE; 01151 case ISD::SETUGT: return ARMCC::HI; 01152 case ISD::SETUGE: return ARMCC::HS; 01153 case ISD::SETULT: return ARMCC::LO; 01154 case ISD::SETULE: return ARMCC::LS; 01155 } 01156 } 01157 01158 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 
01159 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, 01160 ARMCC::CondCodes &CondCode2) { 01161 CondCode2 = ARMCC::AL; 01162 switch (CC) { 01163 default: llvm_unreachable("Unknown FP condition!"); 01164 case ISD::SETEQ: 01165 case ISD::SETOEQ: CondCode = ARMCC::EQ; break; 01166 case ISD::SETGT: 01167 case ISD::SETOGT: CondCode = ARMCC::GT; break; 01168 case ISD::SETGE: 01169 case ISD::SETOGE: CondCode = ARMCC::GE; break; 01170 case ISD::SETOLT: CondCode = ARMCC::MI; break; 01171 case ISD::SETOLE: CondCode = ARMCC::LS; break; 01172 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; 01173 case ISD::SETO: CondCode = ARMCC::VC; break; 01174 case ISD::SETUO: CondCode = ARMCC::VS; break; 01175 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; 01176 case ISD::SETUGT: CondCode = ARMCC::HI; break; 01177 case ISD::SETUGE: CondCode = ARMCC::PL; break; 01178 case ISD::SETLT: 01179 case ISD::SETULT: CondCode = ARMCC::LT; break; 01180 case ISD::SETLE: 01181 case ISD::SETULE: CondCode = ARMCC::LE; break; 01182 case ISD::SETNE: 01183 case ISD::SETUNE: CondCode = ARMCC::NE; break; 01184 } 01185 } 01186 01187 //===----------------------------------------------------------------------===// 01188 // Calling Convention Implementation 01189 //===----------------------------------------------------------------------===// 01190 01191 #include "ARMGenCallingConv.inc" 01192 01193 /// CCAssignFnForNode - Selects the correct CCAssignFn for a the 01194 /// given CallingConvention value. 01195 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, 01196 bool Return, 01197 bool isVarArg) const { 01198 switch (CC) { 01199 default: 01200 llvm_unreachable("Unsupported calling convention"); 01201 case CallingConv::Fast: 01202 if (Subtarget->hasVFP2() && !isVarArg) { 01203 if (!Subtarget->isAAPCS_ABI()) 01204 return (Return ? 
RetFastCC_ARM_APCS : FastCC_ARM_APCS); 01205 // For AAPCS ABI targets, just use VFP variant of the calling convention. 01206 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 01207 } 01208 // Fallthrough 01209 case CallingConv::C: { 01210 // Use target triple & subtarget features to do actual dispatch. 01211 if (!Subtarget->isAAPCS_ABI()) 01212 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); 01213 else if (Subtarget->hasVFP2() && 01214 getTargetMachine().Options.FloatABIType == FloatABI::Hard && 01215 !isVarArg) 01216 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 01217 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); 01218 } 01219 case CallingConv::ARM_AAPCS_VFP: 01220 if (!isVarArg) 01221 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 01222 // Fallthrough 01223 case CallingConv::ARM_AAPCS: 01224 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); 01225 case CallingConv::ARM_APCS: 01226 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); 01227 case CallingConv::GHC: 01228 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); 01229 } 01230 } 01231 01232 /// LowerCallResult - Lower the result values of a call into the 01233 /// appropriate copies out of appropriate physical registers. 01234 SDValue 01235 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 01236 CallingConv::ID CallConv, bool isVarArg, 01237 const SmallVectorImpl<ISD::InputArg> &Ins, 01238 DebugLoc dl, SelectionDAG &DAG, 01239 SmallVectorImpl<SDValue> &InVals, 01240 bool isThisReturn, SDValue ThisVal) const { 01241 01242 // Assign locations to each value returned by this call. 01243 SmallVector<CCValAssign, 16> RVLocs; 01244 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 01245 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 01246 CCInfo.AnalyzeCallResult(Ins, 01247 CCAssignFnForNode(CallConv, /* Return*/ true, 01248 isVarArg)); 01249 01250 // Copy all of the result registers out of their specified physreg. 
01251 for (unsigned i = 0; i != RVLocs.size(); ++i) { 01252 CCValAssign VA = RVLocs[i]; 01253 01254 // Pass 'this' value directly from the argument to return value, to avoid 01255 // reg unit interference 01256 if (i == 0 && isThisReturn) { 01257 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && 01258 "unexpected return calling convention register assignment"); 01259 InVals.push_back(ThisVal); 01260 continue; 01261 } 01262 01263 SDValue Val; 01264 if (VA.needsCustom()) { 01265 // Handle f64 or half of a v2f64. 01266 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 01267 InFlag); 01268 Chain = Lo.getValue(1); 01269 InFlag = Lo.getValue(2); 01270 VA = RVLocs[++i]; // skip ahead to next loc 01271 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 01272 InFlag); 01273 Chain = Hi.getValue(1); 01274 InFlag = Hi.getValue(2); 01275 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 01276 01277 if (VA.getLocVT() == MVT::v2f64) { 01278 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 01279 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 01280 DAG.getConstant(0, MVT::i32)); 01281 01282 VA = RVLocs[++i]; // skip ahead to next loc 01283 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 01284 Chain = Lo.getValue(1); 01285 InFlag = Lo.getValue(2); 01286 VA = RVLocs[++i]; // skip ahead to next loc 01287 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 01288 Chain = Hi.getValue(1); 01289 InFlag = Hi.getValue(2); 01290 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 01291 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 01292 DAG.getConstant(1, MVT::i32)); 01293 } 01294 } else { 01295 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), 01296 InFlag); 01297 Chain = Val.getValue(1); 01298 InFlag = Val.getValue(2); 01299 } 01300 01301 switch (VA.getLocInfo()) { 01302 default: llvm_unreachable("Unknown loc info!"); 01303 case 
CCValAssign::Full: break; 01304 case CCValAssign::BCvt: 01305 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); 01306 break; 01307 } 01308 01309 InVals.push_back(Val); 01310 } 01311 01312 return Chain; 01313 } 01314 01315 /// LowerMemOpCallTo - Store the argument to the stack. 01316 SDValue 01317 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, 01318 SDValue StackPtr, SDValue Arg, 01319 DebugLoc dl, SelectionDAG &DAG, 01320 const CCValAssign &VA, 01321 ISD::ArgFlagsTy Flags) const { 01322 unsigned LocMemOffset = VA.getLocMemOffset(); 01323 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 01324 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 01325 return DAG.getStore(Chain, dl, Arg, PtrOff, 01326 MachinePointerInfo::getStack(LocMemOffset), 01327 false, false, 0); 01328 } 01329 01330 void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, 01331 SDValue Chain, SDValue &Arg, 01332 RegsToPassVector &RegsToPass, 01333 CCValAssign &VA, CCValAssign &NextVA, 01334 SDValue &StackPtr, 01335 SmallVector<SDValue, 8> &MemOpChains, 01336 ISD::ArgFlagsTy Flags) const { 01337 01338 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 01339 DAG.getVTList(MVT::i32, MVT::i32), Arg); 01340 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); 01341 01342 if (NextVA.isRegLoc()) 01343 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); 01344 else { 01345 assert(NextVA.isMemLoc()); 01346 if (StackPtr.getNode() == 0) 01347 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 01348 01349 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), 01350 dl, DAG, NextVA, 01351 Flags)); 01352 } 01353 } 01354 01355 /// LowerCall - Lowering a call into a callseq_start <- 01356 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter 01357 /// nodes. 
/// LowerCall - Lower the outgoing call described by CLI into SelectionDAG
/// nodes.
///
/// Argument locations are computed with ARMCCState / CCAssignFnForNode.
/// Register arguments are collected in RegsToPass and later turned into
/// CopyToReg nodes; stack arguments (plain stores, byval copies and byval
/// register loads) contribute chains to MemOpChains, which are merged with a
/// TokenFactor.
///
/// If the call is accepted as a sibcall, an ARMISD::TC_RETURN node is
/// returned. Otherwise the call node is bracketed by CALLSEQ_START /
/// CALLSEQ_END and the function returns whatever LowerCallResult returns
/// (InVals is passed through to it).
///
/// CLI.IsTailCall is bound by reference, so the decision made here is visible
/// to the caller through CLI.
SDValue
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  DebugLoc &dl = CLI.DL;
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  // Reference: assignments to isTailCall below are written back into CLI.
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool doesNotRet = CLI.DoesNotReturn;
  bool isVarArg = CLI.IsVarArg;

  MachineFunction &MF = DAG.getMachineFunction();
  // Only the first outgoing argument is inspected for the sret flag.
  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool isThisReturn = false;
  bool isSibCall = false;
  // Disable tail calls if they're not supported.
  if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
    isTailCall = false;
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                    isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
                                                   Outs, OutVals, Ins, DAG);
    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall) {
      ++NumTailCalls;
      isSibCall = true;
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // For tail calls, memory operands are available in our caller's stack.
  if (isSibCall)
    NumBytes = 0;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!isSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
  //
  // 'i' indexes ArgLocs while 'realArgIdx' indexes Outs/OutVals. They diverge
  // whenever one argument occupies several locations: the custom f64/v2f64
  // paths below advance 'i' with extra ++i.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
    bool isByVal = Flags.isByVal();

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Split the vector into its two f64 lanes and pass each separately.
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        // VA is a reference to an ArgLocs slot, so this copy-assigns the next
        // location over that slot; it does not rebind VA. ArgLocs is local to
        // this function and the overwritten slot is not read again.
        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      // An i32 first argument flagged 'returned': remember it so that the
      // R0-preserving register mask is selected below and OutVals[0] is
      // handed to LowerCallResult.
      if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
        assert(VA.getLocVT() == MVT::i32 &&
               "unexpected calling convention register assignment");
        assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
               "unexpected use of 'returned'");
        isThisReturn = true;
      }
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (isByVal) {
      assert(VA.isMemLoc());
      // Number of registers consumed by this byval argument; 4*offset is the
      // number of bytes of the aggregate already passed in registers.
      unsigned offset = 0;

      // The comparison below is true if part (or all) of this byval aggregate
      // was assigned to registers by the calling-convention analysis.
      unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
      unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();

      if (CurByValIdx < ByValArgsCount) {

        unsigned RegBegin, RegEnd;
        CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);

        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        // NOTE: this 'i' shadows the outer ArgLocs index within this scope.
        // 'i' is the word index into the aggregate, 'j' the target register.
        unsigned int i, j;
        for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
          SDValue Const = DAG.getConstant(4*i, MVT::i32);
          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(j, Load));
        }

        // If the parameter is larger than the register area, "offset" lets
        // us compute the source address and size of the remaining part.
        offset = RegEnd - RegBegin;

        CCInfo.nextInRegsParam();
      }

      // Copy whatever did not fit in registers to its stack slot.
      if (Flags.getByValSize() > 4*offset) {
        unsigned LocMemOffset = VA.getLocMemOffset();
        SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
        SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
                                  StkPtrOff);
        SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
        SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
        SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
                                           MVT::i32);
        SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);

        SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
        MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
                                          Ops, array_lengthof(Ops)));
      }
    } else if (!isSibCall) {
      // Ordinary stack argument. For sibcalls no store is emitted here.
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!isTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.

    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
    // Cleared again, so no glue operand is appended to the TC_RETURN node
    // built below (InFlag is only pushed when it has a node).
    InFlag = SDValue();
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  if (EnableARMLongCalls) {
    // Long calls: the callee address is loaded from a constant-pool entry,
    // so isDirect stays false in this branch.
    assert (getTargetMachine().getRelocationModel() == Reloc::Static
            && "long-calls with non-static relocation model!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);

      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();

      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
                                      ARMPCLabelIndex, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
    }
  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                   getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
    // tBX takes a register source operand.
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else {
      // On ELF targets for PIC code, direct calls should go through the PLT
      unsigned OpFlags = 0;
      if (Subtarget->isTargetELF() &&
          getTargetMachine().getRelocationModel() == Reloc::PIC_)
        OpFlags = ARMII::MO_PLT;
      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
                                      ARMPCLabelIndex, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else {
      unsigned OpFlags = 0;
      // On ELF targets for PIC code, direct calls should go through the PLT
      if (Subtarget->isTargetELF() &&
                  getTargetMachine().getRelocationModel() == Reloc::PIC_)
        OpFlags = ARMII::MO_PLT;
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
    }
  }

  // FIXME: handle tail calls differently.
  // CallOpc is assigned on every path below, but it is only consumed by the
  // non-tail-call DAG.getNode at the end; tail calls return TC_RETURN first.
  unsigned CallOpc;
  bool HasMinSizeAttr = MF.getFunction()->getAttributes().
    hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    if (!isDirect && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
             // Emit regular call when code size is the priority
             !HasMinSizeAttr)
      // "mov lr, pc; b _foo" to avoid confusing the RSP
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
  }

  // Operand order: chain, callee, argument registers, register mask and
  // finally the optional glue.
  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const uint32_t *Mask;
  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
  const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
  if (isThisReturn)
    // For 'this' returns, use the R0-preserving mask
    Mask = ARI->getThisReturnPreservedMask(CallConv);
  else
    Mask = ARI->getCallPreservedMask(CallConv);

  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (isTailCall)
    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());

  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  // InFlag is refreshed from CALLSEQ_END only when there are results to copy
  // out; otherwise the call node's glue is the one passed on below.
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
                         InVals, isThisReturn,
                         isThisReturn ? OutVals[0] : SDValue());
}

/// HandleByVal - Every parameter *after* a byval parameter is passed
/// on the stack.
Remember the next parameter register to allocate, 01747 /// and then confiscate the rest of the parameter registers to insure 01748 /// this. 01749 void 01750 ARMTargetLowering::HandleByVal( 01751 CCState *State, unsigned &size, unsigned Align) const { 01752 unsigned reg = State->AllocateReg(GPRArgRegs, 4); 01753 assert((State->getCallOrPrologue() == Prologue || 01754 State->getCallOrPrologue() == Call) && 01755 "unhandled ParmContext"); 01756 01757 // For in-prologue parameters handling, we also introduce stack offset 01758 // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal. 01759 // This behaviour outsides AAPCS rules (5.5 Parameters Passing) of how 01760 // NSAA should be evaluted (NSAA means "next stacked argument address"). 01761 // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs. 01762 // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs. 01763 unsigned NSAAOffset = State->getNextStackOffset(); 01764 if (State->getCallOrPrologue() != Call) { 01765 for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) { 01766 unsigned RB, RE; 01767 State->getInRegsParamInfo(i, RB, RE); 01768 assert(NSAAOffset >= (RE-RB)*4 && 01769 "Stack offset for byval regs doesn't introduced anymore?"); 01770 NSAAOffset -= (RE-RB)*4; 01771 } 01772 } 01773 if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { 01774 if (Subtarget->isAAPCS_ABI() && Align > 4) { 01775 unsigned AlignInRegs = Align / 4; 01776 unsigned Waste = (ARM::R4 - reg) % AlignInRegs; 01777 for (unsigned i = 0; i < Waste; ++i) 01778 reg = State->AllocateReg(GPRArgRegs, 4); 01779 } 01780 if (reg != 0) { 01781 unsigned excess = 4 * (ARM::R4 - reg); 01782 01783 // Special case when NSAA != SP and parameter size greater than size of 01784 // all remained GPR regs. In that case we can't split parameter, we must 01785 // send it to stack. We also must set NCRN to R4, so waste all 01786 // remained registers. 
01787 if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { 01788 while (State->AllocateReg(GPRArgRegs, 4)) 01789 ; 01790 return; 01791 } 01792 01793 // First register for byval parameter is the first register that wasn't 01794 // allocated before this method call, so it would be "reg". 01795 // If parameter is small enough to be saved in range [reg, r4), then 01796 // the end (first after last) register would be reg + param-size-in-regs, 01797 // else parameter would be splitted between registers and stack, 01798 // end register would be r4 in this case. 01799 unsigned ByValRegBegin = reg; 01800 unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4; 01801 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); 01802 // Note, first register is allocated in the beginning of function already, 01803 // allocate remained amount of registers we need. 01804 for (unsigned i = reg+1; i != ByValRegEnd; ++i) 01805 State->AllocateReg(GPRArgRegs, 4); 01806 // At a call site, a byval parameter that is split between 01807 // registers and memory needs its size truncated here. In a 01808 // function prologue, such byval parameters are reassembled in 01809 // memory, and are not truncated. 01810 if (State->getCallOrPrologue() == Call) { 01811 // Make remained size equal to 0 in case, when 01812 // the whole structure may be stored into registers. 01813 if (size < excess) 01814 size = 0; 01815 else 01816 size -= excess; 01817 } 01818 } 01819 } 01820 } 01821 01822 /// MatchingStackOffset - Return true if the given stack call argument is 01823 /// already available in the same position (relatively) of the caller's 01824 /// incoming argument stack. 
01825 static 01826 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 01827 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 01828 const TargetInstrInfo *TII) { 01829 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 01830 int FI = INT_MAX; 01831 if (Arg.getOpcode() == ISD::CopyFromReg) { 01832 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 01833 if (!TargetRegisterInfo::isVirtualRegister(VR)) 01834 return false; 01835 MachineInstr *Def = MRI->getVRegDef(VR); 01836 if (!Def) 01837 return false; 01838 if (!Flags.isByVal()) { 01839 if (!TII->isLoadFromStackSlot(Def, FI)) 01840 return false; 01841 } else { 01842 return false; 01843 } 01844 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 01845 if (Flags.isByVal()) 01846 // ByVal argument is passed in as a pointer but it's now being 01847 // dereferenced. e.g. 01848 // define @foo(%struct.X* %A) { 01849 // tail call @bar(%struct.X* byval %A) 01850 // } 01851 return false; 01852 SDValue Ptr = Ld->getBasePtr(); 01853 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 01854 if (!FINode) 01855 return false; 01856 FI = FINode->getIndex(); 01857 } else 01858 return false; 01859 01860 assert(FI != INT_MAX); 01861 if (!MFI->isFixedObjectIndex(FI)) 01862 return false; 01863 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 01864 } 01865 01866 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 01867 /// for tail call optimization. Targets which want to do tail call 01868 /// optimization should implement this function. 
///
/// Only sibcalls are accepted. The function returns false when any of the
/// following holds:
///  - the call is vararg and passes at least one argument;
///  - the caller or the callee uses struct-return semantics;
///  - the subtarget is Thumb1-only;
///  - the calling conventions differ and the return-value locations computed
///    for the two conventions do not agree;
///  - the caller's ARMFunctionInfo reports a non-zero ArgRegsSaveSize;
///  - some argument is passed on the stack and is not already located in a
///    matching fixed stack object of the caller (see MatchingStackOffset),
///    is passed indirectly, or is a custom-split value whose pieces are not
///    all in registers.
bool
ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                                     bool isCalleeStructRet,
                                                     bool isCallerStructRet,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  const Function *CallerF = DAG.getMachineFunction().getFunction();
  CallingConv::ID CallerCC = CallerF->getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Do not sibcall optimize vararg calls unless the call site is not passing
  // any arguments.
  if (isVarArg && !Outs.empty())
    return false;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
  // emitEpilogue is not ready for them.  Thumb tail calls also use t2B, as
  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
  // support in the assembler and linker to be used. This would need to be
  // fixed to fully support tail calls in Thumb1.
  //
  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
  // LR.  This means if we need to reload LR, it takes an extra instructions,
  // which outweighs the value of the tail call; but here we don't know yet
  // whether LR is going to be used.  Probably the right approach is to
  // generate the tail call here and turn it back into CALL/RET in
  // emitEpilogue if LR is used.

  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
  // but we need to make sure there are enough registers; the only valid
  // registers are the 4 used for parameters.  We don't currently do this
  // case.
  if (Subtarget->isThumb1Only())
    return false;

  // If the calling conventions do not match, then we'd better make sure the
  // results are returned in the same way as what the caller expects.
  if (!CCMatch) {
    // Return-value locations under the callee's convention...
    SmallVector<CCValAssign, 16> RVLocs1;
    ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
                       getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));

    // ...and under the caller's convention.
    SmallVector<CCValAssign, 16> RVLocs2;
    ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
                       getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));

    // Every location must agree in kind, LocInfo and register / stack offset.
    if (RVLocs1.size() != RVLocs2.size())
      return false;
    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
        return false;
      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
        return false;
      if (RVLocs1[i].isRegLoc()) {
        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
          return false;
      } else {
        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
          return false;
      }
    }
  }

  // If Caller's vararg or byval argument has been split between registers and
  // stack, do not perform tail call, since part of the argument is in caller's
  // local frame.
  const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
                                      getInfo<ARMFunctionInfo>();
  if (AFI_Caller->getArgRegsSaveSize())
    return false;

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
    SmallVector<CCValAssign, 16> ArgLocs;
    ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
                      getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
    CCInfo.AnalyzeCallOperands(Outs,
                               CCAssignFnForNode(CalleeCC, false, isVarArg));
    if (CCInfo.getNextStackOffset()) {
      MachineFunction &MF = DAG.getMachineFunction();

      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo *MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
      // 'i' indexes ArgLocs, 'realArgIdx' indexes Outs/OutVals; the custom
      // case below consumes extra ArgLocs entries with ++i.
      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
           i != e;
           ++i, ++realArgIdx) {
        CCValAssign &VA = ArgLocs[i];
        EVT RegVT = VA.getLocVT();
        SDValue Arg = OutVals[realArgIdx];
        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (VA.needsCustom()) {
          // f64 and vector types are split into multiple registers or
          // register/stack-slot combinations.  The types will not match
          // the registers; give up on memory f64 refs until we figure
          // out what to do about this.
          if (!VA.isRegLoc())
            return false;
          if (!ArgLocs[++i].isRegLoc())
            return false;
          // v2f64 occupies four locations in total; check the last two.
          if (RegVT == MVT::v2f64) {
            if (!ArgLocs[++i].isRegLoc())
              return false;
            if (!ArgLocs[++i].isRegLoc())
              return false;
          }
        } else if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
                                   MFI, MRI, TII))
            return false;
        }
      }
    }
  }

  return true;
}

/// CanLowerReturn - Run the return-value assignment function selected by
/// CCAssignFnForNode for \p CallConv over \p Outs and return the result of
/// CCState::CheckReturn.
bool
ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
  return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
                                                    isVarArg));
}

/// LowerReturn - Lower a function return. Each value in \p OutVals is copied
/// into the register assigned to it by the return calling convention, with
/// the copies glued together, and an ARMISD::RET_FLAG node is returned whose
/// operands are the final chain, the registers used and (if any copy was
/// emitted) the glue.
///
/// Values marked needsCustom() (f64, and v2f64 as two f64 halves) are split
/// with ARMISD::VMOVRRD into two i32 values occupying consecutive RVLocs.
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               DebugLoc dl, SelectionDAG &DAG) const {

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
                                               isVarArg));

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)

  // Copy the result values into the output registers.
  // 'i' indexes RVLocs, 'realRVLocIdx' indexes OutVals; the custom path
  // below consumes extra RVLocs entries.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[realRVLocIdx];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.needsCustom()) {
      // NOTE: VA is a reference to an RVLocs slot, so each
      // "VA = RVLocs[++i]" below copy-assigns the next location over that
      // slot instead of rebinding VA. RVLocs is local to this function and
      // the overwritten slot is not read again.
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(0, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
        Flag = Chain.getValue(1);
        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                                 HalfGPRs.getValue(1), Flag);
        Flag = Chain.getValue(1);
        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                          DAG.getConstant(1, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
      Flag = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
                               Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);

    // Guarantee that all emitted copies are
    // stuck together, avoiding something bad.
    // (Shared tail: records glue and register for the last copy emitted by
    // either branch above.)
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
                     RetOps.data(), RetOps.size());
}

/// isUsedByReturnOnly - Return true if the single value produced by \p N has
/// exactly one use and that use leads only to a return:
///  - directly a CopyToReg that has no glue operand, or
///  - an ARMISD::VMOVRRD whose users are all CopyToReg nodes (at most two), or
///  - a single-use BITCAST feeding a CopyToReg,
/// and every user of the (last) CopyToReg is an ARMISD::RET_FLAG, with at
/// least one such user. On success \p Chain is replaced by the chain operand
/// of the (first) CopyToReg; on failure \p Chain is left unchanged.
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  SDValue TCChain = Chain;
  SDNode *Copy = *N->use_begin();
  if (Copy->getOpcode() == ISD::CopyToReg) {
    // If the copy has a glue operand, we conservatively assume it isn't safe to
    // perform a tail call.
    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
      return false;
    TCChain = Copy->getOperand(0);
  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
    SDNode *VMov = Copy;
    // f64 returned in a pair of GPRs.
    SmallPtrSet<SDNode*, 2> Copies;
    for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
         UI != UE; ++UI) {
      if (UI->getOpcode() != ISD::CopyToReg)
        return false;
      Copies.insert(*UI);
    }
    if (Copies.size() > 2)
      return false;

    // A copy whose chain operand is another collected copy is the second
    // one; otherwise it is the first, and its incoming chain is the chain
    // to report.
    for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
         UI != UE; ++UI) {
      SDValue UseChain = UI->getOperand(0);
      if (Copies.count(UseChain.getNode()))
        // Second CopyToReg
        Copy = *UI;
      else
        // First CopyToReg
        TCChain = UseChain;
    }
  } else if (Copy->getOpcode() == ISD::BITCAST) {
    // f32 returned in a single GPR.
    if (!Copy->hasOneUse())
      return false;
    Copy = *Copy->use_begin();
    if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
      return false;
    TCChain = Copy->getOperand(0);
  } else {
    return false;
  }

  // Every user of the final copy must be a return node, and there must be at
  // least one.
  bool HasRet = false;
  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
       UI != UE; ++UI) {
    if (UI->getOpcode() != ARMISD::RET_FLAG)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  Chain = TCChain;
  return true;
}

/// mayBeEmittedAsTailCall - Return true if \p CI is marked as a tail call,
/// tail calls are enabled (EnableARMTailCalls) or supported by the subtarget,
/// and the subtarget is not Thumb1-only.
bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
  if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
    return false;

  if (!CI->isTailCall())
    return false;

  return !Subtarget->isThumb1Only();
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue Res;
  // Build the target constant-pool node from either the machine-specific
  // entry or the plain constant, keeping the node's alignment.
  if (CP->isMachineConstantPoolEntry())
    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                    CP->getAlignment());
  else
    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                    CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}

/// getJumpTableEncoding - Always returns MachineJumpTableInfo::EK_Inline.
unsigned ARMTargetLowering::getJumpTableEncoding() const {
  return MachineJumpTableInfo::EK_Inline;
}

/// LowerBlockAddress - Lower a BlockAddress node to a load of the address
/// from the constant pool. With the static relocation model the loaded value
/// is returned directly; otherwise a PC-relative constant-pool entry is
/// created (PC adjustment 4 for Thumb, 8 otherwise) and the loaded value is
/// combined with its PIC label through ARMISD::PIC_ADD.
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Only assigned a real label id on the non-static path below.
  unsigned ARMPCLabelIndex = 0;
  DebugLoc DL = Op.getDebugLoc();
  EVT PtrVT = getPointerTy();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  SDValue CPAddr;
  if (RelocM == Reloc::Static) {
    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
  } else {
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMPCLabelIndex = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
                                      ARMCP::CPBlockAddress, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, false, 0);
  if (RelocM == Reloc::Static)
    return Result;
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model
//
// A TLSGD constant-pool entry for the global is loaded and combined with its
// PIC label via ARMISD::PIC_ADD; the result is passed as the single i32
// argument of a call to the external symbol "__tls_get_addr", and the value
// returned by that call is the result.
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG) const {
  DebugLoc dl = GA->getDebugLoc();
  EVT PtrVT = getPointerTy();
  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  ARMConstantPoolValue *CPV =
    ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
                                    ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
                         MachinePointerInfo::getConstantPool(),
                         false, false, false, 0);
  // The call below is chained after the constant-pool load.
  SDValue Chain = Argument.getValue(1);

  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  // call __tls_get_addr.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Argument;
  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
  Args.push_back(Entry);
  // FIXME: is there useful debug info available here?
  TargetLowering::CallLoweringInfo CLI(Chain,
                (Type *) Type::getInt32Ty(*DAG.getContext()),
                false, false, false, false,
                0, CallingConv::C, /*isTailCall=*/false,
                /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  // Only the call's result value is returned; the output chain
  // (CallResult.second) is not used here.
  return CallResult.first;
}

// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG,
                                        TLSModel::Model model) const {
  const GlobalValue *GV = GA->getGlobal();
  DebugLoc dl = GA->getDebugLoc();
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy();
  // Get the Thread Pointer
  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  if (model == TLSModel::InitialExec) {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    // Initial exec model.
    unsigned char PCAdj = Subtarget->isThumb() ?
4 : 8; 02297 ARMConstantPoolValue *CPV = 02298 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 02299 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, 02300 true); 02301 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02302 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 02303 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 02304 MachinePointerInfo::getConstantPool(), 02305 false, false, false, 0); 02306 Chain = Offset.getValue(1); 02307 02308 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 02309 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 02310 02311 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 02312 MachinePointerInfo::getConstantPool(), 02313 false, false, false, 0); 02314 } else { 02315 // local exec model 02316 assert(model == TLSModel::LocalExec); 02317 ARMConstantPoolValue *CPV = 02318 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); 02319 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02320 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 02321 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 02322 MachinePointerInfo::getConstantPool(), 02323 false, false, false, 0); 02324 } 02325 02326 // The address of the thread local variable is the add of the thread 02327 // pointer with the offset of the variable. 
02328 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 02329 } 02330 02331 SDValue 02332 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 02333 // TODO: implement the "local dynamic" model 02334 assert(Subtarget->isTargetELF() && 02335 "TLS not implemented for non-ELF targets"); 02336 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 02337 02338 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); 02339 02340 switch (model) { 02341 case TLSModel::GeneralDynamic: 02342 case TLSModel::LocalDynamic: 02343 return LowerToTLSGeneralDynamicModel(GA, DAG); 02344 case TLSModel::InitialExec: 02345 case TLSModel::LocalExec: 02346 return LowerToTLSExecModels(GA, DAG, model); 02347 } 02348 llvm_unreachable("bogus TLS model"); 02349 } 02350 02351 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 02352 SelectionDAG &DAG) const { 02353 EVT PtrVT = getPointerTy(); 02354 DebugLoc dl = Op.getDebugLoc(); 02355 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 02356 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 02357 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 02358 ARMConstantPoolValue *CPV = 02359 ARMConstantPoolConstant::Create(GV, 02360 UseGOTOFF ? 
ARMCP::GOTOFF : ARMCP::GOT); 02361 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02362 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 02363 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 02364 CPAddr, 02365 MachinePointerInfo::getConstantPool(), 02366 false, false, false, 0); 02367 SDValue Chain = Result.getValue(1); 02368 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 02369 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 02370 if (!UseGOTOFF) 02371 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 02372 MachinePointerInfo::getGOT(), 02373 false, false, false, 0); 02374 return Result; 02375 } 02376 02377 // If we have T2 ops, we can materialize the address directly via movt/movw 02378 // pair. This is always cheaper. 02379 if (Subtarget->useMovt()) { 02380 ++NumMovwMovt; 02381 // FIXME: Once remat is capable of dealing with instructions with register 02382 // operands, expand this into two nodes. 02383 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 02384 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 02385 } else { 02386 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 02387 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 02388 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 02389 MachinePointerInfo::getConstantPool(), 02390 false, false, false, 0); 02391 } 02392 } 02393 02394 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 02395 SelectionDAG &DAG) const { 02396 EVT PtrVT = getPointerTy(); 02397 DebugLoc dl = Op.getDebugLoc(); 02398 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 02399 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 02400 02401 // FIXME: Enable this for static codegen when tool issues are fixed. Also 02402 // update ARMFastISel::ARMMaterializeGV. 
02403 if (Subtarget->useMovt() && RelocM != Reloc::Static) { 02404 ++NumMovwMovt; 02405 // FIXME: Once remat is capable of dealing with instructions with register 02406 // operands, expand this into two nodes. 02407 if (RelocM == Reloc::Static) 02408 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 02409 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 02410 02411 unsigned Wrapper = (RelocM == Reloc::PIC_) 02412 ? ARMISD::WrapperPIC : ARMISD::WrapperDYN; 02413 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, 02414 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 02415 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 02416 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 02417 MachinePointerInfo::getGOT(), 02418 false, false, false, 0); 02419 return Result; 02420 } 02421 02422 unsigned ARMPCLabelIndex = 0; 02423 SDValue CPAddr; 02424 if (RelocM == Reloc::Static) { 02425 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 02426 } else { 02427 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 02428 ARMPCLabelIndex = AFI->createPICLabelUId(); 02429 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 
0 : (Subtarget->isThumb()?4:8); 02430 ARMConstantPoolValue *CPV = 02431 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 02432 PCAdj); 02433 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02434 } 02435 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 02436 02437 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 02438 MachinePointerInfo::getConstantPool(), 02439 false, false, false, 0); 02440 SDValue Chain = Result.getValue(1); 02441 02442 if (RelocM == Reloc::PIC_) { 02443 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 02444 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 02445 } 02446 02447 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 02448 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 02449 false, false, false, 0); 02450 02451 return Result; 02452 } 02453 02454 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 02455 SelectionDAG &DAG) const { 02456 assert(Subtarget->isTargetELF() && 02457 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 02458 MachineFunction &MF = DAG.getMachineFunction(); 02459 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02460 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 02461 EVT PtrVT = getPointerTy(); 02462 DebugLoc dl = Op.getDebugLoc(); 02463 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 02464 ARMConstantPoolValue *CPV = 02465 ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", 02466 ARMPCLabelIndex, PCAdj); 02467 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02468 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 02469 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 02470 MachinePointerInfo::getConstantPool(), 02471 false, false, false, 0); 02472 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 02473 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 02474 } 02475 02476 SDValue 02477 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 02478 DebugLoc dl = Op.getDebugLoc(); 02479 SDValue Val = DAG.getConstant(0, MVT::i32); 02480 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, 02481 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), 02482 Op.getOperand(1), Val); 02483 } 02484 02485 SDValue 02486 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 02487 DebugLoc dl = Op.getDebugLoc(); 02488 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 02489 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 02490 } 02491 02492 SDValue 02493 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 02494 const ARMSubtarget *Subtarget) const { 02495 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 02496 DebugLoc dl = Op.getDebugLoc(); 02497 switch (IntNo) { 02498 default: return SDValue(); // Don't custom lower most intrinsics. 
02499 case Intrinsic::arm_thread_pointer: { 02500 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 02501 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 02502 } 02503 case Intrinsic::eh_sjlj_lsda: { 02504 MachineFunction &MF = DAG.getMachineFunction(); 02505 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02506 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 02507 EVT PtrVT = getPointerTy(); 02508 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 02509 SDValue CPAddr; 02510 unsigned PCAdj = (RelocM != Reloc::PIC_) 02511 ? 0 : (Subtarget->isThumb() ? 4 : 8); 02512 ARMConstantPoolValue *CPV = 02513 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, 02514 ARMCP::CPLSDA, PCAdj); 02515 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 02516 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 02517 SDValue Result = 02518 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 02519 MachinePointerInfo::getConstantPool(), 02520 false, false, false, 0); 02521 02522 if (RelocM == Reloc::PIC_) { 02523 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 02524 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 02525 } 02526 return Result; 02527 } 02528 case Intrinsic::arm_neon_vmulls: 02529 case Intrinsic::arm_neon_vmullu: { 02530 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 02531 ? ARMISD::VMULLs : ARMISD::VMULLu; 02532 return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(), 02533 Op.getOperand(1), Op.getOperand(2)); 02534 } 02535 } 02536 } 02537 02538 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 02539 const ARMSubtarget *Subtarget) { 02540 // FIXME: handle "fence singlethread" more efficiently. 02541 DebugLoc dl = Op.getDebugLoc(); 02542 if (!Subtarget->hasDataBarrier()) { 02543 // Some ARMv6 cpus can support data barriers with an mcr instruction. 02544 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 02545 // here. 
02546 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 02547 "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 02548 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 02549 DAG.getConstant(0, MVT::i32)); 02550 } 02551 02552 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 02553 DAG.getConstant(ARM_MB::ISH, MVT::i32)); 02554 } 02555 02556 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, 02557 const ARMSubtarget *Subtarget) { 02558 // ARM pre v5TE and Thumb1 does not have preload instructions. 02559 if (!(Subtarget->isThumb2() || 02560 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) 02561 // Just preserve the chain. 02562 return Op.getOperand(0); 02563 02564 DebugLoc dl = Op.getDebugLoc(); 02565 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; 02566 if (!isRead && 02567 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) 02568 // ARMv7 with MP extension has PLDW. 02569 return Op.getOperand(0); 02570 02571 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); 02572 if (Subtarget->isThumb()) { 02573 // Invert the bits. 02574 isRead = ~isRead & 1; 02575 isData = ~isData & 1; 02576 } 02577 02578 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 02579 Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), 02580 DAG.getConstant(isData, MVT::i32)); 02581 } 02582 02583 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 02584 MachineFunction &MF = DAG.getMachineFunction(); 02585 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 02586 02587 // vastart just stores the address of the VarArgsFrameIndex slot into the 02588 // memory location argument. 
02589 DebugLoc dl = Op.getDebugLoc(); 02590 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 02591 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 02592 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 02593 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 02594 MachinePointerInfo(SV), false, false, 0); 02595 } 02596 02597 SDValue 02598 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 02599 SDValue &Root, SelectionDAG &DAG, 02600 DebugLoc dl) const { 02601 MachineFunction &MF = DAG.getMachineFunction(); 02602 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02603 02604 const TargetRegisterClass *RC; 02605 if (AFI->isThumb1OnlyFunction()) 02606 RC = &ARM::tGPRRegClass; 02607 else 02608 RC = &ARM::GPRRegClass; 02609 02610 // Transform the arguments stored in physical registers into virtual ones. 02611 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 02612 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 02613 02614 SDValue ArgValue2; 02615 if (NextVA.isMemLoc()) { 02616 MachineFrameInfo *MFI = MF.getFrameInfo(); 02617 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 02618 02619 // Create load node to retrieve arguments from the stack. 
02620 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 02621 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 02622 MachinePointerInfo::getFixedStack(FI), 02623 false, false, false, 0); 02624 } else { 02625 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 02626 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 02627 } 02628 02629 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 02630 } 02631 02632 void 02633 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, 02634 unsigned InRegsParamRecordIdx, 02635 unsigned ArgSize, 02636 unsigned &ArgRegsSize, 02637 unsigned &ArgRegsSaveSize) 02638 const { 02639 unsigned NumGPRs; 02640 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { 02641 unsigned RBegin, REnd; 02642 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); 02643 NumGPRs = REnd - RBegin; 02644 } else { 02645 unsigned int firstUnalloced; 02646 firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, 02647 sizeof(GPRArgRegs) / 02648 sizeof(GPRArgRegs[0])); 02649 NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; 02650 } 02651 02652 unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 02653 ArgRegsSize = NumGPRs * 4; 02654 02655 // If parameter is split between stack and GPRs... 02656 if (NumGPRs && Align == 8 && 02657 (ArgRegsSize < ArgSize || 02658 InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { 02659 // Add padding for part of param recovered from GPRs, so 02660 // its last byte must be at address K*8 - 1. 02661 // We need to do it, since remained (stack) part of parameter has 02662 // stack alignment, and we need to "attach" "GPRs head" without gaps 02663 // to it: 02664 // Stack: 02665 // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes... 02666 // [ [padding] [GPRs head] ] [ Tail passed via stack .... 
02667 // 02668 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02669 unsigned Padding = 02670 ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) - 02671 (ArgRegsSize + AFI->getArgRegsSaveSize()); 02672 ArgRegsSaveSize = ArgRegsSize + Padding; 02673 } else 02674 // We don't need to extend regs save size for byval parameters if they 02675 // are passed via GPRs only. 02676 ArgRegsSaveSize = ArgRegsSize; 02677 } 02678 02679 // The remaining GPRs hold either the beginning of variable-argument 02680 // data, or the beginning of an aggregate passed by value (usually 02681 // byval). Either way, we allocate stack slots adjacent to the data 02682 // provided by our caller, and store the unallocated registers there. 02683 // If this is a variadic function, the va_list pointer will begin with 02684 // these values; otherwise, this reassembles a (byval) structure that 02685 // was split between registers and memory. 02686 // Return: The frame index registers were stored into. 02687 int 02688 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, 02689 DebugLoc dl, SDValue &Chain, 02690 const Value *OrigArg, 02691 unsigned InRegsParamRecordIdx, 02692 unsigned OffsetFromOrigArg, 02693 unsigned ArgOffset, 02694 unsigned ArgSize, 02695 bool ForceMutable) const { 02696 02697 // Currently, two use-cases possible: 02698 // Case #1. Non var-args function, and we meet first byval parameter. 02699 // Setup first unallocated register as first byval register; 02700 // eat all remained registers 02701 // (these two actions are performed by HandleByVal method). 02702 // Then, here, we initialize stack frame with 02703 // "store-reg" instructions. 02704 // Case #2. Var-args function, that doesn't contain byval parameters. 02705 // The same: eat all remained unallocated registers, 02706 // initialize stack frame. 
02707 02708 MachineFunction &MF = DAG.getMachineFunction(); 02709 MachineFrameInfo *MFI = MF.getFrameInfo(); 02710 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02711 unsigned firstRegToSaveIndex, lastRegToSaveIndex; 02712 unsigned RBegin, REnd; 02713 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { 02714 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); 02715 firstRegToSaveIndex = RBegin - ARM::R0; 02716 lastRegToSaveIndex = REnd - ARM::R0; 02717 } else { 02718 firstRegToSaveIndex = CCInfo.getFirstUnallocated 02719 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 02720 lastRegToSaveIndex = 4; 02721 } 02722 02723 unsigned ArgRegsSize, ArgRegsSaveSize; 02724 computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize, 02725 ArgRegsSize, ArgRegsSaveSize); 02726 02727 // Store any by-val regs to their spots on the stack so that they may be 02728 // loaded by deferencing the result of formal parameter pointer or va_next. 02729 // Note: once stack area for byval/varargs registers 02730 // was initialized, it can't be initialized again. 
02731 if (ArgRegsSaveSize) { 02732 02733 unsigned Padding = ArgRegsSaveSize - ArgRegsSize; 02734 02735 if (Padding) { 02736 assert(AFI->getStoredByValParamsPadding() == 0 && 02737 "The only parameter may be padded."); 02738 AFI->setStoredByValParamsPadding(Padding); 02739 } 02740 02741 int FrameIndex = MFI->CreateFixedObject( 02742 ArgRegsSaveSize, 02743 Padding + ArgOffset, 02744 false); 02745 SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); 02746 02747 SmallVector<SDValue, 4> MemOps; 02748 for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; 02749 ++firstRegToSaveIndex, ++i) { 02750 const TargetRegisterClass *RC; 02751 if (AFI->isThumb1OnlyFunction()) 02752 RC = &ARM::tGPRRegClass; 02753 else 02754 RC = &ARM::GPRRegClass; 02755 02756 unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); 02757 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 02758 SDValue Store = 02759 DAG.getStore(Val.getValue(1), dl, Val, FIN, 02760 MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i), 02761 false, false, 0); 02762 MemOps.push_back(Store); 02763 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 02764 DAG.getConstant(4, getPointerTy())); 02765 } 02766 02767 AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); 02768 02769 if (!MemOps.empty()) 02770 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 02771 &MemOps[0], MemOps.size()); 02772 return FrameIndex; 02773 } else 02774 // This will point to the next argument passed via stack. 02775 return MFI->CreateFixedObject( 02776 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable); 02777 } 02778 02779 // Setup stack frame, the va_list pointer will start from. 
02780 void 02781 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, 02782 DebugLoc dl, SDValue &Chain, 02783 unsigned ArgOffset, 02784 bool ForceMutable) const { 02785 MachineFunction &MF = DAG.getMachineFunction(); 02786 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02787 02788 // Try to store any remaining integer argument regs 02789 // to their spots on the stack so that they may be loaded by deferencing 02790 // the result of va_next. 02791 // If there is no regs to be stored, just point address after last 02792 // argument passed via stack. 02793 int FrameIndex = 02794 StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), 02795 0, ArgOffset, 0, ForceMutable); 02796 02797 AFI->setVarArgsFrameIndex(FrameIndex); 02798 } 02799 02800 SDValue 02801 ARMTargetLowering::LowerFormalArguments(SDValue Chain, 02802 CallingConv::ID CallConv, bool isVarArg, 02803 const SmallVectorImpl<ISD::InputArg> 02804 &Ins, 02805 DebugLoc dl, SelectionDAG &DAG, 02806 SmallVectorImpl<SDValue> &InVals) 02807 const { 02808 MachineFunction &MF = DAG.getMachineFunction(); 02809 MachineFrameInfo *MFI = MF.getFrameInfo(); 02810 02811 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 02812 02813 // Assign locations to all of the incoming arguments. 02814 SmallVector<CCValAssign, 16> ArgLocs; 02815 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 02816 getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue); 02817 CCInfo.AnalyzeFormalArguments(Ins, 02818 CCAssignFnForNode(CallConv, /* Return*/ false, 02819 isVarArg)); 02820 02821 SmallVector<SDValue, 16> ArgValues; 02822 int lastInsIndex = -1; 02823 SDValue ArgValue; 02824 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); 02825 unsigned CurArgIdx = 0; 02826 02827 // Initially ArgRegsSaveSize is zero. 02828 // Then we increase this value each time we meet byval parameter. 02829 // We also increase this value in case of varargs function. 
02830 AFI->setArgRegsSaveSize(0); 02831 02832 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 02833 CCValAssign &VA = ArgLocs[i]; 02834 std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); 02835 CurArgIdx = Ins[VA.getValNo()].OrigArgIndex; 02836 // Arguments stored in registers. 02837 if (VA.isRegLoc()) { 02838 EVT RegVT = VA.getLocVT(); 02839 02840 if (VA.needsCustom()) { 02841 // f64 and vector types are split up into multiple registers or 02842 // combinations of registers and stack slots. 02843 if (VA.getLocVT() == MVT::v2f64) { 02844 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 02845 Chain, DAG, dl); 02846 VA = ArgLocs[++i]; // skip ahead to next loc 02847 SDValue ArgValue2; 02848 if (VA.isMemLoc()) { 02849 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 02850 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 02851 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 02852 MachinePointerInfo::getFixedStack(FI), 02853 false, false, false, 0); 02854 } else { 02855 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 02856 Chain, DAG, dl); 02857 } 02858 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 02859 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 02860 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 02861 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 02862 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 02863 } else 02864 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 02865 02866 } else { 02867 const TargetRegisterClass *RC; 02868 02869 if (RegVT == MVT::f32) 02870 RC = &ARM::SPRRegClass; 02871 else if (RegVT == MVT::f64) 02872 RC = &ARM::DPRRegClass; 02873 else if (RegVT == MVT::v2f64) 02874 RC = &ARM::QPRRegClass; 02875 else if (RegVT == MVT::i32) 02876 RC = AFI->isThumb1OnlyFunction() ? 
02877 (const TargetRegisterClass*)&ARM::tGPRRegClass : 02878 (const TargetRegisterClass*)&ARM::GPRRegClass; 02879 else 02880 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 02881 02882 // Transform the arguments in physical registers into virtual ones. 02883 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 02884 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 02885 } 02886 02887 // If this is an 8 or 16-bit value, it is really passed promoted 02888 // to 32 bits. Insert an assert[sz]ext to capture this, then 02889 // truncate to the right size. 02890 switch (VA.getLocInfo()) { 02891 default: llvm_unreachable("Unknown loc info!"); 02892 case CCValAssign::Full: break; 02893 case CCValAssign::BCvt: 02894 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 02895 break; 02896 case CCValAssign::SExt: 02897 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 02898 DAG.getValueType(VA.getValVT())); 02899 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 02900 break; 02901 case CCValAssign::ZExt: 02902 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 02903 DAG.getValueType(VA.getValVT())); 02904 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 02905 break; 02906 } 02907 02908 InVals.push_back(ArgValue); 02909 02910 } else { // VA.isRegLoc() 02911 02912 // sanity check 02913 assert(VA.isMemLoc()); 02914 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 02915 02916 int index = ArgLocs[i].getValNo(); 02917 02918 // Some Ins[] entries become multiple ArgLoc[] entries. 02919 // Process them only once. 02920 if (index != lastInsIndex) 02921 { 02922 ISD::ArgFlagsTy Flags = Ins[index].Flags; 02923 // FIXME: For now, all byval parameter objects are marked mutable. 02924 // This can be changed with more analysis. 02925 // In case of tail call optimization mark all arguments mutable. 
02926 // Since they could be overwritten by lowering of arguments in case of 02927 // a tail call. 02928 if (Flags.isByVal()) { 02929 unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); 02930 int FrameIndex = StoreByValRegs( 02931 CCInfo, DAG, dl, Chain, CurOrigArg, 02932 CurByValIndex, 02933 Ins[VA.getValNo()].PartOffset, 02934 VA.getLocMemOffset(), 02935 Flags.getByValSize(), 02936 true /*force mutable frames*/); 02937 InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); 02938 CCInfo.nextInRegsParam(); 02939 } else { 02940 unsigned FIOffset = VA.getLocMemOffset() + 02941 AFI->getStoredByValParamsPadding(); 02942 int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, 02943 FIOffset, true); 02944 02945 // Create load nodes to retrieve arguments from the stack. 02946 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 02947 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 02948 MachinePointerInfo::getFixedStack(FI), 02949 false, false, false, 0)); 02950 } 02951 lastInsIndex = index; 02952 } 02953 } 02954 } 02955 02956 // varargs 02957 if (isVarArg) 02958 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 02959 CCInfo.getNextStackOffset()); 02960 02961 return Chain; 02962 } 02963 02964 /// isFloatingPointZero - Return true if this is +0.0. 02965 static bool isFloatingPointZero(SDValue Op) { 02966 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 02967 return CFP->getValueAPF().isPosZero(); 02968 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 02969 // Maybe this has already been legalized into the constant pool? 
02970 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 02971 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 02972 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 02973 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 02974 return CFP->getValueAPF().isPosZero(); 02975 } 02976 } 02977 return false; 02978 } 02979 02980 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for 02981 /// the given operands. 02982 SDValue 02983 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 02984 SDValue &ARMcc, SelectionDAG &DAG, 02985 DebugLoc dl) const { 02986 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 02987 unsigned C = RHSC->getZExtValue(); 02988 if (!isLegalICmpImmediate(C)) { 02989 // Constant does not fit, try adjusting it by one? 02990 switch (CC) { 02991 default: break; 02992 case ISD::SETLT: 02993 case ISD::SETGE: 02994 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { 02995 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 02996 RHS = DAG.getConstant(C-1, MVT::i32); 02997 } 02998 break; 02999 case ISD::SETULT: 03000 case ISD::SETUGE: 03001 if (C != 0 && isLegalICmpImmediate(C-1)) { 03002 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 03003 RHS = DAG.getConstant(C-1, MVT::i32); 03004 } 03005 break; 03006 case ISD::SETLE: 03007 case ISD::SETGT: 03008 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { 03009 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 03010 RHS = DAG.getConstant(C+1, MVT::i32); 03011 } 03012 break; 03013 case ISD::SETULE: 03014 case ISD::SETUGT: 03015 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { 03016 CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; 03017 RHS = DAG.getConstant(C+1, MVT::i32); 03018 } 03019 break; 03020 } 03021 } 03022 } 03023 03024 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 03025 ARMISD::NodeType CompareType; 03026 switch (CondCode) { 03027 default: 03028 CompareType = ARMISD::CMP; 03029 break; 03030 case ARMCC::EQ: 03031 case ARMCC::NE: 03032 // Uses only Z Flag 03033 CompareType = ARMISD::CMPZ; 03034 break; 03035 } 03036 ARMcc = DAG.getConstant(CondCode, MVT::i32); 03037 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); 03038 } 03039 03040 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 03041 SDValue 03042 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 03043 DebugLoc dl) const { 03044 SDValue Cmp; 03045 if (!isFloatingPointZero(RHS)) 03046 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); 03047 else 03048 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); 03049 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); 03050 } 03051 03052 /// duplicateCmp - Glue values can have only one use, so this function 03053 /// duplicates a comparison node. 
03054 SDValue 03055 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { 03056 unsigned Opc = Cmp.getOpcode(); 03057 DebugLoc DL = Cmp.getDebugLoc(); 03058 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) 03059 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 03060 03061 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); 03062 Cmp = Cmp.getOperand(0); 03063 Opc = Cmp.getOpcode(); 03064 if (Opc == ARMISD::CMPFP) 03065 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 03066 else { 03067 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); 03068 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); 03069 } 03070 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); 03071 } 03072 03073 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 03074 SDValue Cond = Op.getOperand(0); 03075 SDValue SelectTrue = Op.getOperand(1); 03076 SDValue SelectFalse = Op.getOperand(2); 03077 DebugLoc dl = Op.getDebugLoc(); 03078 03079 // Convert: 03080 // 03081 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 03082 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 03083 // 03084 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 03085 const ConstantSDNode *CMOVTrue = 03086 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 03087 const ConstantSDNode *CMOVFalse = 03088 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 03089 03090 if (CMOVTrue && CMOVFalse) { 03091 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 03092 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 03093 03094 SDValue True; 03095 SDValue False; 03096 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 03097 True = SelectTrue; 03098 False = SelectFalse; 03099 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 03100 True = SelectFalse; 03101 False = SelectTrue; 03102 } 03103 03104 if (True.getNode() && False.getNode()) { 03105 EVT VT = Op.getValueType(); 03106 SDValue ARMcc = 
Cond.getOperand(2); 03107 SDValue CCR = Cond.getOperand(3); 03108 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); 03109 assert(True.getValueType() == VT); 03110 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); 03111 } 03112 } 03113 } 03114 03115 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the 03116 // undefined bits before doing a full-word comparison with zero. 03117 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, 03118 DAG.getConstant(1, Cond.getValueType())); 03119 03120 return DAG.getSelectCC(dl, Cond, 03121 DAG.getConstant(0, Cond.getValueType()), 03122 SelectTrue, SelectFalse, ISD::SETNE); 03123 } 03124 03125 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 03126 EVT VT = Op.getValueType(); 03127 SDValue LHS = Op.getOperand(0); 03128 SDValue RHS = Op.getOperand(1); 03129 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 03130 SDValue TrueVal = Op.getOperand(2); 03131 SDValue FalseVal = Op.getOperand(3); 03132 DebugLoc dl = Op.getDebugLoc(); 03133 03134 if (LHS.getValueType() == MVT::i32) { 03135 SDValue ARMcc; 03136 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03137 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 03138 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); 03139 } 03140 03141 ARMCC::CondCodes CondCode, CondCode2; 03142 FPCCToARMCC(CC, CondCode, CondCode2); 03143 03144 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 03145 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 03146 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03147 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 03148 ARMcc, CCR, Cmp); 03149 if (CondCode2 != ARMCC::AL) { 03150 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 03151 // FIXME: Needs another CMP because flag can have but one use. 
03152 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 03153 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 03154 Result, TrueVal, ARMcc2, CCR, Cmp2); 03155 } 03156 return Result; 03157 } 03158 03159 /// canChangeToInt - Given the fp compare operand, return true if it is suitable 03160 /// to morph to an integer compare sequence. 03161 static bool canChangeToInt(SDValue Op, bool &SeenZero, 03162 const ARMSubtarget *Subtarget) { 03163 SDNode *N = Op.getNode(); 03164 if (!N->hasOneUse()) 03165 // Otherwise it requires moving the value from fp to integer registers. 03166 return false; 03167 if (!N->getNumValues()) 03168 return false; 03169 EVT VT = Op.getValueType(); 03170 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 03171 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 03172 // vmrs are very slow, e.g. cortex-a8. 03173 return false; 03174 03175 if (isFloatingPointZero(Op)) { 03176 SeenZero = true; 03177 return true; 03178 } 03179 return ISD::isNormalLoad(N); 03180 } 03181 03182 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 03183 if (isFloatingPointZero(Op)) 03184 return DAG.getConstant(0, MVT::i32); 03185 03186 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 03187 return DAG.getLoad(MVT::i32, Op.getDebugLoc(), 03188 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 03189 Ld->isVolatile(), Ld->isNonTemporal(), 03190 Ld->isInvariant(), Ld->getAlignment()); 03191 03192 llvm_unreachable("Unknown VFP cmp argument!"); 03193 } 03194 03195 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 03196 SDValue &RetVal1, SDValue &RetVal2) { 03197 if (isFloatingPointZero(Op)) { 03198 RetVal1 = DAG.getConstant(0, MVT::i32); 03199 RetVal2 = DAG.getConstant(0, MVT::i32); 03200 return; 03201 } 03202 03203 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 03204 SDValue Ptr = Ld->getBasePtr(); 03205 RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 03206 Ld->getChain(), Ptr, 03207 Ld->getPointerInfo(), 03208 Ld->isVolatile(), 
Ld->isNonTemporal(), 03209 Ld->isInvariant(), Ld->getAlignment()); 03210 03211 EVT PtrType = Ptr.getValueType(); 03212 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 03213 SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), 03214 PtrType, Ptr, DAG.getConstant(4, PtrType)); 03215 RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 03216 Ld->getChain(), NewPtr, 03217 Ld->getPointerInfo().getWithOffset(4), 03218 Ld->isVolatile(), Ld->isNonTemporal(), 03219 Ld->isInvariant(), NewAlign); 03220 return; 03221 } 03222 03223 llvm_unreachable("Unknown VFP cmp argument!"); 03224 } 03225 03226 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 03227 /// f32 and even f64 comparisons to integer ones. 03228 SDValue 03229 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 03230 SDValue Chain = Op.getOperand(0); 03231 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 03232 SDValue LHS = Op.getOperand(2); 03233 SDValue RHS = Op.getOperand(3); 03234 SDValue Dest = Op.getOperand(4); 03235 DebugLoc dl = Op.getDebugLoc(); 03236 03237 bool LHSSeenZero = false; 03238 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); 03239 bool RHSSeenZero = false; 03240 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); 03241 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { 03242 // If unsafe fp math optimization is enabled and there are no other uses of 03243 // the CMP operands, and the condition code is EQ or NE, we can optimize it 03244 // to an integer comparison. 
03245 if (CC == ISD::SETOEQ) 03246 CC = ISD::SETEQ; 03247 else if (CC == ISD::SETUNE) 03248 CC = ISD::SETNE; 03249 03250 SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); 03251 SDValue ARMcc; 03252 if (LHS.getValueType() == MVT::f32) { 03253 LHS = DAG.getNode(ISD::AND, dl, MVT::i32, 03254 bitcastf32Toi32(LHS, DAG), Mask); 03255 RHS = DAG.getNode(ISD::AND, dl, MVT::i32, 03256 bitcastf32Toi32(RHS, DAG), Mask); 03257 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 03258 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03259 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 03260 Chain, Dest, ARMcc, CCR, Cmp); 03261 } 03262 03263 SDValue LHS1, LHS2; 03264 SDValue RHS1, RHS2; 03265 expandf64Toi32(LHS, DAG, LHS1, LHS2); 03266 expandf64Toi32(RHS, DAG, RHS1, RHS2); 03267 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); 03268 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); 03269 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 03270 ARMcc = DAG.getConstant(CondCode, MVT::i32); 03271 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 03272 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 03273 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 03274 } 03275 03276 return SDValue(); 03277 } 03278 03279 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 03280 SDValue Chain = Op.getOperand(0); 03281 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 03282 SDValue LHS = Op.getOperand(2); 03283 SDValue RHS = Op.getOperand(3); 03284 SDValue Dest = Op.getOperand(4); 03285 DebugLoc dl = Op.getDebugLoc(); 03286 03287 if (LHS.getValueType() == MVT::i32) { 03288 SDValue ARMcc; 03289 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 03290 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03291 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 03292 Chain, Dest, ARMcc, CCR, Cmp); 03293 } 03294 03295 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 03296 03297 if 
(getTargetMachine().Options.UnsafeFPMath && 03298 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 03299 CC == ISD::SETNE || CC == ISD::SETUNE)) { 03300 SDValue Result = OptimizeVFPBrcond(Op, DAG); 03301 if (Result.getNode()) 03302 return Result; 03303 } 03304 03305 ARMCC::CondCodes CondCode, CondCode2; 03306 FPCCToARMCC(CC, CondCode, CondCode2); 03307 03308 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 03309 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 03310 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03311 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 03312 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 03313 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 03314 if (CondCode2 != ARMCC::AL) { 03315 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 03316 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 03317 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 03318 } 03319 return Res; 03320 } 03321 03322 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 03323 SDValue Chain = Op.getOperand(0); 03324 SDValue Table = Op.getOperand(1); 03325 SDValue Index = Op.getOperand(2); 03326 DebugLoc dl = Op.getDebugLoc(); 03327 03328 EVT PTy = getPointerTy(); 03329 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 03330 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 03331 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 03332 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 03333 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 03334 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 03335 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 03336 if (Subtarget->isThumb2()) { 03337 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 03338 // which does another jump to the destination. This also makes it easier 03339 // to translate it to TBB / TBH later. 
03340 // FIXME: This might not work if the function is extremely large. 03341 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 03342 Addr, Op.getOperand(2), JTI, UId); 03343 } 03344 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 03345 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 03346 MachinePointerInfo::getJumpTable(), 03347 false, false, false, 0); 03348 Chain = Addr.getValue(1); 03349 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 03350 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 03351 } else { 03352 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 03353 MachinePointerInfo::getJumpTable(), 03354 false, false, false, 0); 03355 Chain = Addr.getValue(1); 03356 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 03357 } 03358 } 03359 03360 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 03361 EVT VT = Op.getValueType(); 03362 DebugLoc dl = Op.getDebugLoc(); 03363 03364 if (Op.getValueType().getVectorElementType() == MVT::i32) { 03365 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) 03366 return Op; 03367 return DAG.UnrollVectorOp(Op.getNode()); 03368 } 03369 03370 assert(Op.getOperand(0).getValueType() == MVT::v4f32 && 03371 "Invalid type for custom lowering!"); 03372 if (VT != MVT::v4i16) 03373 return DAG.UnrollVectorOp(Op.getNode()); 03374 03375 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); 03376 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); 03377 } 03378 03379 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 03380 EVT VT = Op.getValueType(); 03381 if (VT.isVector()) 03382 return LowerVectorFP_TO_INT(Op, DAG); 03383 03384 DebugLoc dl = Op.getDebugLoc(); 03385 unsigned Opc; 03386 03387 switch (Op.getOpcode()) { 03388 default: llvm_unreachable("Invalid opcode!"); 03389 case ISD::FP_TO_SINT: 03390 Opc = ARMISD::FTOSI; 03391 break; 03392 case ISD::FP_TO_UINT: 03393 Opc = ARMISD::FTOUI; 03394 break; 03395 } 
03396 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 03397 return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 03398 } 03399 03400 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 03401 EVT VT = Op.getValueType(); 03402 DebugLoc dl = Op.getDebugLoc(); 03403 03404 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { 03405 if (VT.getVectorElementType() == MVT::f32) 03406 return Op; 03407 return DAG.UnrollVectorOp(Op.getNode()); 03408 } 03409 03410 assert(Op.getOperand(0).getValueType() == MVT::v4i16 && 03411 "Invalid type for custom lowering!"); 03412 if (VT != MVT::v4f32) 03413 return DAG.UnrollVectorOp(Op.getNode()); 03414 03415 unsigned CastOpc; 03416 unsigned Opc; 03417 switch (Op.getOpcode()) { 03418 default: llvm_unreachable("Invalid opcode!"); 03419 case ISD::SINT_TO_FP: 03420 CastOpc = ISD::SIGN_EXTEND; 03421 Opc = ISD::SINT_TO_FP; 03422 break; 03423 case ISD::UINT_TO_FP: 03424 CastOpc = ISD::ZERO_EXTEND; 03425 Opc = ISD::UINT_TO_FP; 03426 break; 03427 } 03428 03429 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 03430 return DAG.getNode(Opc, dl, VT, Op); 03431 } 03432 03433 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 03434 EVT VT = Op.getValueType(); 03435 if (VT.isVector()) 03436 return LowerVectorINT_TO_FP(Op, DAG); 03437 03438 DebugLoc dl = Op.getDebugLoc(); 03439 unsigned Opc; 03440 03441 switch (Op.getOpcode()) { 03442 default: llvm_unreachable("Invalid opcode!"); 03443 case ISD::SINT_TO_FP: 03444 Opc = ARMISD::SITOF; 03445 break; 03446 case ISD::UINT_TO_FP: 03447 Opc = ARMISD::UITOF; 03448 break; 03449 } 03450 03451 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 03452 return DAG.getNode(Opc, dl, VT, Op); 03453 } 03454 03455 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 03456 // Implement fcopysign with a fabs and a conditional fneg. 
03457 SDValue Tmp0 = Op.getOperand(0); 03458 SDValue Tmp1 = Op.getOperand(1); 03459 DebugLoc dl = Op.getDebugLoc(); 03460 EVT VT = Op.getValueType(); 03461 EVT SrcVT = Tmp1.getValueType(); 03462 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 03463 Tmp0.getOpcode() == ARMISD::VMOVDRR; 03464 bool UseNEON = !InGPR && Subtarget->hasNEON(); 03465 03466 if (UseNEON) { 03467 // Use VBSL to copy the sign bit. 03468 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 03469 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 03470 DAG.getTargetConstant(EncodedVal, MVT::i32)); 03471 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; 03472 if (VT == MVT::f64) 03473 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 03474 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 03475 DAG.getConstant(32, MVT::i32)); 03476 else /*if (VT == MVT::f32)*/ 03477 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 03478 if (SrcVT == MVT::f32) { 03479 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 03480 if (VT == MVT::f64) 03481 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 03482 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 03483 DAG.getConstant(32, MVT::i32)); 03484 } else if (VT == MVT::f32) 03485 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 03486 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 03487 DAG.getConstant(32, MVT::i32)); 03488 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 03489 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 03490 03491 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 03492 MVT::i32); 03493 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 03494 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 03495 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 03496 03497 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 03498 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 03499 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 03500 if (VT == MVT::f32) { 03501 Res = DAG.getNode(ISD::BITCAST, 
dl, MVT::v2f32, Res); 03502 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 03503 DAG.getConstant(0, MVT::i32)); 03504 } else { 03505 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 03506 } 03507 03508 return Res; 03509 } 03510 03511 // Bitcast operand 1 to i32. 03512 if (SrcVT == MVT::f64) 03513 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 03514 &Tmp1, 1).getValue(1); 03515 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 03516 03517 // Or in the signbit with integer operations. 03518 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 03519 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 03520 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 03521 if (VT == MVT::f32) { 03522 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 03523 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 03524 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 03525 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 03526 } 03527 03528 // f64: Or the high part with signbit and then combine two parts. 
03529 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 03530 &Tmp0, 1); 03531 SDValue Lo = Tmp0.getValue(0); 03532 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 03533 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 03534 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 03535 } 03536 03537 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 03538 MachineFunction &MF = DAG.getMachineFunction(); 03539 MachineFrameInfo *MFI = MF.getFrameInfo(); 03540 MFI->setReturnAddressIsTaken(true); 03541 03542 EVT VT = Op.getValueType(); 03543 DebugLoc dl = Op.getDebugLoc(); 03544 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 03545 if (Depth) { 03546 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 03547 SDValue Offset = DAG.getConstant(4, MVT::i32); 03548 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 03549 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 03550 MachinePointerInfo(), false, false, false, 0); 03551 } 03552 03553 // Return LR, which contains the return address. Mark it an implicit live-in. 03554 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 03555 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 03556 } 03557 03558 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 03559 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 03560 MFI->setFrameAddressIsTaken(true); 03561 03562 EVT VT = Op.getValueType(); 03563 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 03564 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 03565 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 03566 ? 
ARM::R7 : ARM::R11; 03567 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 03568 while (Depth--) 03569 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 03570 MachinePointerInfo(), 03571 false, false, false, 0); 03572 return FrameAddr; 03573 } 03574 03575 /// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec), 03576 /// and size(DestVec) > 128-bits. 03577 /// This is achieved by doing the one extension from the SrcVec, splitting the 03578 /// result, extending these parts, and then concatenating these into the 03579 /// destination. 03580 static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { 03581 SDValue Op = N->getOperand(0); 03582 EVT SrcVT = Op.getValueType(); 03583 EVT DestVT = N->getValueType(0); 03584 03585 assert(DestVT.getSizeInBits() > 128 && 03586 "Custom sext/zext expansion needs >128-bit vector."); 03587 // If this is a normal length extension, use the default expansion. 03588 if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() && 03589 SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) 03590 return SDValue(); 03591 03592 DebugLoc dl = N->getDebugLoc(); 03593 unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); 03594 unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); 03595 unsigned NumElts = SrcVT.getVectorNumElements(); 03596 LLVMContext &Ctx = *DAG.getContext(); 03597 SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi; 03598 03599 EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), 03600 NumElts); 03601 EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), 03602 NumElts/2); 03603 EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize), 03604 NumElts/2); 03605 03606 Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op); 03607 SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, 03608 DAG.getIntPtrConstant(0)); 03609 SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, 03610 
DAG.getIntPtrConstant(NumElts/2)); 03611 ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo); 03612 ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi); 03613 return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi); 03614 } 03615 03616 /// ExpandBITCAST - If the target supports VFP, this function is called to 03617 /// expand a bit convert where either the source or destination type is i64 to 03618 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 03619 /// operand type is illegal (e.g., v2f32 for a target that doesn't support 03620 /// vectors), since the legalizer won't know what to do with that. 03621 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 03622 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 03623 DebugLoc dl = N->getDebugLoc(); 03624 SDValue Op = N->getOperand(0); 03625 03626 // This function is only supposed to be called for i64 types, either as the 03627 // source or destination of the bit convert. 03628 EVT SrcVT = Op.getValueType(); 03629 EVT DstVT = N->getValueType(0); 03630 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 03631 "ExpandBITCAST called for non-i64 type"); 03632 03633 // Turn i64->f64 into VMOVDRR. 03634 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 03635 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 03636 DAG.getConstant(0, MVT::i32)); 03637 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 03638 DAG.getConstant(1, MVT::i32)); 03639 return DAG.getNode(ISD::BITCAST, dl, DstVT, 03640 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 03641 } 03642 03643 // Turn f64->i64 into VMOVRRD. 03644 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 03645 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 03646 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 03647 // Merge the pieces into a single i64 value. 
03648 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 03649 } 03650 03651 return SDValue(); 03652 } 03653 03654 /// getZeroVector - Returns a vector of specified type with all zero elements. 03655 /// Zero vectors are used to represent vector negation and in those cases 03656 /// will be implemented with the NEON VNEG instruction. However, VNEG does 03657 /// not support i64 elements, so sometimes the zero vectors will need to be 03658 /// explicitly constructed. Regardless, use a canonical VMOV to create the 03659 /// zero vector. 03660 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 03661 assert(VT.isVector() && "Expected a vector type"); 03662 // The canonical modified immediate encoding of a zero vector is....0! 03663 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 03664 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 03665 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 03666 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 03667 } 03668 03669 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two 03670 /// i32 values and take a 2 x i32 value to shift plus a shift amount. 03671 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 03672 SelectionDAG &DAG) const { 03673 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 03674 EVT VT = Op.getValueType(); 03675 unsigned VTBits = VT.getSizeInBits(); 03676 DebugLoc dl = Op.getDebugLoc(); 03677 SDValue ShOpLo = Op.getOperand(0); 03678 SDValue ShOpHi = Op.getOperand(1); 03679 SDValue ShAmt = Op.getOperand(2); 03680 SDValue ARMcc; 03681 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? 
ISD::SRA : ISD::SRL; 03682 03683 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 03684 03685 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 03686 DAG.getConstant(VTBits, MVT::i32), ShAmt); 03687 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 03688 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 03689 DAG.getConstant(VTBits, MVT::i32)); 03690 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 03691 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 03692 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 03693 03694 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03695 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 03696 ARMcc, DAG, dl); 03697 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 03698 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 03699 CCR, Cmp); 03700 03701 SDValue Ops[2] = { Lo, Hi }; 03702 return DAG.getMergeValues(Ops, 2, dl); 03703 } 03704 03705 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 03706 /// i32 values and take a 2 x i32 value to shift plus a shift amount. 
03707 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 03708 SelectionDAG &DAG) const { 03709 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 03710 EVT VT = Op.getValueType(); 03711 unsigned VTBits = VT.getSizeInBits(); 03712 DebugLoc dl = Op.getDebugLoc(); 03713 SDValue ShOpLo = Op.getOperand(0); 03714 SDValue ShOpHi = Op.getOperand(1); 03715 SDValue ShAmt = Op.getOperand(2); 03716 SDValue ARMcc; 03717 03718 assert(Op.getOpcode() == ISD::SHL_PARTS); 03719 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 03720 DAG.getConstant(VTBits, MVT::i32), ShAmt); 03721 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 03722 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 03723 DAG.getConstant(VTBits, MVT::i32)); 03724 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 03725 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 03726 03727 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 03728 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 03729 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 03730 ARMcc, DAG, dl); 03731 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 03732 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 03733 CCR, Cmp); 03734 03735 SDValue Ops[2] = { Lo, Hi }; 03736 return DAG.getMergeValues(Ops, 2, dl); 03737 } 03738 03739 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 03740 SelectionDAG &DAG) const { 03741 // The rounding mode is in bits 23:22 of the FPSCR. 03742 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 03743 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) 03744 // so that the shift + and get folded into a bitfield extract. 
03745 DebugLoc dl = Op.getDebugLoc(); 03746 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, 03747 DAG.getConstant(Intrinsic::arm_get_fpscr, 03748 MVT::i32)); 03749 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 03750 DAG.getConstant(1U << 22, MVT::i32)); 03751 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, 03752 DAG.getConstant(22, MVT::i32)); 03753 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 03754 DAG.getConstant(3, MVT::i32)); 03755 } 03756 03757 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 03758 const ARMSubtarget *ST) { 03759 EVT VT = N->getValueType(0); 03760 DebugLoc dl = N->getDebugLoc(); 03761 03762 if (!ST->hasV6T2Ops()) 03763 return SDValue(); 03764 03765 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 03766 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 03767 } 03768 03769 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count 03770 /// for each 16-bit element from operand, repeated. The basic idea is to 03771 /// leverage vcnt to get the 8-bit counts, gather and add the results. 03772 /// 03773 /// Trace for v4i16: 03774 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 03775 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element) 03776 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi) 03777 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] 03778 /// [b0 b1 b2 b3 b4 b5 b6 b7] 03779 /// +[b1 b0 b3 b2 b5 b4 b7 b6] 03780 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0, 03781 /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) 03782 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { 03783 EVT VT = N->getValueType(0); 03784 DebugLoc DL = N->getDebugLoc(); 03785 03786 EVT VT8Bit = VT.is64BitVector() ? 
MVT::v8i8 : MVT::v16i8; 03787 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); 03788 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0); 03789 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1); 03790 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2); 03791 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3); 03792 } 03793 03794 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the 03795 /// bit-count for each 16-bit element from the operand. We need slightly 03796 /// different sequencing for v4i16 and v8i16 to stay within NEON's available 03797 /// 64/128-bit registers. 03798 /// 03799 /// Trace for v4i16: 03800 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 03801 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) 03802 /// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ] 03803 /// v4i16:Extracted = [k0 k1 k2 k3 ] 03804 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { 03805 EVT VT = N->getValueType(0); 03806 DebugLoc DL = N->getDebugLoc(); 03807 03808 SDValue BitCounts = getCTPOP16BitCounts(N, DAG); 03809 if (VT.is64BitVector()) { 03810 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts); 03811 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended, 03812 DAG.getIntPtrConstant(0)); 03813 } else { 03814 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, 03815 BitCounts, DAG.getIntPtrConstant(0)); 03816 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted); 03817 } 03818 } 03819 03820 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the 03821 /// bit-count for each 32-bit element from the operand. The idea here is 03822 /// to split the vector into 16-bit elements, leverage the 16-bit count 03823 /// routine, and then combine the results. 
03824 /// 03825 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged): 03826 /// input = [v0 v1 ] (vi: 32-bit elements) 03827 /// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1]) 03828 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi) 03829 /// vrev: N0 = [k1 k0 k3 k2 ] 03830 /// [k0 k1 k2 k3 ] 03831 /// N1 =+[k1 k0 k3 k2 ] 03832 /// [k0 k2 k1 k3 ] 03833 /// N2 =+[k1 k3 k0 k2 ] 03834 /// [k0 k2 k1 k3 ] 03835 /// Extended =+[k1 k3 k0 k2 ] 03836 /// [k0 k2 ] 03837 /// Extracted=+[k1 k3 ] 03838 /// 03839 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { 03840 EVT VT = N->getValueType(0); 03841 DebugLoc DL = N->getDebugLoc(); 03842 03843 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; 03844 03845 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0)); 03846 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG); 03847 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16); 03848 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0); 03849 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1); 03850 03851 if (VT.is64BitVector()) { 03852 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2); 03853 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended, 03854 DAG.getIntPtrConstant(0)); 03855 } else { 03856 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2, 03857 DAG.getIntPtrConstant(0)); 03858 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted); 03859 } 03860 } 03861 03862 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, 03863 const ARMSubtarget *ST) { 03864 EVT VT = N->getValueType(0); 03865 03866 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON."); 03867 assert((VT == MVT::v2i32 || VT == MVT::v4i32 || 03868 VT == MVT::v4i16 || VT == MVT::v8i16) && 03869 "Unexpected type for custom ctpop lowering"); 03870 03871 if (VT.getVectorElementType() == MVT::i32) 
03872 return lowerCTPOP32BitElements(N, DAG); 03873 else 03874 return lowerCTPOP16BitElements(N, DAG); 03875 } 03876 03877 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 03878 const ARMSubtarget *ST) { 03879 EVT VT = N->getValueType(0); 03880 DebugLoc dl = N->getDebugLoc(); 03881 03882 if (!VT.isVector()) 03883 return SDValue(); 03884 03885 // Lower vector shifts on NEON to use VSHL. 03886 assert(ST->hasNEON() && "unexpected vector shift"); 03887 03888 // Left shifts translate directly to the vshiftu intrinsic. 03889 if (N->getOpcode() == ISD::SHL) 03890 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 03891 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 03892 N->getOperand(0), N->getOperand(1)); 03893 03894 assert((N->getOpcode() == ISD::SRA || 03895 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 03896 03897 // NEON uses the same intrinsics for both left and right shifts. For 03898 // right shifts, the shift amounts are negative, so negate the vector of 03899 // shift amounts. 03900 EVT ShiftVT = N->getOperand(1).getValueType(); 03901 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 03902 getZeroVector(ShiftVT, DAG, dl), 03903 N->getOperand(1)); 03904 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 03905 Intrinsic::arm_neon_vshifts : 03906 Intrinsic::arm_neon_vshiftu); 03907 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 03908 DAG.getConstant(vshiftInt, MVT::i32), 03909 N->getOperand(0), NegatedCount); 03910 } 03911 03912 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, 03913 const ARMSubtarget *ST) { 03914 EVT VT = N->getValueType(0); 03915 DebugLoc dl = N->getDebugLoc(); 03916 03917 // We can get here for a node like i32 = ISD::SHL i32, i64 03918 if (VT != MVT::i64) 03919 return SDValue(); 03920 03921 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 03922 "Unknown shift to lower!"); 03923 03924 // We only lower SRA, SRL of 1 here, all others use generic lowering. 
03925 if (!isa<ConstantSDNode>(N->getOperand(1)) || 03926 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 03927 return SDValue(); 03928 03929 // If we are in thumb mode, we don't have RRX. 03930 if (ST->isThumb1Only()) return SDValue(); 03931 03932 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 03933 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 03934 DAG.getConstant(0, MVT::i32)); 03935 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 03936 DAG.getConstant(1, MVT::i32)); 03937 03938 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 03939 // captures the result into a carry flag. 03940 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 03941 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); 03942 03943 // The low part is an ARMISD::RRX operand, which shifts the carry in. 03944 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 03945 03946 // Merge the pieces into a single i64 value. 
03947 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 03948 } 03949 03950 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 03951 SDValue TmpOp0, TmpOp1; 03952 bool Invert = false; 03953 bool Swap = false; 03954 unsigned Opc = 0; 03955 03956 SDValue Op0 = Op.getOperand(0); 03957 SDValue Op1 = Op.getOperand(1); 03958 SDValue CC = Op.getOperand(2); 03959 EVT VT = Op.getValueType(); 03960 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 03961 DebugLoc dl = Op.getDebugLoc(); 03962 03963 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 03964 switch (SetCCOpcode) { 03965 default: llvm_unreachable("Illegal FP comparison"); 03966 case ISD::SETUNE: 03967 case ISD::SETNE: Invert = true; // Fallthrough 03968 case ISD::SETOEQ: 03969 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 03970 case ISD::SETOLT: 03971 case ISD::SETLT: Swap = true; // Fallthrough 03972 case ISD::SETOGT: 03973 case ISD::SETGT: Opc = ARMISD::VCGT; break; 03974 case ISD::SETOLE: 03975 case ISD::SETLE: Swap = true; // Fallthrough 03976 case ISD::SETOGE: 03977 case ISD::SETGE: Opc = ARMISD::VCGE; break; 03978 case ISD::SETUGE: Swap = true; // Fallthrough 03979 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 03980 case ISD::SETUGT: Swap = true; // Fallthrough 03981 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 03982 case ISD::SETUEQ: Invert = true; // Fallthrough 03983 case ISD::SETONE: 03984 // Expand this to (OLT | OGT). 03985 TmpOp0 = Op0; 03986 TmpOp1 = Op1; 03987 Opc = ISD::OR; 03988 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 03989 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 03990 break; 03991 case ISD::SETUO: Invert = true; // Fallthrough 03992 case ISD::SETO: 03993 // Expand this to (OLT | OGE). 
03994 TmpOp0 = Op0; 03995 TmpOp1 = Op1; 03996 Opc = ISD::OR; 03997 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 03998 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 03999 break; 04000 } 04001 } else { 04002 // Integer comparisons. 04003 switch (SetCCOpcode) { 04004 default: llvm_unreachable("Illegal integer comparison"); 04005 case ISD::SETNE: Invert = true; 04006 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 04007 case ISD::SETLT: Swap = true; 04008 case ISD::SETGT: Opc = ARMISD::VCGT; break; 04009 case ISD::SETLE: Swap = true; 04010 case ISD::SETGE: Opc = ARMISD::VCGE; break; 04011 case ISD::SETULT: Swap = true; 04012 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 04013 case ISD::SETULE: Swap = true; 04014 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 04015 } 04016 04017 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 04018 if (Opc == ARMISD::VCEQ) { 04019 04020 SDValue AndOp; 04021 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 04022 AndOp = Op0; 04023 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 04024 AndOp = Op1; 04025 04026 // Ignore bitconvert. 04027 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) 04028 AndOp = AndOp.getOperand(0); 04029 04030 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 04031 Opc = ARMISD::VTST; 04032 Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); 04033 Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); 04034 Invert = !Invert; 04035 } 04036 } 04037 } 04038 04039 if (Swap) 04040 std::swap(Op0, Op1); 04041 04042 // If one of the operands is a constant vector zero, attempt to fold the 04043 // comparison to a specialized compare-against-zero form. 
04044 SDValue SingleOp; 04045 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 04046 SingleOp = Op0; 04047 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 04048 if (Opc == ARMISD::VCGE) 04049 Opc = ARMISD::VCLEZ; 04050 else if (Opc == ARMISD::VCGT) 04051 Opc = ARMISD::VCLTZ; 04052 SingleOp = Op1; 04053 } 04054 04055 SDValue Result; 04056 if (SingleOp.getNode()) { 04057 switch (Opc) { 04058 case ARMISD::VCEQ: 04059 Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; 04060 case ARMISD::VCGE: 04061 Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; 04062 case ARMISD::VCLEZ: 04063 Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; 04064 case ARMISD::VCGT: 04065 Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; 04066 case ARMISD::VCLTZ: 04067 Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; 04068 default: 04069 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 04070 } 04071 } else { 04072 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 04073 } 04074 04075 if (Invert) 04076 Result = DAG.getNOT(dl, Result, VT); 04077 04078 return Result; 04079 } 04080 04081 /// isNEONModifiedImm - Check if the specified splat value corresponds to a 04082 /// valid vector constant for a NEON instruction with a "modified immediate" 04083 /// operand (e.g., VMOV). If so, return the encoded value. 04084 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 04085 unsigned SplatBitSize, SelectionDAG &DAG, 04086 EVT &VT, bool is128Bits, NEONModImmType type) { 04087 unsigned OpCmode, Imm; 04088 04089 // SplatBitSize is set to the smallest size that splats the vector, so a 04090 // zero vector will always have SplatBitSize == 8. However, NEON modified 04091 // immediate instructions others than VMOV do not support the 8-bit encoding 04092 // of a zero vector, and the default encoding of zero is supposed to be the 04093 // 32-bit version. 
04094 if (SplatBits == 0) 04095 SplatBitSize = 32; 04096 04097 switch (SplatBitSize) { 04098 case 8: 04099 if (type != VMOVModImm) 04100 return SDValue(); 04101 // Any 1-byte value is OK. Op=0, Cmode=1110. 04102 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 04103 OpCmode = 0xe; 04104 Imm = SplatBits; 04105 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 04106 break; 04107 04108 case 16: 04109 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 04110 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 04111 if ((SplatBits & ~0xff) == 0) { 04112 // Value = 0x00nn: Op=x, Cmode=100x. 04113 OpCmode = 0x8; 04114 Imm = SplatBits; 04115 break; 04116 } 04117 if ((SplatBits & ~0xff00) == 0) { 04118 // Value = 0xnn00: Op=x, Cmode=101x. 04119 OpCmode = 0xa; 04120 Imm = SplatBits >> 8; 04121 break; 04122 } 04123 return SDValue(); 04124 04125 case 32: 04126 // NEON's 32-bit VMOV supports splat values where: 04127 // * only one byte is nonzero, or 04128 // * the least significant byte is 0xff and the second byte is nonzero, or 04129 // * the least significant 2 bytes are 0xff and the third is nonzero. 04130 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 04131 if ((SplatBits & ~0xff) == 0) { 04132 // Value = 0x000000nn: Op=x, Cmode=000x. 04133 OpCmode = 0; 04134 Imm = SplatBits; 04135 break; 04136 } 04137 if ((SplatBits & ~0xff00) == 0) { 04138 // Value = 0x0000nn00: Op=x, Cmode=001x. 04139 OpCmode = 0x2; 04140 Imm = SplatBits >> 8; 04141 break; 04142 } 04143 if ((SplatBits & ~0xff0000) == 0) { 04144 // Value = 0x00nn0000: Op=x, Cmode=010x. 04145 OpCmode = 0x4; 04146 Imm = SplatBits >> 16; 04147 break; 04148 } 04149 if ((SplatBits & ~0xff000000) == 0) { 04150 // Value = 0xnn000000: Op=x, Cmode=011x. 
04151 OpCmode = 0x6; 04152 Imm = SplatBits >> 24; 04153 break; 04154 } 04155 04156 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC 04157 if (type == OtherModImm) return SDValue(); 04158 04159 if ((SplatBits & ~0xffff) == 0 && 04160 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 04161 // Value = 0x0000nnff: Op=x, Cmode=1100. 04162 OpCmode = 0xc; 04163 Imm = SplatBits >> 8; 04164 SplatBits |= 0xff; 04165 break; 04166 } 04167 04168 if ((SplatBits & ~0xffffff) == 0 && 04169 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 04170 // Value = 0x00nnffff: Op=x, Cmode=1101. 04171 OpCmode = 0xd; 04172 Imm = SplatBits >> 16; 04173 SplatBits |= 0xffff; 04174 break; 04175 } 04176 04177 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 04178 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 04179 // VMOV.I32. A (very) minor optimization would be to replicate the value 04180 // and fall through here to test for a valid 64-bit splat. But, then the 04181 // caller would also need to check and handle the change in size. 04182 return SDValue(); 04183 04184 case 64: { 04185 if (type != VMOVModImm) 04186 return SDValue(); 04187 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 04188 uint64_t BitMask = 0xff; 04189 uint64_t Val = 0; 04190 unsigned ImmMask = 1; 04191 Imm = 0; 04192 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 04193 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 04194 Val |= BitMask; 04195 Imm |= ImmMask; 04196 } else if ((SplatBits & BitMask) != 0) { 04197 return SDValue(); 04198 } 04199 BitMask <<= 8; 04200 ImmMask <<= 1; 04201 } 04202 // Op=1, Cmode=1110. 04203 OpCmode = 0x1e; 04204 SplatBits = Val; 04205 VT = is128Bits ? 
MVT::v2i64 : MVT::v1i64; 04206 break; 04207 } 04208 04209 default: 04210 llvm_unreachable("unexpected size for isNEONModifiedImm"); 04211 } 04212 04213 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 04214 return DAG.getTargetConstant(EncodedVal, MVT::i32); 04215 } 04216 04217 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, 04218 const ARMSubtarget *ST) const { 04219 if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) 04220 return SDValue(); 04221 04222 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); 04223 assert(Op.getValueType() == MVT::f32 && 04224 "ConstantFP custom lowering should only occur for f32."); 04225 04226 // Try splatting with a VMOV.f32... 04227 APFloat FPVal = CFP->getValueAPF(); 04228 int ImmVal = ARM_AM::getFP32Imm(FPVal); 04229 if (ImmVal != -1) { 04230 DebugLoc DL = Op.getDebugLoc(); 04231 SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); 04232 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, 04233 NewVal); 04234 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, 04235 DAG.getConstant(0, MVT::i32)); 04236 } 04237 04238 // If that fails, try a VMOV.i32 04239 EVT VMovVT; 04240 unsigned iVal = FPVal.bitcastToAPInt().getZExtValue(); 04241 SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, 04242 VMOVModImm); 04243 if (NewVal != SDValue()) { 04244 DebugLoc DL = Op.getDebugLoc(); 04245 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, 04246 NewVal); 04247 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 04248 VecConstant); 04249 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 04250 DAG.getConstant(0, MVT::i32)); 04251 } 04252 04253 // Finally, try a VMVN.i32 04254 NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, 04255 VMVNModImm); 04256 if (NewVal != SDValue()) { 04257 DebugLoc DL = Op.getDebugLoc(); 04258 SDValue VecConstant = 
DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); 04259 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 04260 VecConstant); 04261 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 04262 DAG.getConstant(0, MVT::i32)); 04263 } 04264 04265 return SDValue(); 04266 } 04267 04268 // check if an VEXT instruction can handle the shuffle mask when the 04269 // vector sources of the shuffle are the same. 04270 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { 04271 unsigned NumElts = VT.getVectorNumElements(); 04272 04273 // Assume that the first shuffle index is not UNDEF. Fail if it is. 04274 if (M[0] < 0) 04275 return false; 04276 04277 Imm = M[0]; 04278 04279 // If this is a VEXT shuffle, the immediate value is the index of the first 04280 // element. The other shuffle indices must be the successive elements after 04281 // the first one. 04282 unsigned ExpectedElt = Imm; 04283 for (unsigned i = 1; i < NumElts; ++i) { 04284 // Increment the expected index. If it wraps around, just follow it 04285 // back to index zero and keep going. 04286 ++ExpectedElt; 04287 if (ExpectedElt == NumElts) 04288 ExpectedElt = 0; 04289 04290 if (M[i] < 0) continue; // ignore UNDEF indices 04291 if (ExpectedElt != static_cast<unsigned>(M[i])) 04292 return false; 04293 } 04294 04295 return true; 04296 } 04297 04298 04299 static bool isVEXTMask(ArrayRef<int> M, EVT VT, 04300 bool &ReverseVEXT, unsigned &Imm) { 04301 unsigned NumElts = VT.getVectorNumElements(); 04302 ReverseVEXT = false; 04303 04304 // Assume that the first shuffle index is not UNDEF. Fail if it is. 04305 if (M[0] < 0) 04306 return false; 04307 04308 Imm = M[0]; 04309 04310 // If this is a VEXT shuffle, the immediate value is the index of the first 04311 // element. The other shuffle indices must be the successive elements after 04312 // the first one. 
04313 unsigned ExpectedElt = Imm; 04314 for (unsigned i = 1; i < NumElts; ++i) { 04315 // Increment the expected index. If it wraps around, it may still be 04316 // a VEXT but the source vectors must be swapped. 04317 ExpectedElt += 1; 04318 if (ExpectedElt == NumElts * 2) { 04319 ExpectedElt = 0; 04320 ReverseVEXT = true; 04321 } 04322 04323 if (M[i] < 0) continue; // ignore UNDEF indices 04324 if (ExpectedElt != static_cast<unsigned>(M[i])) 04325 return false; 04326 } 04327 04328 // Adjust the index value if the source operands will be swapped. 04329 if (ReverseVEXT) 04330 Imm -= NumElts; 04331 04332 return true; 04333 } 04334 04335 /// isVREVMask - Check if a vector shuffle corresponds to a VREV 04336 /// instruction with the specified blocksize. (The order of the elements 04337 /// within each block of the vector is reversed.) 04338 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { 04339 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 04340 "Only possible block sizes for VREV are: 16, 32, 64"); 04341 04342 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04343 if (EltSz == 64) 04344 return false; 04345 04346 unsigned NumElts = VT.getVectorNumElements(); 04347 unsigned BlockElts = M[0] + 1; 04348 // If the first shuffle index is UNDEF, be optimistic. 04349 if (M[0] < 0) 04350 BlockElts = BlockSize / EltSz; 04351 04352 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 04353 return false; 04354 04355 for (unsigned i = 0; i < NumElts; ++i) { 04356 if (M[i] < 0) continue; // ignore UNDEF indices 04357 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 04358 return false; 04359 } 04360 04361 return true; 04362 } 04363 04364 static bool isVTBLMask(ArrayRef<int> M, EVT VT) { 04365 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 04366 // range, then 0 is placed into the resulting vector. So pretty much any mask 04367 // of 8 elements can work here. 
04368 return VT == MVT::v8i8 && M.size() == 8; 04369 } 04370 04371 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 04372 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04373 if (EltSz == 64) 04374 return false; 04375 04376 unsigned NumElts = VT.getVectorNumElements(); 04377 WhichResult = (M[0] == 0 ? 0 : 1); 04378 for (unsigned i = 0; i < NumElts; i += 2) { 04379 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 04380 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 04381 return false; 04382 } 04383 return true; 04384 } 04385 04386 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 04387 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 04388 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 04389 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 04390 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04391 if (EltSz == 64) 04392 return false; 04393 04394 unsigned NumElts = VT.getVectorNumElements(); 04395 WhichResult = (M[0] == 0 ? 0 : 1); 04396 for (unsigned i = 0; i < NumElts; i += 2) { 04397 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 04398 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 04399 return false; 04400 } 04401 return true; 04402 } 04403 04404 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 04405 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04406 if (EltSz == 64) 04407 return false; 04408 04409 unsigned NumElts = VT.getVectorNumElements(); 04410 WhichResult = (M[0] == 0 ? 0 : 1); 04411 for (unsigned i = 0; i != NumElts; ++i) { 04412 if (M[i] < 0) continue; // ignore UNDEF indices 04413 if ((unsigned) M[i] != 2 * i + WhichResult) 04414 return false; 04415 } 04416 04417 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 
04418 if (VT.is64BitVector() && EltSz == 32) 04419 return false; 04420 04421 return true; 04422 } 04423 04424 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 04425 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 04426 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 04427 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 04428 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04429 if (EltSz == 64) 04430 return false; 04431 04432 unsigned Half = VT.getVectorNumElements() / 2; 04433 WhichResult = (M[0] == 0 ? 0 : 1); 04434 for (unsigned j = 0; j != 2; ++j) { 04435 unsigned Idx = WhichResult; 04436 for (unsigned i = 0; i != Half; ++i) { 04437 int MIdx = M[i + j * Half]; 04438 if (MIdx >= 0 && (unsigned) MIdx != Idx) 04439 return false; 04440 Idx += 2; 04441 } 04442 } 04443 04444 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 04445 if (VT.is64BitVector() && EltSz == 32) 04446 return false; 04447 04448 return true; 04449 } 04450 04451 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 04452 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04453 if (EltSz == 64) 04454 return false; 04455 04456 unsigned NumElts = VT.getVectorNumElements(); 04457 WhichResult = (M[0] == 0 ? 0 : 1); 04458 unsigned Idx = WhichResult * NumElts / 2; 04459 for (unsigned i = 0; i != NumElts; i += 2) { 04460 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 04461 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 04462 return false; 04463 Idx += 1; 04464 } 04465 04466 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 04467 if (VT.is64BitVector() && EltSz == 32) 04468 return false; 04469 04470 return true; 04471 } 04472 04473 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 04474 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 04475 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 
04476 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 04477 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 04478 if (EltSz == 64) 04479 return false; 04480 04481 unsigned NumElts = VT.getVectorNumElements(); 04482 WhichResult = (M[0] == 0 ? 0 : 1); 04483 unsigned Idx = WhichResult * NumElts / 2; 04484 for (unsigned i = 0; i != NumElts; i += 2) { 04485 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 04486 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 04487 return false; 04488 Idx += 1; 04489 } 04490 04491 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 04492 if (VT.is64BitVector() && EltSz == 32) 04493 return false; 04494 04495 return true; 04496 } 04497 04498 /// \return true if this is a reverse operation on an vector. 04499 static bool isReverseMask(ArrayRef<int> M, EVT VT) { 04500 unsigned NumElts = VT.getVectorNumElements(); 04501 // Make sure the mask has the right size. 04502 if (NumElts != M.size()) 04503 return false; 04504 04505 // Look for <15, ..., 3, -1, 1, 0>. 04506 for (unsigned i = 0; i != NumElts; ++i) 04507 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) 04508 return false; 04509 04510 return true; 04511 } 04512 04513 // If N is an integer constant that can be moved into a register in one 04514 // instruction, return an SDValue of such a constant (will become a MOV 04515 // instruction). Otherwise return null. 
04516 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 04517 const ARMSubtarget *ST, DebugLoc dl) { 04518 uint64_t Val; 04519 if (!isa<ConstantSDNode>(N)) 04520 return SDValue(); 04521 Val = cast<ConstantSDNode>(N)->getZExtValue(); 04522 04523 if (ST->isThumb1Only()) { 04524 if (Val <= 255 || ~Val <= 255) 04525 return DAG.getConstant(Val, MVT::i32); 04526 } else { 04527 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 04528 return DAG.getConstant(Val, MVT::i32); 04529 } 04530 return SDValue(); 04531 } 04532 04533 // If this is a case we can't handle, return null and let the default 04534 // expansion code take care of it. 04535 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 04536 const ARMSubtarget *ST) const { 04537 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 04538 DebugLoc dl = Op.getDebugLoc(); 04539 EVT VT = Op.getValueType(); 04540 04541 APInt SplatBits, SplatUndef; 04542 unsigned SplatBitSize; 04543 bool HasAnyUndefs; 04544 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 04545 if (SplatBitSize <= 64) { 04546 // Check if an immediate VMOV works. 04547 EVT VmovVT; 04548 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 04549 SplatUndef.getZExtValue(), SplatBitSize, 04550 DAG, VmovVT, VT.is128BitVector(), 04551 VMOVModImm); 04552 if (Val.getNode()) { 04553 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 04554 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 04555 } 04556 04557 // Try an immediate VMVN. 
04558 uint64_t NegatedImm = (~SplatBits).getZExtValue(); 04559 Val = isNEONModifiedImm(NegatedImm, 04560 SplatUndef.getZExtValue(), SplatBitSize, 04561 DAG, VmovVT, VT.is128BitVector(), 04562 VMVNModImm); 04563 if (Val.getNode()) { 04564 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 04565 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 04566 } 04567 04568 // Use vmov.f32 to materialize other v2f32 and v4f32 splats. 04569 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { 04570 int ImmVal = ARM_AM::getFP32Imm(SplatBits); 04571 if (ImmVal != -1) { 04572 SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); 04573 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); 04574 } 04575 } 04576 } 04577 } 04578 04579 // Scan through the operands to see if only one value is used. 04580 // 04581 // As an optimisation, even if more than one value is used it may be more 04582 // profitable to splat with one value then change some lanes. 04583 // 04584 // Heuristically we decide to do this if the vector has a "dominant" value, 04585 // defined as splatted to more than half of the lanes. 04586 unsigned NumElts = VT.getVectorNumElements(); 04587 bool isOnlyLowElement = true; 04588 bool usesOnlyOneValue = true; 04589 bool hasDominantValue = false; 04590 bool isConstant = true; 04591 04592 // Map of the number of times a particular SDValue appears in the 04593 // element list. 04594 DenseMap<SDValue, unsigned> ValueCounts; 04595 SDValue Value; 04596 for (unsigned i = 0; i < NumElts; ++i) { 04597 SDValue V = Op.getOperand(i); 04598 if (V.getOpcode() == ISD::UNDEF) 04599 continue; 04600 if (i > 0) 04601 isOnlyLowElement = false; 04602 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 04603 isConstant = false; 04604 04605 ValueCounts.insert(std::make_pair(V, 0)); 04606 unsigned &Count = ValueCounts[V]; 04607 04608 // Is this value dominant? 
(takes up more than half of the lanes) 04609 if (++Count > (NumElts / 2)) { 04610 hasDominantValue = true; 04611 Value = V; 04612 } 04613 } 04614 if (ValueCounts.size() != 1) 04615 usesOnlyOneValue = false; 04616 if (!Value.getNode() && ValueCounts.size() > 0) 04617 Value = ValueCounts.begin()->first; 04618 04619 if (ValueCounts.size() == 0) 04620 return DAG.getUNDEF(VT); 04621 04622 if (isOnlyLowElement) 04623 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 04624 04625 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 04626 04627 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 04628 // i32 and try again. 04629 if (hasDominantValue && EltSize <= 32) { 04630 if (!isConstant) { 04631 SDValue N; 04632 04633 // If we are VDUPing a value that comes directly from a vector, that will 04634 // cause an unnecessary move to and from a GPR, where instead we could 04635 // just use VDUPLANE. We can only do this if the lane being extracted 04636 // is at a constant index, as the VDUP from lane instructions only have 04637 // constant-index forms. 04638 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 04639 isa<ConstantSDNode>(Value->getOperand(1))) { 04640 // We need to create a new undef vector to use for the VDUPLANE if the 04641 // size of the vector from which we get the value is different than the 04642 // size of the vector that we need to create. We will insert the element 04643 // such that the register coalescer will remove unnecessary copies. 
04644 if (VT != Value->getOperand(0).getValueType()) { 04645 ConstantSDNode *constIndex; 04646 constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)); 04647 assert(constIndex && "The index is not a constant!"); 04648 unsigned index = constIndex->getAPIntValue().getLimitedValue() % 04649 VT.getVectorNumElements(); 04650 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, 04651 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), 04652 Value, DAG.getConstant(index, MVT::i32)), 04653 DAG.getConstant(index, MVT::i32)); 04654 } else 04655 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, 04656 Value->getOperand(0), Value->getOperand(1)); 04657 } else 04658 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); 04659 04660 if (!usesOnlyOneValue) { 04661 // The dominant value was splatted as 'N', but we now have to insert 04662 // all differing elements. 04663 for (unsigned I = 0; I < NumElts; ++I) { 04664 if (Op.getOperand(I) == Value) 04665 continue; 04666 SmallVector<SDValue, 3> Ops; 04667 Ops.push_back(N); 04668 Ops.push_back(Op.getOperand(I)); 04669 Ops.push_back(DAG.getConstant(I, MVT::i32)); 04670 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); 04671 } 04672 } 04673 return N; 04674 } 04675 if (VT.getVectorElementType().isFloatingPoint()) { 04676 SmallVector<SDValue, 8> Ops; 04677 for (unsigned i = 0; i < NumElts; ++i) 04678 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, 04679 Op.getOperand(i))); 04680 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 04681 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); 04682 Val = LowerBUILD_VECTOR(Val, DAG, ST); 04683 if (Val.getNode()) 04684 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 04685 } 04686 if (usesOnlyOneValue) { 04687 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 04688 if (isConstant && Val.getNode()) 04689 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 04690 } 04691 } 04692 04693 // If all elements are constants and the case above didn't get hit, fall 
back 04694 // to the default expansion, which will generate a load from the constant 04695 // pool. 04696 if (isConstant) 04697 return SDValue(); 04698 04699 // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 04700 if (NumElts >= 4) { 04701 SDValue shuffle = ReconstructShuffle(Op, DAG); 04702 if (shuffle != SDValue()) 04703 return shuffle; 04704 } 04705 04706 // Vectors with 32- or 64-bit elements can be built by directly assigning 04707 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 04708 // will be legalized. 04709 if (EltSize >= 32) { 04710 // Do the expansion with floating-point types, since that is what the VFP 04711 // registers are defined to use, and since i64 is not legal. 04712 EVT EltVT = EVT::getFloatingPointVT(EltSize); 04713 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 04714 SmallVector<SDValue, 8> Ops; 04715 for (unsigned i = 0; i < NumElts; ++i) 04716 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); 04717 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 04718 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 04719 } 04720 04721 return SDValue(); 04722 } 04723 04724 // Gather data to see if the operation can be modelled as a 04725 // shuffle in combination with VEXTs. 04726 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, 04727 SelectionDAG &DAG) const { 04728 DebugLoc dl = Op.getDebugLoc(); 04729 EVT VT = Op.getValueType(); 04730 unsigned NumElts = VT.getVectorNumElements(); 04731 04732 SmallVector<SDValue, 2> SourceVecs; 04733 SmallVector<unsigned, 2> MinElts; 04734 SmallVector<unsigned, 2> MaxElts; 04735 04736 for (unsigned i = 0; i < NumElts; ++i) { 04737 SDValue V = Op.getOperand(i); 04738 if (V.getOpcode() == ISD::UNDEF) 04739 continue; 04740 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { 04741 // A shuffle can only come from building a vector from various 04742 // elements of other vectors. 
04743 return SDValue(); 04744 } else if (V.getOperand(0).getValueType().getVectorElementType() != 04745 VT.getVectorElementType()) { 04746 // This code doesn't know how to handle shuffles where the vector 04747 // element types do not match (this happens because type legalization 04748 // promotes the return type of EXTRACT_VECTOR_ELT). 04749 // FIXME: It might be appropriate to extend this code to handle 04750 // mismatched types. 04751 return SDValue(); 04752 } 04753 04754 // Record this extraction against the appropriate vector if possible... 04755 SDValue SourceVec = V.getOperand(0); 04756 // If the element number isn't a constant, we can't effectively 04757 // analyze what's going on. 04758 if (!isa<ConstantSDNode>(V.getOperand(1))) 04759 return SDValue(); 04760 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); 04761 bool FoundSource = false; 04762 for (unsigned j = 0; j < SourceVecs.size(); ++j) { 04763 if (SourceVecs[j] == SourceVec) { 04764 if (MinElts[j] > EltNo) 04765 MinElts[j] = EltNo; 04766 if (MaxElts[j] < EltNo) 04767 MaxElts[j] = EltNo; 04768 FoundSource = true; 04769 break; 04770 } 04771 } 04772 04773 // Or record a new source if not... 04774 if (!FoundSource) { 04775 SourceVecs.push_back(SourceVec); 04776 MinElts.push_back(EltNo); 04777 MaxElts.push_back(EltNo); 04778 } 04779 } 04780 04781 // Currently only do something sane when at most two source vectors 04782 // involved. 04783 if (SourceVecs.size() > 2) 04784 return SDValue(); 04785 04786 SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; 04787 int VEXTOffsets[2] = {0, 0}; 04788 04789 // This loop extracts the usage patterns of the source vectors 04790 // and prepares appropriate SDValues for a shuffle if possible. 
04791 for (unsigned i = 0; i < SourceVecs.size(); ++i) { 04792 if (SourceVecs[i].getValueType() == VT) { 04793 // No VEXT necessary 04794 ShuffleSrcs[i] = SourceVecs[i]; 04795 VEXTOffsets[i] = 0; 04796 continue; 04797 } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { 04798 // It probably isn't worth padding out a smaller vector just to 04799 // break it down again in a shuffle. 04800 return SDValue(); 04801 } 04802 04803 // Since only 64-bit and 128-bit vectors are legal on ARM and 04804 // we've eliminated the other cases... 04805 assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && 04806 "unexpected vector sizes in ReconstructShuffle"); 04807 04808 if (MaxElts[i] - MinElts[i] >= NumElts) { 04809 // Span too large for a VEXT to cope 04810 return SDValue(); 04811 } 04812 04813 if (MinElts[i] >= NumElts) { 04814 // The extraction can just take the second half 04815 VEXTOffsets[i] = NumElts; 04816 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 04817 SourceVecs[i], 04818 DAG.getIntPtrConstant(NumElts)); 04819 } else if (MaxElts[i] < NumElts) { 04820 // The extraction can just take the first half 04821 VEXTOffsets[i] = 0; 04822 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 04823 SourceVecs[i], 04824 DAG.getIntPtrConstant(0)); 04825 } else { 04826 // An actual VEXT is needed 04827 VEXTOffsets[i] = MinElts[i]; 04828 SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 04829 SourceVecs[i], 04830 DAG.getIntPtrConstant(0)); 04831 SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 04832 SourceVecs[i], 04833 DAG.getIntPtrConstant(NumElts)); 04834 ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, 04835 DAG.getConstant(VEXTOffsets[i], MVT::i32)); 04836 } 04837 } 04838 04839 SmallVector<int, 8> Mask; 04840 04841 for (unsigned i = 0; i < NumElts; ++i) { 04842 SDValue Entry = Op.getOperand(i); 04843 if (Entry.getOpcode() == ISD::UNDEF) { 04844 Mask.push_back(-1); 
04845 continue; 04846 } 04847 04848 SDValue ExtractVec = Entry.getOperand(0); 04849 int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i) 04850 .getOperand(1))->getSExtValue(); 04851 if (ExtractVec == SourceVecs[0]) { 04852 Mask.push_back(ExtractElt - VEXTOffsets[0]); 04853 } else { 04854 Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); 04855 } 04856 } 04857 04858 // Final check before we try to produce nonsense... 04859 if (isShuffleMaskLegal(Mask, VT)) 04860 return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], 04861 &Mask[0]); 04862 04863 return SDValue(); 04864 } 04865 04866 /// isShuffleMaskLegal - Targets can use this to indicate that they only 04867 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 04868 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 04869 /// are assumed to be legal. 04870 bool 04871 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 04872 EVT VT) const { 04873 if (VT.getVectorNumElements() == 4 && 04874 (VT.is128BitVector() || VT.is64BitVector())) { 04875 unsigned PFIndexes[4]; 04876 for (unsigned i = 0; i != 4; ++i) { 04877 if (M[i] < 0) 04878 PFIndexes[i] = 8; 04879 else 04880 PFIndexes[i] = M[i]; 04881 } 04882 04883 // Compute the index in the perfect shuffle table. 
04884 unsigned PFTableIndex = 04885 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 04886 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 04887 unsigned Cost = (PFEntry >> 30); 04888 04889 if (Cost <= 4) 04890 return true; 04891 } 04892 04893 bool ReverseVEXT; 04894 unsigned Imm, WhichResult; 04895 04896 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 04897 return (EltSize >= 32 || 04898 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 04899 isVREVMask(M, VT, 64) || 04900 isVREVMask(M, VT, 32) || 04901 isVREVMask(M, VT, 16) || 04902 isVEXTMask(M, VT, ReverseVEXT, Imm) || 04903 isVTBLMask(M, VT) || 04904 isVTRNMask(M, VT, WhichResult) || 04905 isVUZPMask(M, VT, WhichResult) || 04906 isVZIPMask(M, VT, WhichResult) || 04907 isVTRN_v_undef_Mask(M, VT, WhichResult) || 04908 isVUZP_v_undef_Mask(M, VT, WhichResult) || 04909 isVZIP_v_undef_Mask(M, VT, WhichResult) || 04910 ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); 04911 } 04912 04913 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 04914 /// the specified operations to build the shuffle. 
04915 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 04916 SDValue RHS, SelectionDAG &DAG, 04917 DebugLoc dl) { 04918 unsigned OpNum = (PFEntry >> 26) & 0x0F; 04919 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 04920 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 04921 04922 enum { 04923 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 04924 OP_VREV, 04925 OP_VDUP0, 04926 OP_VDUP1, 04927 OP_VDUP2, 04928 OP_VDUP3, 04929 OP_VEXT1, 04930 OP_VEXT2, 04931 OP_VEXT3, 04932 OP_VUZPL, // VUZP, left result 04933 OP_VUZPR, // VUZP, right result 04934 OP_VZIPL, // VZIP, left result 04935 OP_VZIPR, // VZIP, right result 04936 OP_VTRNL, // VTRN, left result 04937 OP_VTRNR // VTRN, right result 04938 }; 04939 04940 if (OpNum == OP_COPY) { 04941 if (LHSID == (1*9+2)*9+3) return LHS; 04942 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 04943 return RHS; 04944 } 04945 04946 SDValue OpLHS, OpRHS; 04947 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 04948 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 04949 EVT VT = OpLHS.getValueType(); 04950 04951 switch (OpNum) { 04952 default: llvm_unreachable("Unknown shuffle opcode!"); 04953 case OP_VREV: 04954 // VREV divides the vector in half and swaps within the half. 
04955 if (VT.getVectorElementType() == MVT::i32 || 04956 VT.getVectorElementType() == MVT::f32) 04957 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 04958 // vrev <4 x i16> -> VREV32 04959 if (VT.getVectorElementType() == MVT::i16) 04960 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS); 04961 // vrev <4 x i8> -> VREV16 04962 assert(VT.getVectorElementType() == MVT::i8); 04963 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS); 04964 case OP_VDUP0: 04965 case OP_VDUP1: 04966 case OP_VDUP2: 04967 case OP_VDUP3: 04968 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 04969 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 04970 case OP_VEXT1: 04971 case OP_VEXT2: 04972 case OP_VEXT3: 04973 return DAG.getNode(ARMISD::VEXT, dl, VT, 04974 OpLHS, OpRHS, 04975 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 04976 case OP_VUZPL: 04977 case OP_VUZPR: 04978 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 04979 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 04980 case OP_VZIPL: 04981 case OP_VZIPR: 04982 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 04983 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 04984 case OP_VTRNL: 04985 case OP_VTRNR: 04986 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 04987 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 04988 } 04989 } 04990 04991 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, 04992 ArrayRef<int> ShuffleMask, 04993 SelectionDAG &DAG) { 04994 // Check to see if we can use the VTBL instruction. 
04995 SDValue V1 = Op.getOperand(0); 04996 SDValue V2 = Op.getOperand(1); 04997 DebugLoc DL = Op.getDebugLoc(); 04998 04999 SmallVector<SDValue, 8> VTBLMask; 05000 for (ArrayRef<int>::iterator 05001 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) 05002 VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); 05003 05004 if (V2.getNode()->getOpcode() == ISD::UNDEF) 05005 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, 05006 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, 05007 &VTBLMask[0], 8)); 05008 05009 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, 05010 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, 05011 &VTBLMask[0], 8)); 05012 } 05013 05014 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, 05015 SelectionDAG &DAG) { 05016 DebugLoc DL = Op.getDebugLoc(); 05017 SDValue OpLHS = Op.getOperand(0); 05018 EVT VT = OpLHS.getValueType(); 05019 05020 assert((VT == MVT::v8i16 || VT == MVT::v16i8) && 05021 "Expect an v8i16/v16i8 type"); 05022 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); 05023 // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now, 05024 // extract the first 8 bytes into the top double word and the last 8 bytes 05025 // into the bottom double word. The v8i16 case is similar. 05026 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4; 05027 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, 05028 DAG.getConstant(ExtractNum, MVT::i32)); 05029 } 05030 05031 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 05032 SDValue V1 = Op.getOperand(0); 05033 SDValue V2 = Op.getOperand(1); 05034 DebugLoc dl = Op.getDebugLoc(); 05035 EVT VT = Op.getValueType(); 05036 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 05037 05038 // Convert shuffles that are directly supported on NEON to target-specific 05039 // DAG nodes, instead of keeping them as shuffles and matching them again 05040 // during code selection. 
This is more efficient and avoids the possibility 05041 // of inconsistencies between legalization and selection. 05042 // FIXME: floating-point vectors should be canonicalized to integer vectors 05043 // of the same time so that they get CSEd properly. 05044 ArrayRef<int> ShuffleMask = SVN->getMask(); 05045 05046 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 05047 if (EltSize <= 32) { 05048 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 05049 int Lane = SVN->getSplatIndex(); 05050 // If this is undef splat, generate it via "just" vdup, if possible. 05051 if (Lane == -1) Lane = 0; 05052 05053 // Test if V1 is a SCALAR_TO_VECTOR. 05054 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 05055 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 05056 } 05057 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR 05058 // (and probably will turn into a SCALAR_TO_VECTOR once legalization 05059 // reaches it). 05060 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && 05061 !isa<ConstantSDNode>(V1.getOperand(0))) { 05062 bool IsScalarToVector = true; 05063 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) 05064 if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { 05065 IsScalarToVector = false; 05066 break; 05067 } 05068 if (IsScalarToVector) 05069 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 05070 } 05071 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 05072 DAG.getConstant(Lane, MVT::i32)); 05073 } 05074 05075 bool ReverseVEXT; 05076 unsigned Imm; 05077 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 05078 if (ReverseVEXT) 05079 std::swap(V1, V2); 05080 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 05081 DAG.getConstant(Imm, MVT::i32)); 05082 } 05083 05084 if (isVREVMask(ShuffleMask, VT, 64)) 05085 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 05086 if (isVREVMask(ShuffleMask, VT, 32)) 05087 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 05088 if 
(isVREVMask(ShuffleMask, VT, 16)) 05089 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 05090 05091 if (V2->getOpcode() == ISD::UNDEF && 05092 isSingletonVEXTMask(ShuffleMask, VT, Imm)) { 05093 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1, 05094 DAG.getConstant(Imm, MVT::i32)); 05095 } 05096 05097 // Check for Neon shuffles that modify both input vectors in place. 05098 // If both results are used, i.e., if there are two shuffles with the same 05099 // source operands and with masks corresponding to both results of one of 05100 // these operations, DAG memoization will ensure that a single node is 05101 // used for both shuffles. 05102 unsigned WhichResult; 05103 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 05104 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 05105 V1, V2).getValue(WhichResult); 05106 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 05107 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 05108 V1, V2).getValue(WhichResult); 05109 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 05110 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 05111 V1, V2).getValue(WhichResult); 05112 05113 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 05114 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 05115 V1, V1).getValue(WhichResult); 05116 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 05117 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 05118 V1, V1).getValue(WhichResult); 05119 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 05120 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 05121 V1, V1).getValue(WhichResult); 05122 } 05123 05124 // If the shuffle is not directly supported and it has 4 elements, use 05125 // the PerfectShuffle-generated table to synthesize it from other shuffles. 
05126 unsigned NumElts = VT.getVectorNumElements(); 05127 if (NumElts == 4) { 05128 unsigned PFIndexes[4]; 05129 for (unsigned i = 0; i != 4; ++i) { 05130 if (ShuffleMask[i] < 0) 05131 PFIndexes[i] = 8; 05132 else 05133 PFIndexes[i] = ShuffleMask[i]; 05134 } 05135 05136 // Compute the index in the perfect shuffle table. 05137 unsigned PFTableIndex = 05138 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 05139 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 05140 unsigned Cost = (PFEntry >> 30); 05141 05142 if (Cost <= 4) 05143 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 05144 } 05145 05146 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 05147 if (EltSize >= 32) { 05148 // Do the expansion with floating-point types, since that is what the VFP 05149 // registers are defined to use, and since i64 is not legal. 05150 EVT EltVT = EVT::getFloatingPointVT(EltSize); 05151 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 05152 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 05153 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 05154 SmallVector<SDValue, 8> Ops; 05155 for (unsigned i = 0; i < NumElts; ++i) { 05156 if (ShuffleMask[i] < 0) 05157 Ops.push_back(DAG.getUNDEF(EltVT)); 05158 else 05159 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 05160 ShuffleMask[i] < (int)NumElts ? 
V1 : V2, 05161 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 05162 MVT::i32))); 05163 } 05164 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 05165 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 05166 } 05167 05168 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) 05169 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); 05170 05171 if (VT == MVT::v8i8) { 05172 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 05173 if (NewOp.getNode()) 05174 return NewOp; 05175 } 05176 05177 return SDValue(); 05178 } 05179 05180 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 05181 // INSERT_VECTOR_ELT is legal only for immediate indexes. 05182 SDValue Lane = Op.getOperand(2); 05183 if (!isa<ConstantSDNode>(Lane)) 05184 return SDValue(); 05185 05186 return Op; 05187 } 05188 05189 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 05190 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 05191 SDValue Lane = Op.getOperand(1); 05192 if (!isa<ConstantSDNode>(Lane)) 05193 return SDValue(); 05194 05195 SDValue Vec = Op.getOperand(0); 05196 if (Op.getValueType() == MVT::i32 && 05197 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 05198 DebugLoc dl = Op.getDebugLoc(); 05199 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 05200 } 05201 05202 return Op; 05203 } 05204 05205 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 05206 // The only time a CONCAT_VECTORS operation can have legal types is when 05207 // two 64-bit vectors are concatenated to a 128-bit vector. 
05208 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 05209 "unexpected CONCAT_VECTORS"); 05210 DebugLoc dl = Op.getDebugLoc(); 05211 SDValue Val = DAG.getUNDEF(MVT::v2f64); 05212 SDValue Op0 = Op.getOperand(0); 05213 SDValue Op1 = Op.getOperand(1); 05214 if (Op0.getOpcode() != ISD::UNDEF) 05215 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 05216 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 05217 DAG.getIntPtrConstant(0)); 05218 if (Op1.getOpcode() != ISD::UNDEF) 05219 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 05220 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 05221 DAG.getIntPtrConstant(1)); 05222 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 05223 } 05224 05225 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 05226 /// element has been zero/sign-extended, depending on the isSigned parameter, 05227 /// from an integer type half its size. 05228 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 05229 bool isSigned) { 05230 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 05231 EVT VT = N->getValueType(0); 05232 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 05233 SDNode *BVN = N->getOperand(0).getNode(); 05234 if (BVN->getValueType(0) != MVT::v4i32 || 05235 BVN->getOpcode() != ISD::BUILD_VECTOR) 05236 return false; 05237 unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 
1 : 0; 05238 unsigned HiElt = 1 - LoElt; 05239 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 05240 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 05241 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 05242 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 05243 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 05244 return false; 05245 if (isSigned) { 05246 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 05247 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 05248 return true; 05249 } else { 05250 if (Hi0->isNullValue() && Hi1->isNullValue()) 05251 return true; 05252 } 05253 return false; 05254 } 05255 05256 if (N->getOpcode() != ISD::BUILD_VECTOR) 05257 return false; 05258 05259 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 05260 SDNode *Elt = N->getOperand(i).getNode(); 05261 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 05262 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 05263 unsigned HalfSize = EltSize / 2; 05264 if (isSigned) { 05265 if (!isIntN(HalfSize, C->getSExtValue())) 05266 return false; 05267 } else { 05268 if (!isUIntN(HalfSize, C->getZExtValue())) 05269 return false; 05270 } 05271 continue; 05272 } 05273 return false; 05274 } 05275 05276 return true; 05277 } 05278 05279 /// isSignExtended - Check if a node is a vector value that is sign-extended 05280 /// or a constant BUILD_VECTOR with sign-extended elements. 05281 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 05282 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 05283 return true; 05284 if (isExtendedBUILD_VECTOR(N, DAG, true)) 05285 return true; 05286 return false; 05287 } 05288 05289 /// isZeroExtended - Check if a node is a vector value that is zero-extended 05290 /// or a constant BUILD_VECTOR with zero-extended elements. 
05291 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 05292 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 05293 return true; 05294 if (isExtendedBUILD_VECTOR(N, DAG, false)) 05295 return true; 05296 return false; 05297 } 05298 05299 static EVT getExtensionTo64Bits(const EVT &OrigVT) { 05300 if (OrigVT.getSizeInBits() >= 64) 05301 return OrigVT; 05302 05303 assert(OrigVT.isSimple() && "Expecting a simple value type"); 05304 05305 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; 05306 switch (OrigSimpleTy) { 05307 default: llvm_unreachable("Unexpected Vector Type"); 05308 case MVT::v2i8: 05309 case MVT::v2i16: 05310 return MVT::v2i32; 05311 case MVT::v4i8: 05312 return MVT::v4i16; 05313 } 05314 } 05315 05316 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total 05317 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. 05318 /// We insert the required extension here to get the vector to fill a D register. 05319 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, 05320 const EVT &OrigTy, 05321 const EVT &ExtTy, 05322 unsigned ExtOpcode) { 05323 // The vector originally had a size of OrigTy. It was then extended to ExtTy. 05324 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than 05325 // 64-bits we need to insert a new extension so that it will be 64-bits. 05326 assert(ExtTy.is128BitVector() && "Unexpected extension size"); 05327 if (OrigTy.getSizeInBits() >= 64) 05328 return N; 05329 05330 // Must extend size to at least 64 bits to be used as an operand for VMULL. 05331 EVT NewVT = getExtensionTo64Bits(OrigTy); 05332 05333 return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); 05334 } 05335 05336 /// SkipLoadExtensionForVMULL - return a load of the original vector size that 05337 /// does not do any sign/zero extension. 
If the original vector is less 05338 /// than 64 bits, an appropriate extension will be added after the load to 05339 /// reach a total size of 64 bits. We have to add the extension separately 05340 /// because ARM does not have a sign/zero extending load for vectors. 05341 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { 05342 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); 05343 05344 // The load already has the right type. 05345 if (ExtendedTy == LD->getMemoryVT()) 05346 return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), 05347 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 05348 LD->isNonTemporal(), LD->isInvariant(), 05349 LD->getAlignment()); 05350 05351 // We need to create a zextload/sextload. We cannot just create a load 05352 // followed by a zext/zext node because LowerMUL is also run during normal 05353 // operation legalization where we can't create illegal types. 05354 return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy, 05355 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), 05356 LD->getMemoryVT(), LD->isVolatile(), 05357 LD->isNonTemporal(), LD->getAlignment()); 05358 } 05359 05360 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, 05361 /// extending load, or BUILD_VECTOR with extended elements, return the 05362 /// unextended value. The unextended vector should be 64 bits so that it can 05363 /// be used as an operand to a VMULL instruction. If the original vector size 05364 /// before extension is less than 64 bits we add a an extension to resize 05365 /// the vector to 64 bits. 
05366 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { 05367 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 05368 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG, 05369 N->getOperand(0)->getValueType(0), 05370 N->getValueType(0), 05371 N->getOpcode()); 05372 05373 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 05374 return SkipLoadExtensionForVMULL(LD, DAG); 05375 05376 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 05377 // have been legalized as a BITCAST from v4i32. 05378 if (N->getOpcode() == ISD::BITCAST) { 05379 SDNode *BVN = N->getOperand(0).getNode(); 05380 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 05381 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 05382 unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 05383 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, 05384 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 05385 } 05386 // Construct a new BUILD_VECTOR with elements truncated to half the size. 05387 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 05388 EVT VT = N->getValueType(0); 05389 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 05390 unsigned NumElts = VT.getVectorNumElements(); 05391 MVT TruncVT = MVT::getIntegerVT(EltSize); 05392 SmallVector<SDValue, 8> Ops; 05393 for (unsigned i = 0; i != NumElts; ++i) { 05394 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 05395 const APInt &CInt = C->getAPIntValue(); 05396 // Element types smaller than 32 bits are not legal, so use i32 elements. 05397 // The values are implicitly truncated so sext vs. zext doesn't matter. 
05398 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); 05399 } 05400 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 05401 MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); 05402 } 05403 05404 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 05405 unsigned Opcode = N->getOpcode(); 05406 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 05407 SDNode *N0 = N->getOperand(0).getNode(); 05408 SDNode *N1 = N->getOperand(1).getNode(); 05409 return N0->hasOneUse() && N1->hasOneUse() && 05410 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 05411 } 05412 return false; 05413 } 05414 05415 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 05416 unsigned Opcode = N->getOpcode(); 05417 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 05418 SDNode *N0 = N->getOperand(0).getNode(); 05419 SDNode *N1 = N->getOperand(1).getNode(); 05420 return N0->hasOneUse() && N1->hasOneUse() && 05421 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 05422 } 05423 return false; 05424 } 05425 05426 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 05427 // Multiplications are only custom-lowered for 128-bit vectors so that 05428 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 05429 EVT VT = Op.getValueType(); 05430 assert(VT.is128BitVector() && VT.isInteger() && 05431 "unexpected type for custom-lowering ISD::MUL"); 05432 SDNode *N0 = Op.getOperand(0).getNode(); 05433 SDNode *N1 = Op.getOperand(1).getNode(); 05434 unsigned NewOpc = 0; 05435 bool isMLA = false; 05436 bool isN0SExt = isSignExtended(N0, DAG); 05437 bool isN1SExt = isSignExtended(N1, DAG); 05438 if (isN0SExt && isN1SExt) 05439 NewOpc = ARMISD::VMULLs; 05440 else { 05441 bool isN0ZExt = isZeroExtended(N0, DAG); 05442 bool isN1ZExt = isZeroExtended(N1, DAG); 05443 if (isN0ZExt && isN1ZExt) 05444 NewOpc = ARMISD::VMULLu; 05445 else if (isN1SExt || isN1ZExt) { 05446 // Look for (s/zext A + s/zext B) * (s/zext C). 
We want to turn these 05447 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 05448 if (isN1SExt && isAddSubSExt(N0, DAG)) { 05449 NewOpc = ARMISD::VMULLs; 05450 isMLA = true; 05451 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 05452 NewOpc = ARMISD::VMULLu; 05453 isMLA = true; 05454 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 05455 std::swap(N0, N1); 05456 NewOpc = ARMISD::VMULLu; 05457 isMLA = true; 05458 } 05459 } 05460 05461 if (!NewOpc) { 05462 if (VT == MVT::v2i64) 05463 // Fall through to expand this. It is not legal. 05464 return SDValue(); 05465 else 05466 // Other vector multiplications are legal. 05467 return Op; 05468 } 05469 } 05470 05471 // Legalize to a VMULL instruction. 05472 DebugLoc DL = Op.