//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: Remove this once soft-float is supported.
static cl::opt<bool> DisablePPCFloatInVariadic(
    "disable-ppc-float-in-variadic",
    cl::desc("disable saving float registers for va_start on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCPreinc(
    "disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref(
    "disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned(
    "disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 SEXTLOAD, but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }
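
  // Illustrative note (not part of the original source): expanding the i8
  // SEXTLOAD means a sign-extending byte load such as
  //   %c = load i8, i8* %p
  //   %s = sext i8 %c to i32
  // becomes a zero-extending load followed by an in-register sign extension
  // (lbz + extsb); the ISA has lha for halfwords but no sign-extending byte
  // load, hence the asymmetry above.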

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
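
  // Illustrative note (not part of the original source): a pre-increment
  // load updates its base register in place, so C code like
  //   v = *++p;
  // can be selected to a single update-form instruction such as
  //   lwzu r3, 4(r4)   // load from r4+4 and write r4+4 back into r4
  // instead of a separate add followed by a load.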

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
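
  // Illustrative note (not part of the original source): when FSQRT is left
  // legal here because UnsafeFPMath, FRSQRTE/FRSQRTES and FRE/FRES are all
  // available, later DAG combines may rewrite sqrt(x) as x * rsqrte(x),
  // refining the hardware estimate with Newton-Raphson steps of the form
  //   y' = y * (1.5 - 0.5 * x * y * y)
  // which converge quadratically toward 1/sqrt(x).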

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP or CTTZ; CTPOP is legal only with POPCNTD.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  if (Subtarget.hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have SELECT.
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP is NOT intended to support SjLj exception
  // handling; it is a light-weight setjmp/longjmp replacement used to support
  // continuations, user-level threading, and the like. As a result, no other
  // SjLj exception interfaces are implemented, so please don't build your own
  // exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
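
  // Illustrative note (not part of the original source): at the IR level
  // these lowerings back the existing intrinsics
  //   declare i32  @llvm.eh.sjlj.setjmp(i8* %buf)
  //   declare void @llvm.eh.sjlj.longjmp(i8* %buf)
  // which save and restore only a minimal register state, unlike full SjLj
  // exception handling.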

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

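  // Illustrative note (not part of the original source): each of these
  // predicates folds two primitive tests; semantically, for example,
  //   setueq(a, b) == setoeq(a, b) || setuo(a, b)
  // (equal, or unordered because one operand is NaN), so marking them Expand
  // lets the legalizer split them into two legal comparisons joined by an OR.
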
  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }
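
  // Illustrative note (not part of the original source): the *_PARTS nodes
  // split a double-width shift into register-sized halves; conceptually, for
  // a 64-bit left shift on PPC32 with Amt < 32,
  //   Lo' = Lo << Amt
  //   Hi' = (Hi << Amt) | (Lo >> (32 - Amt))
  // and the custom lowering emits a branch-free sequence that also covers
  // Amt >= 32.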

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL,  VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT,  VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
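
    // Illustrative note (not part of the original source): promoting the
    // bitwise and memory ops to v4i32 works because those operations are
    // insensitive to lane width; an AND of two v16i8 values is the same 128
    // bits as an AND of their v4i32 bitcasts, so one set of Altivec patterns
    // can serve every integer vector type.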

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      // VSX v2i64 only supports non-arithmetic operations.
      setOperationAction(ISD::ADD, MVT::v2i64, Expand);
      setOperationAction(ISD::SUB, MVT::v2i64, Expand);

      setOperationAction(ISD::SHL, MVT::v2i64, Expand);
      setOperationAction(ISD::SRA, MVT::v2i64, Expand);
      setOperationAction(ISD::SRL, MVT::v2i64, Expand);

      setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec())
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }
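
  // Illustrative note (not part of the original source): the two boolean
  // contents describe how "true" is materialized. A scalar SETCC produces
  // 0 or 1, while an Altivec compare such as vcmpequw writes all-zeros or
  // all-ones into each lane (true is 0xFFFFFFFF per word), which is what
  // the vector setting tells the DAG combiner it may rely on.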

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  setInsertFencesForAtomic(true);

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}
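
// Illustrative note (not part of the original source): for a hypothetical
// argument type such as
//   struct S { float Scalar; <4 x float> Vec; };
// the recursion above walks the struct's members, sees the 128-bit vector,
// and raises MaxAlign to 16, stopping early once the subtarget's cap
// (MaxMaxAlign) has been reached.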

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  // Darwin passes everything on a 4-byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary. Everything
  // else is aligned to 8 bytes on PPC64 and 4 bytes on PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return nullptr;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
}

EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
        return false;
  }
  return true;
}
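
// Illustrative note (not part of the original source): VPKUHUM keeps one
// byte of each halfword. For ShuffleKind 0 (big-endian, two inputs) the
// accepted v16i8 mask selects the odd bytes of both vectors,
//   {1, 3, 5, ..., 29, 31}
// while for ShuffleKind 2 (little-endian, swapped inputs) it selects the
// even bytes, {0, 2, 4, ..., 28, 30}.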

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2).  For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}
01174 
01175 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
01176 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
01177 /// The ShuffleKind distinguishes between big-endian merges with two 
01178 /// different inputs (0), either-endian merges with two identical inputs (1),
01179 /// and little-endian merges with two different inputs (2).  For the latter,
01180 /// the input operands are swapped (see PPCInstrAltivec.td).
01181 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
01182                              unsigned ShuffleKind, SelectionDAG &DAG) {
01183   if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
01184     if (ShuffleKind == 1) // unary
01185       return isVMerge(N, UnitSize, 8, 8);
01186     else if (ShuffleKind == 2) // swapped
01187       return isVMerge(N, UnitSize, 8, 24);
01188     else
01189       return false;
01190   } else {
01191     if (ShuffleKind == 1) // unary
01192       return isVMerge(N, UnitSize, 0, 0);
01193     else if (ShuffleKind == 0) // normal
01194       return isVMerge(N, UnitSize, 0, 16);
01195     else
01196       return false;
01197   }
01198 }
01199 
01200 
01201 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
01202 /// amount, otherwise return -1.
01203 /// The ShuffleKind distinguishes between big-endian operations with two 
01204 /// different inputs (0), either-endian operations with two identical inputs
01205 /// (1), and little-endian operations with two different inputs (2).  For the
01206 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
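// Illustrative mask (big-endian, ShuffleKind == 0): <3,4,5,...,17,18>
// selects 16 consecutive bytes starting at byte 3 of the concatenated
// inputs, so this returns a shift amount of 3.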
01207 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
01208                              SelectionDAG &DAG) {
01209   if (N->getValueType(0) != MVT::v16i8)
01210     return -1;
01211 
01212   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
01213 
01214   // Find the first non-undef value in the shuffle mask.
01215   unsigned i;
01216   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
01217     /*search*/;
01218 
01219   if (i == 16) return -1;  // all undef.
01220 
01221   // Otherwise, check to see if the rest of the elements are consecutively
01222   // numbered from this value.
01223   unsigned ShiftAmt = SVOp->getMaskElt(i);
01224   if (ShiftAmt < i) return -1;
01225 
01226   ShiftAmt -= i;
01227   bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();
01228 
01229   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
01230     // Check the rest of the elements to see if they are consecutive.
01231     for (++i; i != 16; ++i)
01232       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
01233         return -1;
01234   } else if (ShuffleKind == 1) {
01235     // Check the rest of the elements to see if they are consecutive.
01236     for (++i; i != 16; ++i)
01237       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
01238         return -1;
01239   } else
01240     return -1;
01241 
01242   if (ShuffleKind == 2 && isLE)
01243     ShiftAmt = 16 - ShiftAmt;
01244 
01245   return ShiftAmt;
01246 }
01247 
01248 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
01249 /// specifies a splat of a single element that is suitable for input to
01250 /// VSPLTB/VSPLTH/VSPLTW.
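// Illustrative mask (EltSize == 2): <6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7>
// splats halfword 3 of the first input, so it is accepted here.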
01251 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
01252   assert(N->getValueType(0) == MVT::v16i8 &&
01253          (EltSize == 1 || EltSize == 2 || EltSize == 4));
01254 
01255   // This is a splat operation if each element of the permute is the same, and
01256   // if the value doesn't reference the second vector.
01257   unsigned ElementBase = N->getMaskElt(0);
01258 
01259   // FIXME: Handle UNDEF elements too!
01260   if (ElementBase >= 16)
01261     return false;
01262 
01263   // Check that the indices are consecutive, in the case of a multi-byte element
01264   // splatted with a v16i8 mask.
01265   for (unsigned i = 1; i != EltSize; ++i)
01266     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
01267       return false;
01268 
01269   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
01270     if (N->getMaskElt(i) < 0) continue;
01271     for (unsigned j = 0; j != EltSize; ++j)
01272       if (N->getMaskElt(i+j) != N->getMaskElt(j))
01273         return false;
01274   }
01275   return true;
01276 }
01277 
01278 /// isAllNegativeZeroVector - Returns true if all elements of build_vector
01279 /// are -0.0.
01280 bool PPC::isAllNegativeZeroVector(SDNode *N) {
01281   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
01282 
01283   APInt APVal, APUndef;
01284   unsigned BitSize;
01285   bool HasAnyUndefs;
01286 
01287   if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
01288     if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
01289       return CFP->getValueAPF().isNegZero();
01290 
01291   return false;
01292 }
01293 
01294 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
01295 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
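// Illustrative numbers: with EltSize == 2 and a mask splatting halfword 3
// (mask element 0 == 6), big-endian returns 6/2 == 3, while little-endian
// returns (16/2) - 1 - 3 == 4, the same lane counted from the other end.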
01296 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
01297                                 SelectionDAG &DAG) {
01298   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
01299   assert(isSplatShuffleMask(SVOp, EltSize));
01300   if (DAG.getTarget().getDataLayout()->isLittleEndian())
01301     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
01302   else
01303     return SVOp->getMaskElt(0) / EltSize;
01304 }
01305 
01306 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
01307 /// by using a vspltis[bhw] instruction of the specified element size, return
01308 /// the constant being splatted.  The ByteSize field indicates the number of
01309 /// bytes of each element [124] -> [bhw].
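// A worked illustration of the folding case described below: a v16i8
// build_vector repeating the bytes {0, 1} queried with ByteSize == 2
// matches "vspltish 1", because each two-byte chunk is the 16-bit value 1.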
01310 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
01311   SDValue OpVal(nullptr, 0);
01312 
01313   // If ByteSize of the splat is bigger than the element size of the
01314   // build_vector, then we have a case where we are checking for a splat where
01315   // multiple elements of the buildvector are folded together into a single
01316   // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
01317   unsigned EltSize = 16/N->getNumOperands();
01318   if (EltSize < ByteSize) {
01319     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
01320     SDValue UniquedVals[4];
01321     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
01322 
01323     // See if all of the elements in the buildvector agree across each chunk.
01324     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
01325       if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
01326       // If the element isn't a constant, bail fully out.
01327       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
01328 
01329 
01330       if (!UniquedVals[i&(Multiple-1)].getNode())
01331         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
01332       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
01333         return SDValue();  // no match.
01334     }
01335 
01336     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
01337     // either constant or undef values that are identical for each chunk.  See
01338     // if these chunks can form into a larger vspltis*.
01339 
01340     // Check to see if all of the leading entries are either 0 or -1.  If
01341     // neither, then this won't fit into the immediate field.
01342     bool LeadingZero = true;
01343     bool LeadingOnes = true;
01344     for (unsigned i = 0; i != Multiple-1; ++i) {
01345       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
01346 
01347       LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
01348       LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
01349     }
01350     // Finally, check the least significant entry.
01351     if (LeadingZero) {
01352       if (!UniquedVals[Multiple-1].getNode())
01353         return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
01354       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
01355       if (Val < 16)
01356         return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
01357     }
01358     if (LeadingOnes) {
01359       if (!UniquedVals[Multiple-1].getNode())
01360         return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
01361       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
01362       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
01363         return DAG.getTargetConstant(Val, MVT::i32);
01364     }
01365 
01366     return SDValue();
01367   }
01368 
01369   // Check to see if this buildvec has a single non-undef value in its elements.
01370   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
01371     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
01372     if (!OpVal.getNode())
01373       OpVal = N->getOperand(i);
01374     else if (OpVal != N->getOperand(i))
01375       return SDValue();
01376   }
01377 
01378   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
01379 
01380   unsigned ValSizeInBytes = EltSize;
01381   uint64_t Value = 0;
01382   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
01383     Value = CN->getZExtValue();
01384   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
01385     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
01386     Value = FloatToBits(CN->getValueAPF().convertToFloat());
01387   }
01388 
01389   // If the splat value is larger than the element value, then we can never do
01390   // this splat.  The only replicated value that would fit into our
01391   // immediate field is zero, and we prefer to use vxor for that.
01392   if (ValSizeInBytes < ByteSize) return SDValue();
01393 
01394   // If the element value is larger than the splat value, cut it in half and
01395   // check to see if the two halves are equal.  Continue doing this until we
01396   // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
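  // e.g. Value == 0x01010101 with ByteSize == 1: halve to 0x0101 (the two
  // halves match), halve again to 0x01, which sign-extends to the 5-bit
  // immediate 1.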
01397   while (ValSizeInBytes > ByteSize) {
01398     ValSizeInBytes >>= 1;
01399 
01400     // If the top half equals the bottom half, we're still ok.
01401     if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
01402          (Value                        & ((1 << (8*ValSizeInBytes))-1)))
01403       return SDValue();
01404   }
01405 
01406   // Properly sign extend the value.
01407   int MaskVal = SignExtend32(Value, ByteSize * 8);
01408 
01409   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
01410   if (MaskVal == 0) return SDValue();
01411 
01412   // Finally, if this value fits in a 5 bit sext field, return it
01413   if (SignExtend32<5>(MaskVal) == MaskVal)
01414     return DAG.getTargetConstant(MaskVal, MVT::i32);
01415   return SDValue();
01416 }
01417 
01418 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
01419 /// amount, otherwise return -1.
01420 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
01421   EVT VT = N->getValueType(0);
01422   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
01423     return -1;
01424 
01425   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
01426 
01427   // Find the first non-undef value in the shuffle mask.
01428   unsigned i;
01429   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
01430     /*search*/;
01431 
01432   if (i == 4) return -1;  // all undef.
01433 
01434   // Otherwise, check to see if the rest of the elements are consecutively
01435   // numbered from this value.
01436   unsigned ShiftAmt = SVOp->getMaskElt(i);
01437   if (ShiftAmt < i) return -1;
01438   ShiftAmt -= i;
01439 
01440   // Check the rest of the elements to see if they are consecutive.
01441   for (++i; i != 4; ++i)
01442     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
01443       return -1;
01444 
01445   return ShiftAmt;
01446 }
01447 
01448 //===----------------------------------------------------------------------===//
01449 //  Addressing Mode Selection
01450 //===----------------------------------------------------------------------===//
01451 
01452 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
01453 /// or 64-bit immediate, and if the value can be accurately represented as a
01454 /// sign extension from a 16-bit value.  If so, this returns true and the
01455 /// immediate.
01456 static bool isIntS16Immediate(SDNode *N, short &Imm) {
01457   if (!isa<ConstantSDNode>(N))
01458     return false;
01459 
01460   Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
01461   if (N->getValueType(0) == MVT::i32)
01462     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
01463   else
01464     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
01465 }
01466 static bool isIntS16Immediate(SDValue Op, short &Imm) {
01467   return isIntS16Immediate(Op.getNode(), Imm);
01468 }
01469 
01470 
01471 /// SelectAddressRegReg - Given the specified address, check to see if it
01472 /// can be represented as an indexed [r+r] operation.  Returns false if it
01473 /// can be more efficiently represented with [r+imm].
01474 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
01475                                             SDValue &Index,
01476                                             SelectionDAG &DAG) const {
01477   short imm = 0;
01478   if (N.getOpcode() == ISD::ADD) {
01479     if (isIntS16Immediate(N.getOperand(1), imm))
01480       return false;    // r+i
01481     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
01482       return false;    // r+i
01483 
01484     Base = N.getOperand(0);
01485     Index = N.getOperand(1);
01486     return true;
01487   } else if (N.getOpcode() == ISD::OR) {
01488     if (isIntS16Immediate(N.getOperand(1), imm))
01489       return false;    // r+i; fold the immediate if we can.
01490 
01491     // If this is an or of disjoint bitfields, we can codegen this as an add
01492     // (for better address arithmetic) if the LHS and RHS of the OR are provably
01493     // disjoint.
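    // Illustrative case: a 16-byte-aligned base (low four bits known zero)
    // OR'ed with a constant below 16 sets no bit in both operands, so the
    // OR behaves exactly like an ADD here.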
01494     APInt LHSKnownZero, LHSKnownOne;
01495     APInt RHSKnownZero, RHSKnownOne;
01496     DAG.computeKnownBits(N.getOperand(0),
01497                          LHSKnownZero, LHSKnownOne);
01498 
01499     if (LHSKnownZero.getBoolValue()) {
01500       DAG.computeKnownBits(N.getOperand(1),
01501                            RHSKnownZero, RHSKnownOne);
01502       // If all of the bits are known zero on the LHS or RHS, the add won't
01503       // carry.
01504       if (~(LHSKnownZero | RHSKnownZero) == 0) {
01505         Base = N.getOperand(0);
01506         Index = N.getOperand(1);
01507         return true;
01508       }
01509     }
01510   }
01511 
01512   return false;
01513 }
01514 
01515 // If we happen to be doing an i64 load or store into a stack slot that has
01516 // less than a 4-byte alignment, then the frame-index elimination may need to
01517 // use an indexed load or store instruction (because the offset may not be a
01518 // multiple of 4). The extra register needed to hold the offset comes from the
01519 // register scavenger, and it is possible that the scavenger will need to use
01520 // an emergency spill slot. As a result, we need to make sure that a spill slot
01521 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
01522 // stack slot.
01523 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
01524   // FIXME: This does not handle the LWA case.
01525   if (VT != MVT::i64)
01526     return;
01527 
01528   // NOTE: We'll exclude negative FIs here, which come from argument
01529   // lowering, because there are no known test cases triggering this problem
01530   // using packed structures (or similar). We can remove this exclusion if
01531   // we find such a test case. This is so test-case driven because this
01532   // entire 'fixup' exists only to prevent crashes (from the
01533   // register scavenger) on not-really-valid inputs. For example, if we have:
01534   //   %a = alloca i1
01535   //   %b = bitcast i1* %a to i64*
01536   //   store i64 0, i64* %b
01537   // then the store should really be marked as 'align 1', but is not. If it
01538   // were marked as 'align 1' then the indexed form would have been
01539   // instruction-selected initially, and the problem this 'fixup' is preventing
01540   // won't happen regardless.
01541   if (FrameIdx < 0)
01542     return;
01543 
01544   MachineFunction &MF = DAG.getMachineFunction();
01545   MachineFrameInfo *MFI = MF.getFrameInfo();
01546 
01547   unsigned Align = MFI->getObjectAlignment(FrameIdx);
01548   if (Align >= 4)
01549     return;
01550 
01551   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
01552   FuncInfo->setHasNonRISpills();
01553 }
01554 
01555 /// Returns true if the address N can be represented by a base register plus
01556 /// a signed 16-bit displacement [r+imm], and if it is not better
01557 /// represented as reg+reg.  If Aligned is true, only accept displacements
01558 /// suitable for STD and friends, i.e. multiples of 4.
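// Illustrative outcomes: (add r3, 20) gives Disp == 20, Base == r3, and 20
// also satisfies the Aligned (multiple-of-4) check for STD; an offset such
// as 6 with Aligned == true falls through to the [r+0] form at the bottom,
// with the whole add becoming the base.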
01559 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
01560                                             SDValue &Base,
01561                                             SelectionDAG &DAG,
01562                                             bool Aligned) const {
01563   // FIXME dl should come from parent load or store, not from address
01564   SDLoc dl(N);
01565   // If this can be more profitably realized as r+r, fail.
01566   if (SelectAddressRegReg(N, Disp, Base, DAG))
01567     return false;
01568 
01569   if (N.getOpcode() == ISD::ADD) {
01570     short imm = 0;
01571     if (isIntS16Immediate(N.getOperand(1), imm) &&
01572         (!Aligned || (imm & 3) == 0)) {
01573       Disp = DAG.getTargetConstant(imm, N.getValueType());
01574       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
01575         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
01576         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
01577       } else {
01578         Base = N.getOperand(0);
01579       }
01580       return true; // [r+i]
01581     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
01582       // Match LOAD (ADD (X, Lo(G))).
01583       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
01584              && "Cannot handle constant offsets yet!");
01585       Disp = N.getOperand(1).getOperand(0);  // The global address.
01586       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
01587              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
01588              Disp.getOpcode() == ISD::TargetConstantPool ||
01589              Disp.getOpcode() == ISD::TargetJumpTable);
01590       Base = N.getOperand(0);
01591       return true;  // [&g+r]
01592     }
01593   } else if (N.getOpcode() == ISD::OR) {
01594     short imm = 0;
01595     if (isIntS16Immediate(N.getOperand(1), imm) &&
01596         (!Aligned || (imm & 3) == 0)) {
01597       // If this is an or of disjoint bitfields, we can codegen this as an add
01598       // (for better address arithmetic) if the LHS and RHS of the OR are
01599       // provably disjoint.
01600       APInt LHSKnownZero, LHSKnownOne;
01601       DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
01602 
01603       if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
01604         // If all of the bits are known zero on the LHS or RHS, the add won't
01605         // carry.
01606         if (FrameIndexSDNode *FI =
01607               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
01608           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
01609           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
01610         } else {
01611           Base = N.getOperand(0);
01612         }
01613         Disp = DAG.getTargetConstant(imm, N.getValueType());
01614         return true;
01615       }
01616     }
01617   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
01618     // Loading from a constant address.
01619 
01620     // If this address fits entirely in a 16-bit sext immediate field, codegen
01621     // this as "d, 0"
01622     short Imm;
01623     if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
01624       Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
01625       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
01626                              CN->getValueType(0));
01627       return true;
01628     }
01629 
01630     // Handle 32-bit sext immediates with LIS + addr mode.
01631     if ((CN->getValueType(0) == MVT::i32 ||
01632          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
01633         (!Aligned || (CN->getZExtValue() & 3) == 0)) {
01634       int Addr = (int)CN->getZExtValue();
01635 
01636       // Otherwise, break this down into an LIS + disp.
01637       Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
01638 
01639       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
01640       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
01641       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
01642       return true;
01643     }
01644   }
01645 
01646   Disp = DAG.getTargetConstant(0, getPointerTy());
01647   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
01648     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
01649     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
01650   } else
01651     Base = N;
01652   return true;      // [r+0]
01653 }
01654 
01655 /// SelectAddressRegRegOnly - Given the specified address, force it to be
01656 /// represented as an indexed [r+r] operation.
01657 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
01658                                                 SDValue &Index,
01659                                                 SelectionDAG &DAG) const {
01660   // Check to see if we can easily represent this as an [r+r] address.  This
01661   // will fail if it thinks that the address is more profitably represented as
01662   // reg+imm, e.g. where imm = 0.
01663   if (SelectAddressRegReg(N, Base, Index, DAG))
01664     return true;
01665 
01666   // If the operand is an addition, always emit this as [r+r], since this is
01667   // better (for code size, and execution, as the memop does the add for free)
01668   // than emitting an explicit add.
01669   if (N.getOpcode() == ISD::ADD) {
01670     Base = N.getOperand(0);
01671     Index = N.getOperand(1);
01672     return true;
01673   }
01674 
01675   // Otherwise, do it the hard way, using R0 as the base register.
01676   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
01677                          N.getValueType());
01678   Index = N;
01679   return true;
01680 }
01681 
01682 /// getPreIndexedAddressParts - Returns true if the node's address can be
01683 /// legally represented as a pre-indexed load/store address, and sets the
01684 /// base pointer, offset pointer, and addressing mode by reference.
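// Illustrative case (assumed, for exposition): a load from (add r3, 16)
// can become lwzu r4, 16(r3), which loads from r3+16 and writes the
// incremented address back into r3 as a side effect.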
01685 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
01686                                                   SDValue &Offset,
01687                                                   ISD::MemIndexedMode &AM,
01688                                                   SelectionDAG &DAG) const {
01689   if (DisablePPCPreinc) return false;
01690 
01691   bool isLoad = true;
01692   SDValue Ptr;
01693   EVT VT;
01694   unsigned Alignment;
01695   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
01696     Ptr = LD->getBasePtr();
01697     VT = LD->getMemoryVT();
01698     Alignment = LD->getAlignment();
01699   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
01700     Ptr = ST->getBasePtr();
01701     VT  = ST->getMemoryVT();
01702     Alignment = ST->getAlignment();
01703     isLoad = false;
01704   } else
01705     return false;
01706 
01707   // PowerPC doesn't have preinc load/store instructions for vectors (except
01708   // for QPX, which does have preinc r+r forms).
01709   if (VT.isVector()) {
01710     if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
01711       return false;
01712     } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
01713       AM = ISD::PRE_INC;
01714       return true;
01715     }
01716   }
01717 
01718   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
01719 
01720     // Common code will reject creating a pre-inc form if the base pointer
01721     // is a frame index, or if N is a store and the base pointer is either
01722     // the same as or a predecessor of the value being stored.  Check for
01723     // those situations here, and try with swapped Base/Offset instead.
01724     bool Swap = false;
01725 
01726     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
01727       Swap = true;
01728     else if (!isLoad) {
01729       SDValue Val = cast<StoreSDNode>(N)->getValue();
01730       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
01731         Swap = true;
01732     }
01733 
01734     if (Swap)
01735       std::swap(Base, Offset);
01736 
01737     AM = ISD::PRE_INC;
01738     return true;
01739   }
01740 
01741   // LDU/STU can only handle immediates that are a multiple of 4.
01742   if (VT != MVT::i64) {
01743     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
01744       return false;
01745   } else {
01746     // LDU/STU need an address with at least 4-byte alignment.
01747     if (Alignment < 4)
01748       return false;
01749 
01750     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
01751       return false;
01752   }
01753 
01754   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
01755     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
01756     // sext i32 to i64 when addr mode is r+i.
01757     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
01758         LD->getExtensionType() == ISD::SEXTLOAD &&
01759         isa<ConstantSDNode>(Offset))
01760       return false;
01761   }
01762 
01763   AM = ISD::PRE_INC;
01764   return true;
01765 }
01766 
01767 //===----------------------------------------------------------------------===//
01768 //  LowerOperation implementation
01769 //===----------------------------------------------------------------------===//
01770 
01771 /// GetLabelAccessInfo - Return true if we should reference labels using a
01772 /// PICBase, and set HiOpFlags and LoOpFlags to the target MO flags.
01773 static bool GetLabelAccessInfo(const TargetMachine &TM,
01774                                const PPCSubtarget &Subtarget,
01775                                unsigned &HiOpFlags, unsigned &LoOpFlags,
01776                                const GlobalValue *GV = nullptr) {
01777   HiOpFlags = PPCII::MO_HA;
01778   LoOpFlags = PPCII::MO_LO;
01779 
01780   // Don't use the pic base if not in PIC relocation model.
01781   bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
01782 
01783   if (isPIC) {
01784     HiOpFlags |= PPCII::MO_PIC_FLAG;
01785     LoOpFlags |= PPCII::MO_PIC_FLAG;
01786   }
01787 
01788   // If this is a reference to a global value that requires a non-lazy-ptr, make
01789   // sure that instruction lowering adds it.
01790   if (GV && Subtarget.hasLazyResolverStub(GV)) {
01791     HiOpFlags |= PPCII::MO_NLP_FLAG;
01792     LoOpFlags |= PPCII::MO_NLP_FLAG;
01793 
01794     if (GV->hasHiddenVisibility()) {
01795       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
01796       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
01797     }
01798   }
01799 
01800   return isPIC;
01801 }
01802 
01803 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
01804                              SelectionDAG &DAG) {
01805   EVT PtrVT = HiPart.getValueType();
01806   SDValue Zero = DAG.getConstant(0, PtrVT);
01807   SDLoc DL(HiPart);
01808 
01809   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
01810   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
01811 
01812   // With PIC, the first instruction is actually "GR+hi(&G)".
01813   if (isPIC)
01814     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
01815                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
01816 
01817   // Generate non-pic code that has direct accesses to the constant pool.
01818   // The address of the global is just (hi(&g)+lo(&g)).
01819   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
01820 }
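// A hedged example (illustrative assembly, not taken from this file): for a
// non-PIC 32-bit reference to a global 'g', the Hi/Lo pair above typically
// selects to something like:
//   lis  r3, g@ha        ; high-adjusted upper 16 bits
//   addi r3, r3, g@l     ; add the low 16 bits to form the full address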
01821 
01822 static void setUsesTOCBasePtr(MachineFunction &MF) {
01823   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
01824   FuncInfo->setUsesTOCBasePtr();
01825 }
01826 
01827 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
01828   setUsesTOCBasePtr(DAG.getMachineFunction());
01829 }
01830 
01831 static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
01832                            SDValue GA) {
01833   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
01834   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
01835                 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
01836 
01837   SDValue Ops[] = { GA, Reg };
01838   return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
01839                                  DAG.getVTList(VT, MVT::Other), Ops, VT,
01840                                  MachinePointerInfo::getGOT(), 0, false, true,
01841                                  false, 0);
01842 }
01843 
01844 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
01845                                              SelectionDAG &DAG) const {
01846   EVT PtrVT = Op.getValueType();
01847   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
01848   const Constant *C = CP->getConstVal();
01849 
01850   // 64-bit SVR4 ABI code is always position-independent.
01851   // The actual address of the GlobalValue is stored in the TOC.
01852   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
01853     setUsesTOCBasePtr(DAG);
01854     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
01855     return getTOCEntry(DAG, SDLoc(CP), true, GA);
01856   }
01857 
01858   unsigned MOHiFlag, MOLoFlag;
01859   bool isPIC =
01860       GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
01861 
01862   if (isPIC && Subtarget.isSVR4ABI()) {
01863     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
01864                                            PPCII::MO_PIC_FLAG);
01865     return getTOCEntry(DAG, SDLoc(CP), false, GA);
01866   }
01867 
01868   SDValue CPIHi =
01869     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
01870   SDValue CPILo =
01871     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
01872   return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
01873 }
01874 
01875 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
01876   EVT PtrVT = Op.getValueType();
01877   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
01878 
01879   // 64-bit SVR4 ABI code is always position-independent.
01880   // The actual address of the GlobalValue is stored in the TOC.
01881   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
01882     setUsesTOCBasePtr(DAG);
01883     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
01884     return getTOCEntry(DAG, SDLoc(JT), true, GA);
01885   }
01886 
01887   unsigned MOHiFlag, MOLoFlag;
01888   bool isPIC =
01889       GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
01890 
01891   if (isPIC && Subtarget.isSVR4ABI()) {
01892     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
01893                                         PPCII::MO_PIC_FLAG);
01894     return getTOCEntry(DAG, SDLoc(GA), false, GA);
01895   }
01896 
01897   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
01898   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
01899   return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
01900 }
01901 
01902 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
01903                                              SelectionDAG &DAG) const {
01904   EVT PtrVT = Op.getValueType();
01905   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
01906   const BlockAddress *BA = BASDN->getBlockAddress();
01907 
01908   // 64-bit SVR4 ABI code is always position-independent.
01909   // The actual BlockAddress is stored in the TOC.
01910   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
01911     setUsesTOCBasePtr(DAG);
01912     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
01913     return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
01914   }
01915 
01916   unsigned MOHiFlag, MOLoFlag;
01917   bool isPIC =
01918       GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
01919   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
01920   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
01921   return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
01922 }
01923 
01924 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
01925                                               SelectionDAG &DAG) const {
01926 
01927   // FIXME: TLS addresses currently use medium model code sequences,
01928   // which is the most useful form.  Eventually support for small and
01929   // large models could be added if users need it, at the cost of
01930   // additional complexity.
01931   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
01932   SDLoc dl(GA);
01933   const GlobalValue *GV = GA->getGlobal();
01934   EVT PtrVT = getPointerTy();
01935   bool is64bit = Subtarget.isPPC64();
01936   const Module *M = DAG.getMachineFunction().getFunction()->getParent();
01937   PICLevel::Level picLevel = M->getPICLevel();
01938 
01939   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
01940 
01941   if (Model == TLSModel::LocalExec) {
01942     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
01943                                                PPCII::MO_TPREL_HA);
01944     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
01945                                                PPCII::MO_TPREL_LO);
01946     SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
01947                                      is64bit ? MVT::i64 : MVT::i32);
01948     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
01949     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
01950   }
01951 
01952   if (Model == TLSModel::InitialExec) {
01953     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
01954     SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
01955                                                 PPCII::MO_TLS);
01956     SDValue GOTPtr;
01957     if (is64bit) {
01958       setUsesTOCBasePtr(DAG);
01959       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
01960       GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
01961                            PtrVT, GOTReg, TGA);
01962     } else
01963       GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
01964     SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
01965                                    PtrVT, TGA, GOTPtr);
01966     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
01967   }
01968 
01969   if (Model == TLSModel::GeneralDynamic) {
01970     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
01971     SDValue GOTPtr;
01972     if (is64bit) {
01973       setUsesTOCBasePtr(DAG);
01974       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
01975       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
01976                                    GOTReg, TGA);
01977     } else {
01978       if (picLevel == PICLevel::Small)
01979         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
01980       else
01981         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
01982     }
01983     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
01984                        GOTPtr, TGA, TGA);
01985   }
01986 
01987   if (Model == TLSModel::LocalDynamic) {
01988     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
01989     SDValue GOTPtr;
01990     if (is64bit) {
01991       setUsesTOCBasePtr(DAG);
01992       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
01993       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
01994                            GOTReg, TGA);
01995     } else {
01996       if (picLevel == PICLevel::Small)
01997         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
01998       else
01999         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
02000     }
02001     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
02002                                   PtrVT, GOTPtr, TGA, TGA);
02003     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
02004                                       PtrVT, TLSAddr, TGA);
02005     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
02006   }
02007 
02008   llvm_unreachable("Unknown TLS model!");
02009 }
02010 
02011 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
02012                                               SelectionDAG &DAG) const {
02013   EVT PtrVT = Op.getValueType();
02014   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
02015   SDLoc DL(GSDN);
02016   const GlobalValue *GV = GSDN->getGlobal();
02017 
02018   // 64-bit SVR4 ABI code is always position-independent.
02019   // The actual address of the GlobalValue is stored in the TOC.
02020   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
02021     setUsesTOCBasePtr(DAG);
02022     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
02023     return getTOCEntry(DAG, DL, true, GA);
02024   }
02025 
02026   unsigned MOHiFlag, MOLoFlag;
02027   bool isPIC =
02028       GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV);
02029 
02030   if (isPIC && Subtarget.isSVR4ABI()) {
02031     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
02032                                             GSDN->getOffset(),
02033                                             PPCII::MO_PIC_FLAG);
02034     return getTOCEntry(DAG, DL, false, GA);
02035   }
02036 
02037   SDValue GAHi =
02038     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
02039   SDValue GALo =
02040     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
02041 
02042   SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
02043 
02044   // If the global reference is actually to a non-lazy-pointer, we have to do an
02045   // extra load to get the address of the global.
02046   if (MOHiFlag & PPCII::MO_NLP_FLAG)
02047     Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
02048                       false, false, false, 0);
02049   return Ptr;
02050 }
02051 
02052 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
02053   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
02054   SDLoc dl(Op);
02055 
02056   if (Op.getValueType() == MVT::v2i64) {
02057     // When the operands themselves are v2i64 values, we need to do something
02058     // special because VSX has no underlying comparison operations for these.
02059     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
02060       // Equality can be handled by casting to the legal type for Altivec
02061       // comparisons, everything else needs to be expanded.
02062       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
02063         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
02064                  DAG.getSetCC(dl, MVT::v4i32,
02065                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
02066                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
02067                    CC));
02068       }
02069 
02070       return SDValue();
02071     }
02072 
02073     // We handle most of these in the usual way.
02074     return Op;
02075   }
02076 
02077   // If we're comparing for equality to zero, expose the fact that this is
02078   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
02079   // fold the new nodes.
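  // Worked example (i32): Log2b == 5, so seteq(x, 0) becomes ctlz(x) >> 5;
  // ctlz(0) == 32 == 0b100000 yields 1 after the shift, while any nonzero x
  // has ctlz(x) <= 31 and yields 0.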
02080   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
02081     if (C->isNullValue() && CC == ISD::SETEQ) {
02082       EVT VT = Op.getOperand(0).getValueType();
02083       SDValue Zext = Op.getOperand(0);
02084       if (VT.bitsLT(MVT::i32)) {
02085         VT = MVT::i32;
02086         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
02087       }
02088       unsigned Log2b = Log2_32(VT.getSizeInBits());
02089       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
02090       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
02091                                 DAG.getConstant(Log2b, MVT::i32));
02092       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
02093     }
02094     // Leave comparisons against 0 and -1 alone for now, since they're usually
02095     // optimized.  FIXME: revisit this when we can custom lower all setcc
02096     // optimizations.
02097     if (C->isAllOnesValue() || C->isNullValue())
02098       return SDValue();
02099   }
02100 
02101   // If we have an integer seteq/setne, turn it into a compare against zero
02102   // by xor'ing the rhs with the lhs, which is faster than setting a
02103   // condition register, reading it back out, and masking the correct bit.  The
02104   // normal approach here uses sub to do this instead of xor.  Using xor exposes
02105   // the result to other bit-twiddling opportunities.
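  // Illustrative rewrite: seteq(a, b) becomes seteq(a ^ b, 0), which the
  // zero-comparison expansion above can then pick up.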
02106   EVT LHSVT = Op.getOperand(0).getValueType();
02107   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
02108     EVT VT = Op.getValueType();
02109     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
02110                                 Op.getOperand(1));
02111     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
02112   }
02113   return SDValue();
02114 }
02115 
02116 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
02117                                       const PPCSubtarget &Subtarget) const {
02118   SDNode *Node = Op.getNode();
02119   EVT VT = Node->getValueType(0);
02120   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02121   SDValue InChain = Node->getOperand(0);
02122   SDValue VAListPtr = Node->getOperand(1);
02123   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
02124   SDLoc dl(Node);
02125 
02126   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
02127 
02128   // gpr_index
02129   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
02130                                     VAListPtr, MachinePointerInfo(SV), MVT::i8,
02131                                     false, false, false, 0);
02132   InChain = GprIndex.getValue(1);
02133 
02134   if (VT == MVT::i64) {
02135     // Check if GprIndex is even
02136     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
02137                                  DAG.getConstant(1, MVT::i32));
02138     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
02139                                 DAG.getConstant(0, MVT::i32), ISD::SETNE);
02140     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
02141                                           DAG.getConstant(1, MVT::i32));
02142     // Align GprIndex to be even if it isn't
02143     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
02144                            GprIndex);
02145   }
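  // Illustrative case: an i64 vararg with gpr_index == 3 bumps the index to
  // 4 so that the value is read from an aligned register pair.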
02146 
02147   // fpr index is 1 byte after gpr
02148   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
02149                                DAG.getConstant(1, MVT::i32));
02150 
02151   // fpr
02152   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
02153                                     FprPtr, MachinePointerInfo(SV), MVT::i8,
02154                                     false, false, false, 0);
02155   InChain = FprIndex.getValue(1);
02156 
02157   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
02158                                        DAG.getConstant(8, MVT::i32));
02159 
02160   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
02161                                         DAG.getConstant(4, MVT::i32));
02162 
02163   // areas
02164   SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
02165                                      MachinePointerInfo(), false, false,
02166                                      false, 0);
02167   InChain = OverflowArea.getValue(1);
02168 
02169   SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
02170                                     MachinePointerInfo(), false, false,
02171                                     false, 0);
02172   InChain = RegSaveArea.getValue(1);
02173 
02174   // select overflow_area if index >= 8; otherwise use the register save area
02175   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
02176                             DAG.getConstant(8, MVT::i32), ISD::SETLT);
02177 
02178   // adjustment constant gpr_index * 4/8
02179   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
02180                                     VT.isInteger() ? GprIndex : FprIndex,
02181                                     DAG.getConstant(VT.isInteger() ? 4 : 8,
02182                                                     MVT::i32));
02183 
02184   // OurReg = RegSaveArea + RegConstant
02185   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
02186                                RegConstant);
02187 
02188   // Floating types are 32 bytes into RegSaveArea
02189   if (VT.isFloatingPoint())
02190     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
02191                          DAG.getConstant(32, MVT::i32));
02192 
02193   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
02194   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
02195                                    VT.isInteger() ? GprIndex : FprIndex,
02196                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1,
02197                                                    MVT::i32));
02198 
02199   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
02200                               VT.isInteger() ? VAListPtr : FprPtr,
02201                               MachinePointerInfo(SV),
02202                               MVT::i8, false, false, 0);
02203 
02204   // determine if we should load from reg_save_area or overflow_area
02205   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
02206 
02207   // increase overflow_area by 4/8 if gpr/fpr index >= 8
02208   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
02209                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
02210                                           MVT::i32));
02211 
02212   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
02213                              OverflowAreaPlusN);
02214 
02215   InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
02216                               OverflowAreaPtr,
02217                               MachinePointerInfo(),
02218                               MVT::i32, false, false, 0);
02219 
02220   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
02221                      false, false, false, 0);
02222 }
02223 
02224 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
02225                                        const PPCSubtarget &Subtarget) const {
02226   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
02227 
02228   // We have to copy the entire va_list struct:
02229   // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
02230   return DAG.getMemcpy(Op.getOperand(0), Op,
02231                        Op.getOperand(1), Op.getOperand(2),
02232                        DAG.getConstant(12, MVT::i32), 8, false, true,
02233                        MachinePointerInfo(), MachinePointerInfo());
02234 }
02235 
02236 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
02237                                                   SelectionDAG &DAG) const {
02238   return Op.getOperand(0);
02239 }
02240 
02241 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
02242                                                 SelectionDAG &DAG) const {
02243   SDValue Chain = Op.getOperand(0);
02244   SDValue Trmp = Op.getOperand(1); // trampoline
02245   SDValue FPtr = Op.getOperand(2); // nested function
02246   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
02247   SDLoc dl(Op);
02248 
02249   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02250   bool isPPC64 = (PtrVT == MVT::i64);
02251   Type *IntPtrTy =
02252     DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
02253                                                              *DAG.getContext());
02254 
02255   TargetLowering::ArgListTy Args;
02256   TargetLowering::ArgListEntry Entry;
02257 
02258   Entry.Ty = IntPtrTy;
02259   Entry.Node = Trmp; Args.push_back(Entry);
02260 
02261   // TrampSize == (isPPC64 ? 48 : 40);
02262   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
02263                                isPPC64 ? MVT::i64 : MVT::i32);
02264   Args.push_back(Entry);
02265 
02266   Entry.Node = FPtr; Args.push_back(Entry);
02267   Entry.Node = Nest; Args.push_back(Entry);
02268 
02269   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
02270   TargetLowering::CallLoweringInfo CLI(DAG);
02271   CLI.setDebugLoc(dl).setChain(Chain)
02272     .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
02273                DAG.getExternalSymbol("__trampoline_setup", PtrVT),
02274                std::move(Args), 0);
02275 
02276   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
02277   return CallResult.second;
02278 }
02279 
02280 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
02281                                         const PPCSubtarget &Subtarget) const {
02282   MachineFunction &MF = DAG.getMachineFunction();
02283   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
02284 
02285   SDLoc dl(Op);
02286 
02287   if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
02288     // vastart just stores the address of the VarArgsFrameIndex slot into the
02289     // memory location argument.
02290     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02291     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
02292     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
02293     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
02294                         MachinePointerInfo(SV),
02295                         false, false, 0);
02296   }
02297 
02298   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
02299   // We assume the given va_list has already been allocated.
02300   //
02301   // typedef struct {
02302   //  char gpr;     /* index into the array of 8 GPRs
02303   //                 * stored in the register save area
02304   //                 * gpr=0 corresponds to r3,
02305   //                 * gpr=1 to r4, etc.
02306   //                 */
02307   //  char fpr;     /* index into the array of 8 FPRs
02308   //                 * stored in the register save area
02309   //                 * fpr=0 corresponds to f1,
02310   //                 * fpr=1 to f2, etc.
02311   //                 */
02312   //  char *overflow_arg_area;
02313   //                /* location on stack that holds
02314   //                 * the next overflow argument
02315   //                 */
02316   //  char *reg_save_area;
02317   //               /* where r3:r10 and f1:f8 (if saved)
02318   //                * are stored
02319   //                */
02320   // } va_list[1];
02321 
02322 
02323   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
02324   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
02325 
02326 
02327   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02328 
02329   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
02330                                             PtrVT);
02331   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
02332                                  PtrVT);
02333 
02334   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
02335   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
02336 
02337   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
02338   SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
02339 
02340   uint64_t FPROffset = 1;
02341   SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
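  // With 32-bit pointers this walks the struct at byte offsets 0 (gpr),
  // 1 (fpr), 4 (overflow_arg_area), and 8 (reg_save_area), matching the
  // layout comment above.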
02342 
02343   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
02344 
02345   // Store first byte : number of int regs
02346   SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
02347                                          Op.getOperand(1),
02348                                          MachinePointerInfo(SV),
02349                                          MVT::i8, false, false, 0);
02350   uint64_t nextOffset = FPROffset;
02351   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
02352                                   ConstFPROffset);
02353 
02354   // Store second byte : number of float regs
02355   SDValue secondStore =
02356     DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
02357                       MachinePointerInfo(SV, nextOffset), MVT::i8,
02358                       false, false, 0);
02359   nextOffset += StackOffset;
02360   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
02361 
02362   // Store second word : arguments given on stack
02363   SDValue thirdStore =
02364     DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
02365                  MachinePointerInfo(SV, nextOffset),
02366                  false, false, 0);
02367   nextOffset += FrameOffset;
02368   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
02369 
02370   // Store third word : arguments given in registers
02371   return DAG.getStore(thirdStore, dl, FR, nextPtr,
02372                       MachinePointerInfo(SV, nextOffset),
02373                       false, false, 0);
02374 
02375 }
02376 
02377 #include "PPCGenCallingConv.inc"
02378 
02379 // Function whose sole purpose is to kill compiler warnings 
02380 // stemming from unused functions included from PPCGenCallingConv.inc.
02381 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
02382   return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
02383 }
02384 
02385 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
02386                                       CCValAssign::LocInfo &LocInfo,
02387                                       ISD::ArgFlagsTy &ArgFlags,
02388                                       CCState &State) {
02389   return true;
02390 }
02391 
02392 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
02393                                              MVT &LocVT,
02394                                              CCValAssign::LocInfo &LocInfo,
02395                                              ISD::ArgFlagsTy &ArgFlags,
02396                                              CCState &State) {
02397   static const MCPhysReg ArgRegs[] = {
02398     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
02399     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
02400   };
02401   const unsigned NumArgRegs = array_lengthof(ArgRegs);
02402 
02403   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
02404 
02405   // Skip one register if the first unallocated register has an even register
02406   // number and there are still argument registers available which have not been
02407   // allocated yet. RegNum is actually an index into ArgRegs, which means we
02408   // need to skip a register if RegNum is odd.
02409   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
02410     State.AllocateReg(ArgRegs[RegNum]);
02411   }
02412 
02413   // Always return false here, as this function only makes sure that the first
02414   // unallocated register has an odd register number and does not actually
02415   // allocate a register for the current argument.
02416   return false;
02417 }
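
// Worked example (illustrative, not from the original source): for a call
// such as
//   void f(int a, long long b);
// 'a' is assigned R3.  The i64 'b' then reaches this hook with
// getFirstUnallocated() == 1 (R4); since that index is odd, R4 is
// allocated-and-skipped, so 'b' lands in the aligned pair R5:R6 as the
// 32-bit SVR4 ABI requires for doublewords.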
02418 
02419 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
02420                                                MVT &LocVT,
02421                                                CCValAssign::LocInfo &LocInfo,
02422                                                ISD::ArgFlagsTy &ArgFlags,
02423                                                CCState &State) {
02424   static const MCPhysReg ArgRegs[] = {
02425     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
02426     PPC::F8
02427   };
02428 
02429   const unsigned NumArgRegs = array_lengthof(ArgRegs);
02430 
02431   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
02432 
02433   // If there is only one Floating-point register left we need to put both f64
02434   // values of a split ppc_fp128 value on the stack.
02435   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
02436     State.AllocateReg(ArgRegs[RegNum]);
02437   }
02438 
02439   // Always return false here, as this function only makes sure that the two f64
02440   // values a ppc_fp128 value is split into are both passed in registers or both
02441   // passed on the stack and does not actually allocate a register for the
02442   // current argument.
02443   return false;
02444 }
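
// Worked example (illustrative): a ppc_fp128 argument is split into two
// f64 halves.  If seven FPRs are already in use so that only F8 remains,
// this hook allocates F8 without assigning it; both halves then go to the
// stack together rather than being split between F8 and memory.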
02445 
02446 /// FPR - The set of FP registers that should be allocated for arguments,
02447 /// on Darwin.
02448 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
02449                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
02450                                 PPC::F11, PPC::F12, PPC::F13};
02451 
02452 /// QFPR - The set of QPX registers that should be allocated for arguments.
02453 static const MCPhysReg QFPR[] = {
02454     PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
02455     PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
02456 
02457 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
02458 /// the stack.
02459 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
02460                                        unsigned PtrByteSize) {
02461   unsigned ArgSize = ArgVT.getStoreSize();
02462   if (Flags.isByVal())
02463     ArgSize = Flags.getByValSize();
02464 
02465   // Round up to multiples of the pointer size, except for array members,
02466   // which are always packed.
02467   if (!Flags.isInConsecutiveRegs())
02468     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
02469 
02470   return ArgSize;
02471 }
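
// For instance (illustrative): with PtrByteSize == 8, a 13-byte byval
// aggregate reserves ((13 + 7) / 8) * 8 == 16 bytes, while an f32 member
// of a homogeneous float array (isInConsecutiveRegs) keeps its packed
// 4-byte store size.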
02472 
02473 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
02474 /// on the stack.
02475 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
02476                                             ISD::ArgFlagsTy Flags,
02477                                             unsigned PtrByteSize) {
02478   unsigned Align = PtrByteSize;
02479 
02480   // Altivec parameters are padded to a 16 byte boundary.
02481   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
02482       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
02483       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
02484     Align = 16;
02485   // QPX vector types stored in double-precision are padded to a 32 byte
02486   // boundary.
02487   else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
02488     Align = 32;
02489 
02490   // ByVal parameters are aligned as requested.
02491   if (Flags.isByVal()) {
02492     unsigned BVAlign = Flags.getByValAlign();
02493     if (BVAlign > PtrByteSize) {
02494       if (BVAlign % PtrByteSize != 0)
02495         llvm_unreachable(
02496             "ByVal alignment is not a multiple of the pointer size");
02497 
02498       Align = BVAlign;
02499     }
02500   }
02501 
02502   // Array members are always packed to their original alignment.
02503   if (Flags.isInConsecutiveRegs()) {
02504     // If the array member was split into multiple registers, the first
02505     // needs to be aligned to the size of the full type.  (Except for
02506     // ppcf128, which is only aligned as its f64 components.)
02507     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
02508       Align = OrigVT.getStoreSize();
02509     else
02510       Align = ArgVT.getStoreSize();
02511   }
02512 
02513   return Align;
02514 }
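
// For instance (illustrative): an Altivec v4i32 argument yields an
// alignment of 16 and a QPX v4f64 an alignment of 32, while a byval
// aggregate requesting 16-byte alignment with PtrByteSize == 8 is bumped
// from 8 to 16.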
02515 
02516 /// CalculateStackSlotUsed - Return whether this argument will use its
02517 /// stack slot (instead of being passed in registers).  ArgOffset,
02518 /// AvailableFPRs, and AvailableVRs must hold the current argument
02519 /// position, and will be updated to account for this argument.
02520 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
02521                                    ISD::ArgFlagsTy Flags,
02522                                    unsigned PtrByteSize,
02523                                    unsigned LinkageSize,
02524                                    unsigned ParamAreaSize,
02525                                    unsigned &ArgOffset,
02526                                    unsigned &AvailableFPRs,
02527                                    unsigned &AvailableVRs, bool HasQPX) {
02528   bool UseMemory = false;
02529 
02530   // Respect alignment of argument on the stack.
02531   unsigned Align =
02532     CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
02533   ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
02534   // If there's no space left in the argument save area, we must
02535   // use memory (this check also catches zero-sized arguments).
02536   if (ArgOffset >= LinkageSize + ParamAreaSize)
02537     UseMemory = true;
02538 
02539   // Allocate argument on the stack.
02540   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
02541   if (Flags.isInConsecutiveRegsLast())
02542     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
02543   // If we overran the argument save area, we must use memory
02544   // (this check catches arguments passed partially in memory)
02545   if (ArgOffset > LinkageSize + ParamAreaSize)
02546     UseMemory = true;
02547 
02548   // However, if the argument is actually passed in an FPR or a VR,
02549   // we don't use memory after all.
02550   if (!Flags.isByVal()) {
02551     if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
02552         // QPX registers overlap with the scalar FP registers.
02553         (HasQPX && (ArgVT == MVT::v4f32 ||
02554                     ArgVT == MVT::v4f64 ||
02555                     ArgVT == MVT::v4i1)))
02556       if (AvailableFPRs > 0) {
02557         --AvailableFPRs;
02558         return false;
02559       }
02560     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
02561         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
02562         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
02563       if (AvailableVRs > 0) {
02564         --AvailableVRs;
02565         return false;
02566       }
02567   }
02568 
02569   return UseMemory;
02570 }
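
// For instance (illustrative): with a 64-byte parameter save area, an f64
// passed in the ninth doubleword position pushes ArgOffset past the save
// area, yet if an FPR is still free the early return above reports that
// no stack slot is read; the value arrives in a register even though its
// shadow slot was accounted for.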
02571 
02572 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
02573 /// ensure minimum alignment required for target.
02574 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
02575                                      unsigned NumBytes) {
02576   unsigned TargetAlign = Lowering->getStackAlignment();
02577   unsigned AlignMask = TargetAlign - 1;
02578   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
02579   return NumBytes;
02580 }
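
// Worked example (illustrative): with a 16-byte target stack alignment,
// AlignMask == 15, so NumBytes == 100 becomes (100 + 15) & ~15 == 112.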
02581 
02582 SDValue
02583 PPCTargetLowering::LowerFormalArguments(SDValue Chain,
02584                                         CallingConv::ID CallConv, bool isVarArg,
02585                                         const SmallVectorImpl<ISD::InputArg>
02586                                           &Ins,
02587                                         SDLoc dl, SelectionDAG &DAG,
02588                                         SmallVectorImpl<SDValue> &InVals)
02589                                           const {
02590   if (Subtarget.isSVR4ABI()) {
02591     if (Subtarget.isPPC64())
02592       return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
02593                                          dl, DAG, InVals);
02594     else
02595       return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
02596                                          dl, DAG, InVals);
02597   } else {
02598     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
02599                                        dl, DAG, InVals);
02600   }
02601 }
02602 
02603 SDValue
02604 PPCTargetLowering::LowerFormalArguments_32SVR4(
02605                                       SDValue Chain,
02606                                       CallingConv::ID CallConv, bool isVarArg,
02607                                       const SmallVectorImpl<ISD::InputArg>
02608                                         &Ins,
02609                                       SDLoc dl, SelectionDAG &DAG,
02610                                       SmallVectorImpl<SDValue> &InVals) const {
02611 
02612   // 32-bit SVR4 ABI Stack Frame Layout:
02613   //              +-----------------------------------+
02614   //        +-->  |            Back chain             |
02615   //        |     +-----------------------------------+
02616   //        |     | Floating-point register save area |
02617   //        |     +-----------------------------------+
02618   //        |     |    General register save area     |
02619   //        |     +-----------------------------------+
02620   //        |     |          CR save word             |
02621   //        |     +-----------------------------------+
02622   //        |     |         VRSAVE save word          |
02623   //        |     +-----------------------------------+
02624   //        |     |         Alignment padding         |
02625   //        |     +-----------------------------------+
02626   //        |     |     Vector register save area     |
02627   //        |     +-----------------------------------+
02628   //        |     |       Local variable space        |
02629   //        |     +-----------------------------------+
02630   //        |     |        Parameter list area        |
02631   //        |     +-----------------------------------+
02632   //        |     |           LR save word            |
02633   //        |     +-----------------------------------+
02634   // SP-->  +---  |            Back chain             |
02635   //              +-----------------------------------+
02636   //
02637   // Specifications:
02638   //   System V Application Binary Interface PowerPC Processor Supplement
02639   //   AltiVec Technology Programming Interface Manual
02640 
02641   MachineFunction &MF = DAG.getMachineFunction();
02642   MachineFrameInfo *MFI = MF.getFrameInfo();
02643   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
02644 
02645   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02646   // Potential tail calls could cause overwriting of argument stack slots.
02647   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
02648                        (CallConv == CallingConv::Fast));
02649   unsigned PtrByteSize = 4;
02650 
02651   // Assign locations to all of the incoming arguments.
02652   SmallVector<CCValAssign, 16> ArgLocs;
02653   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
02654                  *DAG.getContext());
02655 
02656   // Reserve space for the linkage area on the stack.
02657   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
02658   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
02659 
02660   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
02661 
02662   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02663     CCValAssign &VA = ArgLocs[i];
02664 
02665     // Arguments stored in registers.
02666     if (VA.isRegLoc()) {
02667       const TargetRegisterClass *RC;
02668       EVT ValVT = VA.getValVT();
02669 
02670       switch (ValVT.getSimpleVT().SimpleTy) {
02671         default:
02672           llvm_unreachable("ValVT not supported by formal arguments Lowering");
02673         case MVT::i1:
02674         case MVT::i32:
02675           RC = &PPC::GPRCRegClass;
02676           break;
02677         case MVT::f32:
02678           RC = &PPC::F4RCRegClass;
02679           break;
02680         case MVT::f64:
02681           if (Subtarget.hasVSX())
02682             RC = &PPC::VSFRCRegClass;
02683           else
02684             RC = &PPC::F8RCRegClass;
02685           break;
02686         case MVT::v16i8:
02687         case MVT::v8i16:
02688         case MVT::v4i32:
02689           RC = &PPC::VRRCRegClass;
02690           break;
02691         case MVT::v4f32:
02692           RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
02693           break;
02694         case MVT::v2f64:
02695         case MVT::v2i64:
02696           RC = &PPC::VSHRCRegClass;
02697           break;
02698         case MVT::v4f64:
02699           RC = &PPC::QFRCRegClass;
02700           break;
02701         case MVT::v4i1:
02702           RC = &PPC::QBRCRegClass;
02703           break;
02704       }
02705 
02706       // Transform the arguments stored in physical registers into virtual ones.
02707       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02708       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
02709                                             ValVT == MVT::i1 ? MVT::i32 : ValVT);
02710 
02711       if (ValVT == MVT::i1)
02712         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
02713 
02714       InVals.push_back(ArgValue);
02715     } else {
02716       // Argument stored in memory.
02717       assert(VA.isMemLoc());
02718 
02719       unsigned ArgSize = VA.getLocVT().getStoreSize();
02720       int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
02721                                       isImmutable);
02722 
02723       // Create load nodes to retrieve arguments from the stack.
02724       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
02725       InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
02726                                    MachinePointerInfo(),
02727                                    false, false, false, 0));
02728     }
02729   }
02730 
02731   // Assign locations to all of the incoming aggregate by value arguments.
02732   // Aggregates passed by value are stored in the local variable space of the
02733   // caller's stack frame, right above the parameter list area.
02734   SmallVector<CCValAssign, 16> ByValArgLocs;
02735   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
02736                       ByValArgLocs, *DAG.getContext());
02737 
02738   // Reserve stack space for the allocations in CCInfo.
02739   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
02740 
02741   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
02742 
02743   // Area that is at least reserved in the caller of this function.
02744   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
02745   MinReservedArea = std::max(MinReservedArea, LinkageSize);
02746 
02747   // Set the size that is at least reserved in caller of this function.  Tail
02748   // call optimized function's reserved stack space needs to be aligned so that
02749   // taking the difference between two stack areas will result in an aligned
02750   // stack.
02751   MinReservedArea =
02752       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
02753   FuncInfo->setMinReservedArea(MinReservedArea);
02754 
02755   SmallVector<SDValue, 8> MemOps;
02756 
02757   // If the function takes variable number of arguments, make a frame index for
02758   // the start of the first vararg value... for expansion of llvm.va_start.
02759   if (isVarArg) {
02760     static const MCPhysReg GPArgRegs[] = {
02761       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
02762       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
02763     };
02764     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
02765 
02766     static const MCPhysReg FPArgRegs[] = {
02767       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
02768       PPC::F8
02769     };
02770     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
02771     if (DisablePPCFloatInVariadic)
02772       NumFPArgRegs = 0;
02773 
02774     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
02775     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
02776 
02777     // Make room for NumGPArgRegs and NumFPArgRegs.
02778     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
02779                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
02780 
02781     FuncInfo->setVarArgsStackOffset(
02782       MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
02783                              CCInfo.getNextStackOffset(), true));
02784 
02785     FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
02786     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
02787 
02788     // The fixed integer arguments of a variadic function are stored to the
02789     // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
02790     // the result of va_next.
02791     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
02792       // Get an existing live-in vreg, or add a new one.
02793       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
02794       if (!VReg)
02795         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
02796 
02797       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
02798       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
02799                                    MachinePointerInfo(), false, false, 0);
02800       MemOps.push_back(Store);
02801       // Increment the address by four for the next argument to store
02802       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
02803       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
02804     }
02805 
02806     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
02807     // is set.
02808     // The double arguments are stored to the VarArgsFrameIndex
02809     // on the stack.
02810     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
02811       // Get an existing live-in vreg, or add a new one.
02812       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
02813       if (!VReg)
02814         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
02815 
02816       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
02817       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
02818                                    MachinePointerInfo(), false, false, 0);
02819       MemOps.push_back(Store);
02820       // Increment the address by eight for the next argument to store
02821       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
02822                                          PtrVT);
02823       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
02824     }
02825   }
02826 
02827   if (!MemOps.empty())
02828     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
02829 
02830   return Chain;
02831 }
02832 
02833 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
02834 // value to MVT::i64 and then truncate to the correct register size.
02835 SDValue
02836 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
02837                                      SelectionDAG &DAG, SDValue ArgVal,
02838                                      SDLoc dl) const {
02839   if (Flags.isSExt())
02840     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
02841                          DAG.getValueType(ObjectVT));
02842   else if (Flags.isZExt())
02843     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
02844                          DAG.getValueType(ObjectVT));
02845 
02846   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
02847 }
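
// Illustrative use (not from the original source): an i32 argument that
// the caller sign-extended into an i64 GPR becomes
//   t1 = AssertSext t0, ValueType:i32   ; upper bits replicate bit 31
//   t2 = truncate t1 to i32
// letting later DAG combines remove redundant re-extensions of t2.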
02848 
02849 SDValue
02850 PPCTargetLowering::LowerFormalArguments_64SVR4(
02851                                       SDValue Chain,
02852                                       CallingConv::ID CallConv, bool isVarArg,
02853                                       const SmallVectorImpl<ISD::InputArg>
02854                                         &Ins,
02855                                       SDLoc dl, SelectionDAG &DAG,
02856                                       SmallVectorImpl<SDValue> &InVals) const {
02857   // TODO: add description of PPC stack frame format, or at least some docs.
02858   //
02859   bool isELFv2ABI = Subtarget.isELFv2ABI();
02860   bool isLittleEndian = Subtarget.isLittleEndian();
02861   MachineFunction &MF = DAG.getMachineFunction();
02862   MachineFrameInfo *MFI = MF.getFrameInfo();
02863   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
02864 
02865   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
02866          "fastcc not supported on varargs functions");
02867 
02868   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02869   // Potential tail calls could cause overwriting of argument stack slots.
02870   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
02871                        (CallConv == CallingConv::Fast));
02872   unsigned PtrByteSize = 8;
02873   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
02874 
02875   static const MCPhysReg GPR[] = {
02876     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
02877     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
02878   };
02879   static const MCPhysReg VR[] = {
02880     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
02881     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
02882   };
02883   static const MCPhysReg VSRH[] = {
02884     PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
02885     PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
02886   };
02887 
02888   const unsigned Num_GPR_Regs = array_lengthof(GPR);
02889   const unsigned Num_FPR_Regs = 13;
02890   const unsigned Num_VR_Regs  = array_lengthof(VR);
02891   const unsigned Num_QFPR_Regs = Num_FPR_Regs;
02892 
02893   // Do a first pass over the arguments to determine whether the ABI
02894   // guarantees that our caller has allocated the parameter save area
02895   // on its stack frame.  In the ELFv1 ABI, this is always the case;
02896   // in the ELFv2 ABI, it is true if this is a vararg function or if
02897   // any parameter is located in a stack slot.
02898 
02899   bool HasParameterArea = !isELFv2ABI || isVarArg;
02900   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
02901   unsigned NumBytes = LinkageSize;
02902   unsigned AvailableFPRs = Num_FPR_Regs;
02903   unsigned AvailableVRs = Num_VR_Regs;
02904   for (unsigned i = 0, e = Ins.size(); i != e; ++i)
02905     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
02906                                PtrByteSize, LinkageSize, ParamAreaSize,
02907                                NumBytes, AvailableFPRs, AvailableVRs,
02908                                Subtarget.hasQPX()))
02909       HasParameterArea = true;
02910 
02911   // Add DAG nodes to load the arguments or copy them out of registers.  On
02912   // entry to a function on PPC, the arguments start after the linkage area,
02913   // although the first ones are often in registers.
02914 
02915   unsigned ArgOffset = LinkageSize;
02916   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
02917   unsigned &QFPR_idx = FPR_idx;
02918   SmallVector<SDValue, 8> MemOps;
02919   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
02920   unsigned CurArgIdx = 0;
02921   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
02922     SDValue ArgVal;
02923     bool needsLoad = false;
02924     EVT ObjectVT = Ins[ArgNo].VT;
02925     EVT OrigVT = Ins[ArgNo].ArgVT;
02926     unsigned ObjSize = ObjectVT.getStoreSize();
02927     unsigned ArgSize = ObjSize;
02928     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
02929     if (Ins[ArgNo].isOrigArg()) {
02930       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
02931       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
02932     }
02933     // We re-align the argument offset for each argument, except when using the
02934     // fast calling convention, when we need to make sure we do that only when
02935     // we'll actually use a stack slot.
02936     unsigned CurArgOffset, Align;
02937     auto ComputeArgOffset = [&]() {
02938       /* Respect alignment of argument on the stack.  */
02939       Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
02940       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
02941       CurArgOffset = ArgOffset;
02942     };
02943 
02944     if (CallConv != CallingConv::Fast) {
02945       ComputeArgOffset();
02946 
02947       /* Compute GPR index associated with argument offset.  */
02948       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
02949       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
02950     }
02951 
02952     // FIXME the codegen can be much improved in some cases.
02953     // We do not have to keep everything in memory.
02954     if (Flags.isByVal()) {
02955       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
02956 
02957       if (CallConv == CallingConv::Fast)
02958         ComputeArgOffset();
02959 
02960       // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
02961       ObjSize = Flags.getByValSize();
02962       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
02963       // Empty aggregate parameters do not take up registers.  Examples:
02964       //   struct { } a;
02965       //   union  { } b;
02966       //   int c[0];
02967       // etc.  However, we have to provide a place-holder in InVals, so
02968       // pretend we have an 8-byte item at the current address for that
02969       // purpose.
02970       if (!ObjSize) {
02971         int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
02972         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
02973         InVals.push_back(FIN);
02974         continue;
02975       }
02976 
02977       // Create a stack object covering all stack doublewords occupied
02978       // by the argument.  If the argument is (fully or partially) on
02979       // the stack, or if the argument is fully in registers but the
02980       // caller has allocated the parameter save anyway, we can refer
02981       // directly to the caller's stack frame.  Otherwise, create a
02982       // local copy in our own frame.
02983       int FI;
02984       if (HasParameterArea ||
02985           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
02986         FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
02987       else
02988         FI = MFI->CreateStackObject(ArgSize, Align, false);
02989       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
02990 
02991       // Handle aggregates smaller than 8 bytes.
02992       if (ObjSize < PtrByteSize) {
02993         // The value of the object is its address, which differs from the
02994         // address of the enclosing doubleword on big-endian systems.
02995         SDValue Arg = FIN;
02996         if (!isLittleEndian) {
02997           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
02998           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
02999         }
03000         InVals.push_back(Arg);
03001 
03002         if (GPR_idx != Num_GPR_Regs) {
03003           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
03004           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03005           SDValue Store;
03006 
03007           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
03008             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
03009                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
03010             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
03011                                       MachinePointerInfo(FuncArg),
03012                                       ObjType, false, false, 0);
03013           } else {
03014             // For sizes that don't fit a truncating store (3, 5, 6, 7),
03015             // store the whole register as-is to the parameter save area
03016             // slot.
03017             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
03018                                  MachinePointerInfo(FuncArg),
03019                                  false, false, 0);
03020           }
03021 
03022           MemOps.push_back(Store);
03023         }
03024         // Whether we copied from a register or not, advance the offset
03025         // into the parameter save area by a full doubleword.
03026         ArgOffset += PtrByteSize;
03027         continue;
03028       }
03029 
03030       // The value of the object is its address, which is the address of
03031       // its first stack doubleword.
03032       InVals.push_back(FIN);
03033 
03034       // Store whatever pieces of the object are in registers to memory.
03035       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
03036         if (GPR_idx == Num_GPR_Regs)
03037           break;
03038 
03039         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03040         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03041         SDValue Addr = FIN;
03042         if (j) {
03043           SDValue Off = DAG.getConstant(j, PtrVT);
03044           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
03045         }
03046         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
03047                                      MachinePointerInfo(FuncArg, j),
03048                                      false, false, 0);
03049         MemOps.push_back(Store);
03050         ++GPR_idx;
03051       }
03052       ArgOffset += ArgSize;
03053       continue;
03054     }
03055 
03056     switch (ObjectVT.getSimpleVT().SimpleTy) {
03057     default: llvm_unreachable("Unhandled argument type!");
03058     case MVT::i1:
03059     case MVT::i32:
03060     case MVT::i64:
03061       // These can be scalar arguments or elements of an integer array type
03062       // passed directly.  Clang may use those instead of "byval" aggregate
03063       // types to avoid forcing arguments to memory unnecessarily.
03064       if (GPR_idx != Num_GPR_Regs) {
03065         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
03066         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
03067 
03068         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
03069           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
03070           // value to MVT::i64 and then truncate to the correct register size.
03071           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
03072       } else {
03073         if (CallConv == CallingConv::Fast)
03074           ComputeArgOffset();
03075 
03076         needsLoad = true;
03077         ArgSize = PtrByteSize;
03078       }
03079       if (CallConv != CallingConv::Fast || needsLoad)
03080         ArgOffset += 8;
03081       break;
03082 
03083     case MVT::f32:
03084     case MVT::f64:
03085       // These can be scalar arguments or elements of a float array type
03086       // passed directly.  The latter are used to implement ELFv2 homogeneous
03087       // float aggregates.
03088       if (FPR_idx != Num_FPR_Regs) {
03089         unsigned VReg;
03090 
03091         if (ObjectVT == MVT::f32)
03092           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
03093         else
03094           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
03095                                                 ? &PPC::VSFRCRegClass
03096                                                 : &PPC::F8RCRegClass);
03097 
03098         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03099         ++FPR_idx;
03100       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
03101         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
03102         // once we support fp <-> gpr moves.
03103 
03104         // This can only ever happen in the presence of f32 array types,
03105         // since otherwise we never run out of FPRs before running out
03106         // of GPRs.
03107         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
03108         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
03109 
03110         if (ObjectVT == MVT::f32) {
03111           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
03112             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
03113                                  DAG.getConstant(32, MVT::i32));
03114           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
03115         }
03116 
03117         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
03118       } else {
03119         if (CallConv == CallingConv::Fast)
03120           ComputeArgOffset();
03121 
03122         needsLoad = true;
03123       }
03124 
03125       // When passing an array of floats, the array occupies consecutive
03126       // space in the argument area; only round up to the next doubleword
03127       // at the end of the array.  Otherwise, each float takes 8 bytes.
03128       if (CallConv != CallingConv::Fast || needsLoad) {
03129         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
03130         ArgOffset += ArgSize;
03131         if (Flags.isInConsecutiveRegsLast())
03132           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
03133       }
03134       break;
03135     case MVT::v4f32:
03136     case MVT::v4i32:
03137     case MVT::v8i16:
03138     case MVT::v16i8:
03139     case MVT::v2f64:
03140     case MVT::v2i64:
03141       if (!Subtarget.hasQPX()) {
03142       // These can be scalar arguments or elements of a vector array type
03143       // passed directly.  The latter are used to implement ELFv2 homogeneous
03144       // vector aggregates.
03145       if (VR_idx != Num_VR_Regs) {
03146         unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
03147                         MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
03148                         MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
03149         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03150         ++VR_idx;
03151       } else {
03152         if (CallConv == CallingConv::Fast)
03153           ComputeArgOffset();
03154 
03155         needsLoad = true;
03156       }
03157       if (CallConv != CallingConv::Fast || needsLoad)
03158         ArgOffset += 16;
03159       break;
03160       } // not QPX
03161 
03162       assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
03163              "Invalid QPX parameter type");
03164       /* fall through */
03165 
03166     case MVT::v4f64:
03167     case MVT::v4i1:
03168       // QPX vectors are treated like their scalar floating-point subregisters
03169       // (except that they're larger).
03170       unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
03171       if (QFPR_idx != Num_QFPR_Regs) {
03172         const TargetRegisterClass *RC;
03173         switch (ObjectVT.getSimpleVT().SimpleTy) {
03174         case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
03175         case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
03176         default:         RC = &PPC::QBRCRegClass; break;
03177         }
03178 
03179         unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
03180         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03181         ++QFPR_idx;
03182       } else {
03183         if (CallConv == CallingConv::Fast)
03184           ComputeArgOffset();
03185         needsLoad = true;
03186       }
03187       if (CallConv != CallingConv::Fast || needsLoad)
03188         ArgOffset += Sz;
03189       break;
03190     }
03191 
03192     // We need to load the argument to a virtual register if we determined
03193     // above that we ran out of physical registers of the appropriate type.
03194     if (needsLoad) {
03195       if (ObjSize < ArgSize && !isLittleEndian)
03196         CurArgOffset += ArgSize - ObjSize;
03197       int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
03198       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03199       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
03200                            false, false, false, 0);
03201     }
03202 
03203     InVals.push_back(ArgVal);
03204   }
03205 
03206   // Area that is at least reserved in the caller of this function.
03207   unsigned MinReservedArea;
03208   if (HasParameterArea)
03209     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
03210   else
03211     MinReservedArea = LinkageSize;
03212 
03213   // Set the size that is at least reserved in caller of this function.  Tail
03214   // call optimized functions' reserved stack space needs to be aligned so that
03215   // taking the difference between two stack areas will result in an aligned
03216   // stack.
03217   MinReservedArea =
03218       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
03219   FuncInfo->setMinReservedArea(MinReservedArea);
03220 
03221   // If the function takes variable number of arguments, make a frame index for
03222   // the start of the first vararg value... for expansion of llvm.va_start.
03223   if (isVarArg) {
03224     int Depth = ArgOffset;
03225 
03226     FuncInfo->setVarArgsFrameIndex(
03227       MFI->CreateFixedObject(PtrByteSize, Depth, true));
03228     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
03229 
03230     // If this function is vararg, store any remaining integer argument regs
03231     // to their spots on the stack so that they may be loaded by dereferencing the
03232     // result of va_next.
03233     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
03234          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
03235       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03236       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03237       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
03238                                    MachinePointerInfo(), false, false, 0);
03239       MemOps.push_back(Store);
03240       // Increment the address by eight for the next argument to store
03241       SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
03242       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
03243     }
03244   }
03245 
03246   if (!MemOps.empty())
03247     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
03248 
03249   return Chain;
03250 }
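
// Illustrative note (not from the original source): under ELFv2, the
// first pass above means that for
//   void f(double a, double b);
// both values arrive in F1/F2, no parameter needs a stack slot, and
// HasParameterArea stays false, so MinReservedArea is just the linkage
// size.  Making f variadic, or passing a ninth integer doubleword
// argument, brings the 64-byte parameter save area back.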
03251 
03252 SDValue
03253 PPCTargetLowering::LowerFormalArguments_Darwin(
03254                                       SDValue Chain,
03255                                       CallingConv::ID CallConv, bool isVarArg,
03256                                       const SmallVectorImpl<ISD::InputArg>
03257                                         &Ins,
03258                                       SDLoc dl, SelectionDAG &DAG,
03259                                       SmallVectorImpl<SDValue> &InVals) const {
03260   // TODO: add description of PPC stack frame format, or at least some docs.
03261   //
03262   MachineFunction &MF = DAG.getMachineFunction();
03263   MachineFrameInfo *MFI = MF.getFrameInfo();
03264   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
03265 
03266   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
03267   bool isPPC64 = PtrVT == MVT::i64;
03268   // Potential tail calls could cause overwriting of argument stack slots.
03269   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
03270                        (CallConv == CallingConv::Fast));
03271   unsigned PtrByteSize = isPPC64 ? 8 : 4;
03272   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
03273   unsigned ArgOffset = LinkageSize;
03274   // Area that is at least reserved in caller of this function.
03275   unsigned MinReservedArea = ArgOffset;
03276 
03277   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
03278     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
03279     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
03280   };
03281   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
03282     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
03283     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
03284   };
03285   static const MCPhysReg VR[] = {
03286     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
03287     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
03288   };
03289 
03290   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
03291   const unsigned Num_FPR_Regs = 13;
03292   const unsigned Num_VR_Regs  = array_lengthof(VR);
03293 
03294   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
03295 
03296   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
03297 
03298   // In 32-bit non-varargs functions, the stack space for vectors is after the
03299   // stack space for non-vectors.  We do not use this space unless we have
03300   // too many vectors to fit in registers, something that only occurs in
03301   // constructed examples, but we have to walk the arglist to figure
03302   // that out...for the pathological case, compute VecArgOffset as the
03303   // start of the vector parameter area.  Computing VecArgOffset is the
03304   // entire point of the following loop.
03305   unsigned VecArgOffset = ArgOffset;
03306   if (!isVarArg && !isPPC64) {
03307     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
03308          ++ArgNo) {
03309       EVT ObjectVT = Ins[ArgNo].VT;
03310       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
03311 
03312       if (Flags.isByVal()) {
03313         // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
03314         unsigned ObjSize = Flags.getByValSize();
03315         unsigned ArgSize =
03316                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
03317         VecArgOffset += ArgSize;
03318         continue;
03319       }
03320 
03321       switch(ObjectVT.getSimpleVT().SimpleTy) {
03322       default: llvm_unreachable("Unhandled argument type!");
03323       case MVT::i1:
03324       case MVT::i32:
03325       case MVT::f32:
03326         VecArgOffset += 4;
03327         break;
03328       case MVT::i64:  // PPC64
03329       case MVT::f64:
03330         // FIXME: We are guaranteed to be !isPPC64 at this point.
03331         // Does MVT::i64 apply?
03332         VecArgOffset += 8;
03333         break;
03334       case MVT::v4f32:
03335       case MVT::v4i32:
03336       case MVT::v8i16:
03337       case MVT::v16i8:
03338         // Nothing to do; we're only looking at non-vector args here.
03339         break;
03340       }
03341     }
03342   }
03343   // We've found where the vector parameter area in memory is.  Skip the
03344   // first 12 parameters; these don't use that memory.
03345   VecArgOffset = ((VecArgOffset+15)/16)*16;
03346   VecArgOffset += 12*16;
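
// Illustrative note (not from the original source): in a 32-bit
// non-varargs Darwin function taking thirteen v4f32 arguments, the first
// twelve travel in V2..V13 and only the thirteenth is materialized in
// memory, at the VecArgOffset just computed (slots are still skipped for
// the twelve register-borne vectors, hence the 12*16 above).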
03347 
03348   // Add DAG nodes to load the arguments or copy them out of registers.  On
03349   // entry to a function on PPC, the arguments start after the linkage area,
03350   // although the first ones are often in registers.
03351 
03352   SmallVector<SDValue, 8> MemOps;
03353   unsigned nAltivecParamsAtEnd = 0;
03354   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
03355   unsigned CurArgIdx = 0;
03356   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
03357     SDValue ArgVal;
03358     bool needsLoad = false;
03359     EVT ObjectVT = Ins[ArgNo].VT;
03360     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
03361     unsigned ArgSize = ObjSize;
03362     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
03363     if (Ins[ArgNo].isOrigArg()) {
03364       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
03365       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
03366     }
03367     unsigned CurArgOffset = ArgOffset;
03368 
03369     // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
03370     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
03371         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
03372       if (isVarArg || isPPC64) {
03373         MinReservedArea = ((MinReservedArea+15)/16)*16;
03374         MinReservedArea += CalculateStackSlotSize(ObjectVT,
03375                                                   Flags,
03376                                                   PtrByteSize);
03377       } else  nAltivecParamsAtEnd++;
03378     } else
03379       // Calculate min reserved area.
03380       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
03381                                                 Flags,
03382                                                 PtrByteSize);
03383 
03384     // FIXME the codegen can be much improved in some cases.
03385     // We do not have to keep everything in memory.
03386     if (Flags.isByVal()) {
03387       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
03388 
03389       // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
03390       ObjSize = Flags.getByValSize();
03391       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
03392       // Objects of size 1 and 2 are right justified, everything else is
03393       // left justified.  This means the memory address is adjusted forwards.
03394       if (ObjSize==1 || ObjSize==2) {
03395         CurArgOffset = CurArgOffset + (4 - ObjSize);
03396       }
03397       // The value of the object is its address.
03398       int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
03399       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03400       InVals.push_back(FIN);
03401       if (ObjSize==1 || ObjSize==2) {
03402         if (GPR_idx != Num_GPR_Regs) {
03403           unsigned VReg;
03404           if (isPPC64)
03405             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03406           else
03407             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
03408           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03409           EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
03410           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
03411                                             MachinePointerInfo(FuncArg),
03412                                             ObjType, false, false, 0);
03413           MemOps.push_back(Store);
03414           ++GPR_idx;
03415         }
03416 
03417         ArgOffset += PtrByteSize;
03418 
03419         continue;
03420       }
03421       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
03422         // Store whatever pieces of the object are in registers
03423         // to memory.  ArgOffset will be the address of the beginning
03424         // of the object.
03425         if (GPR_idx != Num_GPR_Regs) {
03426           unsigned VReg;
03427           if (isPPC64)
03428             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03429           else
03430             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
03431           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
03432           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03433           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03434           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
03435                                        MachinePointerInfo(FuncArg, j),
03436                                        false, false, 0);
03437           MemOps.push_back(Store);
03438           ++GPR_idx;
03439           ArgOffset += PtrByteSize;
03440         } else {
03441           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
03442           break;
03443         }
03444       }
03445       continue;
03446     }
03447 
03448     switch (ObjectVT.getSimpleVT().SimpleTy) {
03449     default: llvm_unreachable("Unhandled argument type!");
03450     case MVT::i1:
03451     case MVT::i32:
03452       if (!isPPC64) {
03453         if (GPR_idx != Num_GPR_Regs) {
03454           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
03455           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
03456 
03457           if (ObjectVT == MVT::i1)
03458             ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
03459 
03460           ++GPR_idx;
03461         } else {
03462           needsLoad = true;
03463           ArgSize = PtrByteSize;
03464         }
03465         // All int arguments reserve stack space in the Darwin ABI.
03466         ArgOffset += PtrByteSize;
03467         break;
03468       }
03469       // FALLTHROUGH
03470     case MVT::i64:  // PPC64
03471       if (GPR_idx != Num_GPR_Regs) {
03472         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03473         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
03474 
03475         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
03476           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
03477           // value to MVT::i64 and then truncate to the correct register size.
03478           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
03479 
03480         ++GPR_idx;
03481       } else {
03482         needsLoad = true;
03483         ArgSize = PtrByteSize;
03484       }
03485       // All int arguments reserve stack space in the Darwin ABI.
03486       ArgOffset += 8;
03487       break;
03488 
03489     case MVT::f32:
03490     case MVT::f64:
03491       // Every 4 bytes of argument space consumes one of the GPRs available for
03492       // argument passing.
03493       if (GPR_idx != Num_GPR_Regs) {
03494         ++GPR_idx;
03495         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
03496           ++GPR_idx;
03497       }
03498       if (FPR_idx != Num_FPR_Regs) {
03499         unsigned VReg;
03500 
03501         if (ObjectVT == MVT::f32)
03502           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
03503         else
03504           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
03505 
03506         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03507         ++FPR_idx;
03508       } else {
03509         needsLoad = true;
03510       }
03511 
03512       // All FP arguments reserve stack space in the Darwin ABI.
03513       ArgOffset += isPPC64 ? 8 : ObjSize;
03514       break;
03515     case MVT::v4f32:
03516     case MVT::v4i32:
03517     case MVT::v8i16:
03518     case MVT::v16i8:
03519       // Note that vector arguments in registers don't reserve stack space,
03520       // except in varargs functions.
03521       if (VR_idx != Num_VR_Regs) {
03522         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
03523         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03524         if (isVarArg) {
03525           while ((ArgOffset % 16) != 0) {
03526             ArgOffset += PtrByteSize;
03527             if (GPR_idx != Num_GPR_Regs)
03528               GPR_idx++;
03529           }
03530           ArgOffset += 16;
03531           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
03532         }
03533         ++VR_idx;
03534       } else {
03535         if (!isVarArg && !isPPC64) {
03536           // Vectors go after all the nonvectors.
03537           CurArgOffset = VecArgOffset;
03538           VecArgOffset += 16;
03539         } else {
03540           // Vectors are aligned.
03541           ArgOffset = ((ArgOffset+15)/16)*16;
03542           CurArgOffset = ArgOffset;
03543           ArgOffset += 16;
03544         }
03545         needsLoad = true;
03546       }
03547       break;
03548     }
03549 
03550     // We need to load the argument to a virtual register if we determined above
03551     // that we ran out of physical registers of the appropriate type.
03552     if (needsLoad) {
03553       int FI = MFI->CreateFixedObject(ObjSize,
03554                                       CurArgOffset + (ArgSize - ObjSize),
03555                                       isImmutable);
03556       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03557       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
03558                            false, false, false, 0);
03559     }
03560 
03561     InVals.push_back(ArgVal);
03562   }
03563 
03564   // Allow for Altivec parameters at the end, if needed.
03565   if (nAltivecParamsAtEnd) {
03566     MinReservedArea = ((MinReservedArea+15)/16)*16;
03567     MinReservedArea += 16*nAltivecParamsAtEnd;
03568   }
03569 
03570   // Area that is at least reserved in the caller of this function.
03571   MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
03572 
03573   // Set the size that is at least reserved in caller of this function.  Tail
03574   // call optimized functions' reserved stack space needs to be aligned so that
03575   // taking the difference between two stack areas will result in an aligned
03576   // stack.
03577   MinReservedArea =
03578       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
03579   FuncInfo->setMinReservedArea(MinReservedArea);
03580 
03581   // If the function takes variable number of arguments, make a frame index for
03582   // the start of the first vararg value... for expansion of llvm.va_start.
03583   if (isVarArg) {
03584     int Depth = ArgOffset;
03585 
03586     FuncInfo->setVarArgsFrameIndex(
03587       MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
03588                              Depth, true));
03589     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
03590 
03591     // If this function is vararg, store any remaining integer argument regs
03592     // to their spots on the stack so that they may be loaded by dereferencing the
03593     // result of va_next.
03594     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
03595       unsigned VReg;
03596 
03597       if (isPPC64)
03598         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03599       else
03600         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
03601 
03602       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03603       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
03604                                    MachinePointerInfo(), false, false, 0);
03605       MemOps.push_back(Store);
03606       // Increment the address by the pointer size for the next argument to store
03607       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
03608       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
03609     }
03610   }
03611 
03612   if (!MemOps.empty())
03613     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
03614 
03615   return Chain;
03616 }
03617 
03618 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
03619 /// adjusted to accommodate the arguments for the tailcall.
03620 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
03621                                    unsigned ParamSize) {
03622 
03623   if (!isTailCall) return 0;
03624 
03625   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
03626   unsigned CallerMinReservedArea = FI->getMinReservedArea();
03627   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
03628   // Remember only if the new adjustment is bigger.
03629   if (SPDiff < FI->getTailCallSPDelta())
03630     FI->setTailCallSPDelta(SPDiff);
03631 
03632   return SPDiff;
03633 }
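
// Worked example (illustrative): if the caller reserved 112 bytes
// (CallerMinReservedArea) and the tail callee needs 128 bytes of argument
// space, SPDiff == -16; the negative delta is recorded so the stack can
// be grown before the callee reuses the caller's frame.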
03634 
03635 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
03636 /// for tail call optimization. Targets which want to do tail call
03637 /// optimization should implement this function.
03638 bool
03639 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
03640                                                      CallingConv::ID CalleeCC,
03641                                                      bool isVarArg,
03642                                       const SmallVectorImpl<ISD::InputArg> &Ins,
03643                                                      SelectionDAG& DAG) const {
03644   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
03645     return false;
03646 
03647   // Variable argument functions are not supported.
03648   if (isVarArg)
03649     return false;
03650 
03651   MachineFunction &MF = DAG.getMachineFunction();
03652   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
03653   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
03654     // Functions containing by val parameters are not supported.
03655     for (unsigned i = 0; i != Ins.size(); i++) {
03656        ISD::ArgFlagsTy Flags = Ins[i].Flags;
03657        if (Flags.isByVal()) return false;
03658     }
03659 
03660     // Non-PIC/GOT tail calls are supported.
03661     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
03662       return true;
03663 
03664     // At the moment we can only do local tail calls (in the same module,
03665     // hidden or protected) if we are generating PIC.
03666     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
03667       return G->getGlobal()->hasHiddenVisibility()
03668           || G->getGlobal()->hasProtectedVisibility();
03669   }
03670 
03671   return false;
03672 }
03673 
03674 /// isBLACompatibleAddress - Return the immediate to use if the specified
03675 /// 32-bit value is representable in the immediate field of a BxA instruction.
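      // (Illustrative: Addr = 0x1000 is word-aligned and fits in a sign-extended
      //  26-bit field, so it encodes as immediate 0x400 (0x1000 >> 2); a value
      //  such as 0x08000002 fails both the alignment and the range checks.)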
03676 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
03677   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
03678   if (!C) return nullptr;
03679 
03680   int Addr = C->getZExtValue();
03681   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
03682       SignExtend32<26>(Addr) != Addr)
03683     return nullptr;  // Top 6 bits have to be sext of immediate.
03684 
03685   return DAG.getConstant((int)C->getZExtValue() >> 2,
03686                          DAG.getTargetLoweringInfo().getPointerTy()).getNode();
03687 }
03688 
03689 namespace {
03690 
03691 struct TailCallArgumentInfo {
03692   SDValue Arg;
03693   SDValue FrameIdxOp;
03694   int       FrameIdx;
03695 
03696   TailCallArgumentInfo() : FrameIdx(0) {}
03697 };
03698 
03699 } // end anonymous namespace
03700 
03701 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slots.
03702 static void
03703 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain,
03704                    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
03705                    SmallVectorImpl<SDValue> &MemOpChains, SDLoc dl) {
03708   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
03709     SDValue Arg = TailCallArgs[i].Arg;
03710     SDValue FIN = TailCallArgs[i].FrameIdxOp;
03711     int FI = TailCallArgs[i].FrameIdx;
03712     // Store relative to the frame pointer.
03713     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
03714                                        MachinePointerInfo::getFixedStack(FI),
03715                                        false, false, 0));
03716   }
03717 }
03718 
03719 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
03720 /// the appropriate stack slot for the tail call optimized function call.
03721 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
03722                                                MachineFunction &MF,
03723                                                SDValue Chain,
03724                                                SDValue OldRetAddr,
03725                                                SDValue OldFP,
03726                                                int SPDiff,
03727                                                bool isPPC64,
03728                                                bool isDarwinABI,
03729                                                SDLoc dl) {
03730   if (SPDiff) {
03731     // Calculate the new stack slot for the return address.
03732     int SlotSize = isPPC64 ? 8 : 4;
03733     const PPCFrameLowering *FL =
03734         MF.getSubtarget<PPCSubtarget>().getFrameLowering();
03735     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
03736     int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
03737                                                           NewRetAddrLoc, true);
03738     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
03739     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
03740     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
03741                          MachinePointerInfo::getFixedStack(NewRetAddr),
03742                          false, false, 0);
03743 
03744     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
03745     // slot as the FP is never overwritten.
03746     if (isDarwinABI) {
03747       int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
03748       int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
03749                                                           true);
03750       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
03751       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
03752                            MachinePointerInfo::getFixedStack(NewFPIdx),
03753                            false, false, 0);
03754     }
03755   }
03756   return Chain;
03757 }
03758 
03759 /// CalculateTailCallArgDest - Remember the argument for later processing, and
03760 /// calculate its position on the stack.
03761 static void
03762 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
03763                          SDValue Arg, int SPDiff, unsigned ArgOffset,
03764                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
03765   int Offset = ArgOffset + SPDiff;
03766   uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
03767   int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
03768   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
03769   SDValue FIN = DAG.getFrameIndex(FI, VT);
03770   TailCallArgumentInfo Info;
03771   Info.Arg = Arg;
03772   Info.FrameIdxOp = FIN;
03773   Info.FrameIdx = FI;
03774   TailCallArguments.push_back(Info);
03775 }
03776 
03777 /// EmitTailCallLoadFPAndRetAddr - Emit loads of the return address and frame
03778 /// pointer from their stack slots. Returns the chain as result and the loaded
03779 /// values in LROpOut/FPOpOut. Used when tail calling.
03780 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG,
03781                                                         int SPDiff,
03782                                                         SDValue Chain,
03783                                                         SDValue &LROpOut,
03784                                                         SDValue &FPOpOut,
03785                                                         bool isDarwinABI,
03786                                                         SDLoc dl) const {
03787   if (SPDiff) {
03788     // Load the LR and FP stack slot for later adjusting.
03789     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
03790     LROpOut = getReturnAddrFrameIndex(DAG);
03791     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
03792                           false, false, false, 0);
03793     Chain = SDValue(LROpOut.getNode(), 1);
03794 
03795     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
03796     // slot as the FP is never overwritten.
03797     if (isDarwinABI) {
03798       FPOpOut = getFramePointerFrameIndex(DAG);
03799       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
03800                             false, false, false, 0);
03801       Chain = SDValue(FPOpOut.getNode(), 1);
03802     }
03803   }
03804   return Chain;
03805 }
03806 
03807 /// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
03808 /// specified by "Src" to address "Dst" of size "Size".  Alignment information
03809 /// is specified by the parameter attribute. The copy will be passed as
03810 /// a byval function parameter.
03811 /// Sometimes what we are copying is the end of a larger object, the part that
03812 /// does not fit in registers.
03813 static SDValue
03814 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
03815                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
03816                           SDLoc dl) {
03817   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
03818   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
03819                        false, false, MachinePointerInfo(),
03820                        MachinePointerInfo());
03821 }
03822 
03823 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
03824 /// tail calls.
03825 static void
03826 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
03827                  SDValue Arg, SDValue PtrOff, int SPDiff,
03828                  unsigned ArgOffset, bool isPPC64, bool isTailCall,
03829                  bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
03830                  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
03831                  SDLoc dl) {
03832   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
03833   if (!isTailCall) {
03834     if (isVector) {
03835       SDValue StackPtr;
03836       if (isPPC64)
03837         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
03838       else
03839         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
03840       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
03841                            DAG.getConstant(ArgOffset, PtrVT));
03842     }
03843     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
03844                                        MachinePointerInfo(), false, false, 0));
03845   } else // Calculate and remember argument location.
03846     CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
03847                              TailCallArguments);
03848 }
03849 
03850 static
03851 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
03852                      SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
03853                      SDValue LROp, SDValue FPOp, bool isDarwinABI,
03854                      SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
03855   MachineFunction &MF = DAG.getMachineFunction();
03856 
03857   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
03858   // might overwrite each other in case of tail call optimization.
03859   SmallVector<SDValue, 8> MemOpChains2;
03860   // Do not glue the preceding copytoreg nodes together with the following ones.
03861   InFlag = SDValue();
03862   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
03863                                     MemOpChains2, dl);
03864   if (!MemOpChains2.empty())
03865     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
03866 
03867   // Store the return address to the appropriate stack slot.
03868   Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
03869                                         isPPC64, isDarwinABI, dl);
03870 
03871   // Emit callseq_end just before tailcall node.
03872   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
03873                              DAG.getIntPtrConstant(0, true), InFlag, dl);
03874   InFlag = Chain.getValue(1);
03875 }
03876 
03877 // Is this global address that of a function that can be called by name (as
03878 // opposed to something that must hold a descriptor for an indirect call)?
03879 static bool isFunctionGlobalAddress(SDValue Callee) {
03880   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
03881     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
03882         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
03883       return false;
03884 
03885     return G->getGlobal()->getType()->getElementType()->isFunctionTy();
03886   }
03887 
03888   return false;
03889 }
03890 
03891 static
03892 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
03893                      SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
03894                      bool isTailCall, bool IsPatchPoint,
03895                      SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
03896                      SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
03897                      ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
03898 
03899   bool isPPC64 = Subtarget.isPPC64();
03900   bool isSVR4ABI = Subtarget.isSVR4ABI();
03901   bool isELFv2ABI = Subtarget.isELFv2ABI();
03902 
03903   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
03904   NodeTys.push_back(MVT::Other);   // Returns a chain
03905   NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
03906 
03907   unsigned CallOpc = PPCISD::CALL;
03908 
03909   bool needIndirectCall = true;
03910   if (!isSVR4ABI || !isPPC64)
03911     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
03912       // If this is an absolute destination address, use the munged value.
03913       Callee = SDValue(Dest, 0);
03914       needIndirectCall = false;
03915     }
03916 
03917   if (isFunctionGlobalAddress(Callee)) {
03918     GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
03919     // A call to a TLS address is actually an indirect call to a
03920     // thread-specific pointer.
03921     unsigned OpFlags = 0;
03922     if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
03923          (Subtarget.getTargetTriple().isMacOSX() &&
03924           Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
03925          (G->getGlobal()->isDeclaration() ||
03926           G->getGlobal()->isWeakForLinker())) ||
03927         (Subtarget.isTargetELF() && !isPPC64 &&
03928          !G->getGlobal()->hasLocalLinkage() &&
03929          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
03930       // PC-relative references to external symbols should go through $stub,
03931       // unless we're building with the Leopard linker or later, which
03932       // automatically synthesizes these stubs.
03933       OpFlags = PPCII::MO_PLT_OR_STUB;
03934     }
03935 
03936     // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
03937     // every direct call is), turn it into a TargetGlobalAddress /
03938     // TargetExternalSymbol node so that legalize doesn't hack it.
03939     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
03940                                         Callee.getValueType(), 0, OpFlags);
03941     needIndirectCall = false;
03942   }
03943 
03944   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
03945     unsigned char OpFlags = 0;
03946 
03947     if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
03948          (Subtarget.getTargetTriple().isMacOSX() &&
03949           Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
03950         (Subtarget.isTargetELF() && !isPPC64 &&
03951          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
03952       // PC-relative references to external symbols should go through $stub,
03953       // unless we're building with the Leopard linker or later, which
03954       // automatically synthesizes these stubs.
03955       OpFlags = PPCII::MO_PLT_OR_STUB;
03956     }
03957 
03958     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
03959                                          OpFlags);
03960     needIndirectCall = false;
03961   }
03962 
03963   if (IsPatchPoint) {
03964     // We'll form an invalid direct call when lowering a patchpoint; the full
03965     // sequence for an indirect call is complicated, and many of the
03966     // instructions introduced might have side effects (and, thus, can't be
03967     // removed later). The call itself will be removed as soon as the
03968     // argument/return lowering is complete, so the fact that it has the wrong
03969     // kind of operands should not really matter.
03970     needIndirectCall = false;
03971   }
03972 
03973   if (needIndirectCall) {
03974     // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
03975     // to do the call; we can't use PPCISD::CALL.
03976     SDValue MTCTROps[] = {Chain, Callee, InFlag};
03977 
03978     if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
03979       // Function pointers in the 64-bit SVR4 ABI do not point to the function
03980       // entry point, but to the function descriptor (the function entry point
03981       // address is part of the function descriptor though).
03982       // The function descriptor is a three doubleword structure with the
03983       // following fields: function entry point, TOC base address and
03984       // environment pointer.
03985       // Thus for a call through a function pointer, the following actions need
03986       // to be performed:
03987       //   1. Save the TOC of the caller in the TOC save area of its stack
03988       //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
03989       //   2. Load the address of the function entry point from the function
03990       //      descriptor.
03991       //   3. Load the TOC of the callee from the function descriptor into r2.
03992       //   4. Load the environment pointer from the function descriptor into
03993       //      r11.
03994       //   5. Branch to the function entry point address.
03995       //   6. On return of the callee, the TOC of the caller needs to be
03996       //      restored (this is done in FinishCall()).
03997       //
03998       // The loads are scheduled at the beginning of the call sequence, and the
03999       // register copies are flagged together to ensure that no other
04000       // operations can be scheduled in between. E.g. without flagging the
04001       // copies together, a TOC access in the caller could be scheduled between
04002       // the assignment of the callee TOC and the branch to the callee, which
04003       // results in the TOC access going through the TOC of the callee instead
04004       // of going through the TOC of the caller, which leads to incorrect code.
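            // A sketch of the sequence this aims to produce (illustrative only;
            // ELFv1 offsets, assuming the descriptor address is in r11; the
            // actual register assignment is left to the compiler):
            //   std   r2, 40(r1)    ; save caller TOC (step 1)
            //   ld    r0, 0(r11)    ; entry point from descriptor (step 2)
            //   ld    r2, 8(r11)    ; callee TOC (step 3)
            //   ld    r11, 16(r11)  ; environment pointer (step 4)
            //   mtctr r0
            //   bctrl               ; branch to entry point (step 5)
            //   ld    r2, 40(r1)    ; caller TOC restored on return (step 6)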
04005 
04006       // Load the address of the function entry point from the function
04007       // descriptor.
04008       SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
04009       if (LDChain.getValueType() == MVT::Glue)
04010         LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
04011 
04012       bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();
04013 
04014       MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
04015       SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
04016                                         false, false, LoadsInv, 8);
04017 
04018       // Load environment pointer into r11.
04019       SDValue PtrOff = DAG.getIntPtrConstant(16);
04020       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
04021       SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
04022                                        MPI.getWithOffset(16), false, false,
04023                                        LoadsInv, 8);
04024 
04025       SDValue TOCOff = DAG.getIntPtrConstant(8);
04026       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
04027       SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
04028                                    MPI.getWithOffset(8), false, false,
04029                                    LoadsInv, 8);
04030 
04031       setUsesTOCBasePtr(DAG);
04032       SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
04033                                         InFlag);
04034       Chain = TOCVal.getValue(0);
04035       InFlag = TOCVal.getValue(1);
04036 
04037       SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
04038                                         InFlag);
04039 
04040       Chain = EnvVal.getValue(0);
04041       InFlag = EnvVal.getValue(1);
04042 
04043       MTCTROps[0] = Chain;
04044       MTCTROps[1] = LoadFuncPtr;
04045       MTCTROps[2] = InFlag;
04046     }
04047 
04048     Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
04049                         makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
04050     InFlag = Chain.getValue(1);
04051 
04052     NodeTys.clear();
04053     NodeTys.push_back(MVT::Other);
04054     NodeTys.push_back(MVT::Glue);
04055     Ops.push_back(Chain);
04056     CallOpc = PPCISD::BCTRL;
04057     Callee.setNode(nullptr);
04058     // Add use of X11 (holding environment pointer)
04059     if (isSVR4ABI && isPPC64 && !isELFv2ABI)
04060       Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
04061     // Add CTR register as callee so a bctr can be emitted later.
04062     if (isTailCall)
04063       Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
04064   }
04065 
04066   // If this is a direct call, pass the chain and the callee.
04067   if (Callee.getNode()) {
04068     Ops.push_back(Chain);
04069     Ops.push_back(Callee);
04070   }
04071   // If this is a tail call, add the stack pointer delta.
04072   if (isTailCall)
04073     Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
04074 
04075   // Add argument registers to the end of the list so that they are known live
04076   // into the call.
04077   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
04078     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
04079                                   RegsToPass[i].second.getValueType()));
04080 
04081   // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
04082   // into the call.
04083   if (isSVR4ABI && isPPC64 && !IsPatchPoint) {
04084     setUsesTOCBasePtr(DAG);
04085     Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
04086   }
04087 
04088   return CallOpc;
04089 }
04090 
04091 static
04092 bool isLocalCall(const SDValue &Callee) {
04094   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
04095     return !G->getGlobal()->isDeclaration() &&
04096            !G->getGlobal()->isWeakForLinker();
04097   return false;
04098 }
04099 
04100 SDValue
04101 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
04102                                    CallingConv::ID CallConv, bool isVarArg,
04103                                    const SmallVectorImpl<ISD::InputArg> &Ins,
04104                                    SDLoc dl, SelectionDAG &DAG,
04105                                    SmallVectorImpl<SDValue> &InVals) const {
04106 
04107   SmallVector<CCValAssign, 16> RVLocs;
04108   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
04109                     *DAG.getContext());
04110   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
04111 
04112   // Copy all of the result registers out of their specified physreg.
04113   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
04114     CCValAssign &VA = RVLocs[i];
04115     assert(VA.isRegLoc() && "Can only return in registers!");
04116 
04117     SDValue Val = DAG.getCopyFromReg(Chain, dl,
04118                                      VA.getLocReg(), VA.getLocVT(), InFlag);
04119     Chain = Val.getValue(1);
04120     InFlag = Val.getValue(2);
04121 
04122     switch (VA.getLocInfo()) {
04123     default: llvm_unreachable("Unknown loc info!");
04124     case CCValAssign::Full: break;
04125     case CCValAssign::AExt:
04126       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
04127       break;
04128     case CCValAssign::ZExt:
04129       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
04130                         DAG.getValueType(VA.getValVT()));
04131       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
04132       break;
04133     case CCValAssign::SExt:
04134       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
04135                         DAG.getValueType(VA.getValVT()));
04136       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
04137       break;
04138     }
04139 
04140     InVals.push_back(Val);
04141   }
04142 
04143   return Chain;
04144 }
04145 
04146 SDValue
04147 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
04148                               bool isTailCall, bool isVarArg, bool IsPatchPoint,
04149                               SelectionDAG &DAG,
04150                               SmallVector<std::pair<unsigned, SDValue>, 8>
04151                                 &RegsToPass,
04152                               SDValue InFlag, SDValue Chain,
04153                               SDValue CallSeqStart, SDValue &Callee,
04154                               int SPDiff, unsigned NumBytes,
04155                               const SmallVectorImpl<ISD::InputArg> &Ins,
04156                               SmallVectorImpl<SDValue> &InVals,
04157                               ImmutableCallSite *CS) const {
04158 
04159   std::vector<EVT> NodeTys;
04160   SmallVector<SDValue, 8> Ops;
04161   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
04162                                  SPDiff, isTailCall, IsPatchPoint, RegsToPass,
04163                                  Ops, NodeTys, CS, Subtarget);
04164 
04165   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
04166   if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
04167     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
04168 
04169   // When performing tail call optimization the callee pops its arguments off
04170   // the stack. Account for this here so these bytes can be pushed back on in
04171   // PPCFrameLowering::eliminateCallFramePseudoInstr.
04172   int BytesCalleePops =
04173     (CallConv == CallingConv::Fast &&
04174      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
04175 
04176   // Add a register mask operand representing the call-preserved registers.
04177   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
04178   const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
04179   assert(Mask && "Missing call preserved mask for calling convention");
04180   Ops.push_back(DAG.getRegisterMask(Mask));
04181 
04182   if (InFlag.getNode())
04183     Ops.push_back(InFlag);
04184 
04185   // Emit tail call.
04186   if (isTailCall) {
04187     assert(((Callee.getOpcode() == ISD::Register &&
04188              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
04189             Callee.getOpcode() == ISD::TargetExternalSymbol ||
04190             Callee.getOpcode() == ISD::TargetGlobalAddress ||
04191             isa<ConstantSDNode>(Callee)) &&
04192     "Expecting a global address, external symbol, absolute value or register");
04193 
04194     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
04195   }
04196 
04197   // Add a NOP immediately after the branch instruction when using the 64-bit
04198   // SVR4 ABI. At link time, if caller and callee are in a different module and
04199   // thus have a different TOC, the call will be replaced with a call to a stub
04200   // function which saves the current TOC, loads the TOC of the callee and
04201   // branches to the callee. The NOP will be replaced with a load instruction
04202   // which restores the TOC of the caller from the TOC save slot of the current
04203   // stack frame. If caller and callee belong to the same module (and have the
04204   // same TOC), the NOP will remain unchanged.
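        // (Illustrative, ELFv1 offsets: for a cross-module call the linker turns
        //    bl callee     ; redirected to a TOC-saving stub
        //    nop           ; rewritten to: ld r2, 40(r1)
        //  while for a same-module call the nop is left in place.)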
04205 
04206   if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
04207       !IsPatchPoint) {
04208     if (CallOpc == PPCISD::BCTRL) {
04209       // This is a call through a function pointer.
04210       // Restore the caller TOC from the save area into R2.
04211       // See PrepareCall() for more information about calls through function
04212       // pointers in the 64-bit SVR4 ABI.
04213       // We are using a target-specific load with r2 hard coded, because the
04214       // result of a target-independent load would never go directly into r2,
04215       // since r2 is a reserved register (which prevents the register allocator
04216       // from allocating it), resulting in an additional register being
04217       // allocated and an unnecessary move instruction being generated.
04218       CallOpc = PPCISD::BCTRL_LOAD_TOC;
04219 
04220       EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
04221       SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
04222       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
04223       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
04224       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
04225 
04226       // The address needs to go after the chain input but before the flag (or
04227       // any other variadic arguments).
04228       Ops.insert(std::next(Ops.begin()), AddTOC);
04229     } else if ((CallOpc == PPCISD::CALL) &&
04230                (!isLocalCall(Callee) ||
04231                 DAG.getTarget().getRelocationModel() == Reloc::PIC_))
04232       // Otherwise insert a NOP for non-local calls.
04233       CallOpc = PPCISD::CALL_NOP;
04234   }
04235 
04236   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
04237   InFlag = Chain.getValue(1);
04238 
04239   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
04240                              DAG.getIntPtrConstant(BytesCalleePops, true),
04241                              InFlag, dl);
04242   if (!Ins.empty())
04243     InFlag = Chain.getValue(1);
04244 
04245   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
04246                          Ins, dl, DAG, InVals);
04247 }
04248 
04249 SDValue
04250 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
04251                              SmallVectorImpl<SDValue> &InVals) const {
04252   SelectionDAG &DAG                     = CLI.DAG;
04253   SDLoc &dl                             = CLI.DL;
04254   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
04255   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
04256   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
04257   SDValue Chain                         = CLI.Chain;
04258   SDValue Callee                        = CLI.Callee;
04259   bool &isTailCall                      = CLI.IsTailCall;
04260   CallingConv::ID CallConv              = CLI.CallConv;
04261   bool isVarArg                         = CLI.IsVarArg;
04262   bool IsPatchPoint                     = CLI.IsPatchPoint;
04263   ImmutableCallSite *CS                 = CLI.CS;
04264 
04265   if (isTailCall)
04266     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
04267                                                    Ins, DAG);
04268 
04269   if (!isTailCall && CS && CS->isMustTailCall())
04270     report_fatal_error("failed to perform tail call elimination on a call "
04271                        "site marked musttail");
04272 
04273   if (Subtarget.isSVR4ABI()) {
04274     if (Subtarget.isPPC64())
04275       return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
04276                               isTailCall, IsPatchPoint, Outs, OutVals, Ins,
04277                               dl, DAG, InVals, CS);
04278     else
04279       return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
04280                               isTailCall, IsPatchPoint, Outs, OutVals, Ins,
04281                               dl, DAG, InVals, CS);
04282   }
04283 
04284   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
04285                           isTailCall, IsPatchPoint, Outs, OutVals, Ins,
04286                           dl, DAG, InVals, CS);
04287 }
04288 
04289 SDValue
04290 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
04291                                     CallingConv::ID CallConv, bool isVarArg,
04292                                     bool isTailCall, bool IsPatchPoint,
04293                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
04294                                     const SmallVectorImpl<SDValue> &OutVals,
04295                                     const SmallVectorImpl<ISD::InputArg> &Ins,
04296                                     SDLoc dl, SelectionDAG &DAG,
04297                                     SmallVectorImpl<SDValue> &InVals,
04298                                     ImmutableCallSite *CS) const {
04299   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
04300   // of the 32-bit SVR4 ABI stack frame layout.
04301 
04302   assert((CallConv == CallingConv::C ||
04303           CallConv == CallingConv::Fast) && "Unknown calling convention!");
04304 
04305   unsigned PtrByteSize = 4;
04306 
04307   MachineFunction &MF = DAG.getMachineFunction();
04308 
04309   // Mark this function as potentially containing a tail call. As a
04310   // consequence the frame pointer will be used for dynamic alloca and for
04311   // restoring the caller's stack pointer in this function's epilog. This is
04312   // done because the tail-called function might overwrite the value in this
04313   // function's (MF) stack pointer stack slot 0(SP).
04314   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
04315       CallConv == CallingConv::Fast)
04316     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
04317 
04318   // Count how many bytes are to be pushed on the stack, including the linkage
04319   // area, parameter list area and the part of the local variable space which
04320   // contains copies of aggregates which are passed by value.
04321 
04322   // Assign locations to all of the outgoing arguments.
04323   SmallVector<CCValAssign, 16> ArgLocs;
04324   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
04325                  *DAG.getContext());
04326 
04327   // Reserve space for the linkage area on the stack.
04328   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
04329                        PtrByteSize);
04330 
04331   if (isVarArg) {
04332     // Handle fixed and variable vector arguments differently.
04333     // Fixed vector arguments go into registers as long as registers are
04334     // available. Variable vector arguments always go into memory.
04335     unsigned NumArgs = Outs.size();
04336 
04337     for (unsigned i = 0; i != NumArgs; ++i) {
04338       MVT ArgVT = Outs[i].VT;
04339       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
04340       bool Result;
04341 
04342       if (Outs[i].IsFixed) {
04343         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
04344                                CCInfo);
04345       } else {
04346         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
04347                                       ArgFlags, CCInfo);
04348       }
04349 
04350       if (Result) {
04351 #ifndef NDEBUG
04352         errs() << "Call operand #" << i << " has unhandled type "
04353                << EVT(ArgVT).getEVTString() << "\n";
04354 #endif
04355         llvm_unreachable(nullptr);
04356       }
04357     }
04358   } else {
04359     // All arguments are treated the same.
04360     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
04361   }
04362 
04363   // Assign locations to all of the outgoing aggregate by value arguments.
04364   SmallVector<CCValAssign, 16> ByValArgLocs;
04365   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
04366                       ByValArgLocs, *DAG.getContext());
04367 
04368   // Reserve stack space for the allocations in CCInfo.
04369   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
04370 
04371   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
04372 
04373   // Size of the linkage area, parameter list area, and the part of the local
04374   // variable space where copies of aggregates which are passed by value are
04375   // stored.
04376   unsigned NumBytes = CCByValInfo.getNextStackOffset();
04377 
04378   // Calculate by how many bytes the stack has to be adjusted in case of tail
04379   // call optimization.
04380   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
04381 
04382   // Adjust the stack pointer for the new arguments...
04383   // These operations are automatically eliminated by the prolog/epilog pass
04384   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
04385                                dl);
04386   SDValue CallSeqStart = Chain;
04387 
04388   // Load the return address and frame pointer so they can be moved somewhere
04389   // else later.
04390   SDValue LROp, FPOp;
04391   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
04392                                        dl);
04393 
04394   // Set up a copy of the stack pointer for use in loading and storing any
04395   // arguments that may not fit in the registers available for argument
04396   // passing.
04397   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
04398 
04399   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
04400   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
04401   SmallVector<SDValue, 8> MemOpChains;
04402 
04403   bool seenFloatArg = false;
04404   // Walk the register/memloc assignments, inserting copies/loads.
04405   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
04408     CCValAssign &VA = ArgLocs[i];
04409     SDValue Arg = OutVals[i];
04410     ISD::ArgFlagsTy Flags = Outs[i].Flags;
04411 
04412     if (Flags.isByVal()) {
04413       // Argument is an aggregate which is passed by value, thus we need to
04414       // create a copy of it in the local variable space of the current stack
04415       // frame (which is the stack frame of the caller) and pass the address of
04416       // this copy to the callee.
04417       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
04418       CCValAssign &ByValVA = ByValArgLocs[j++];
04419       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
04420 
04421       // Memory reserved in the local variable space of the caller's stack frame.
04422       unsigned LocMemOffset = ByValVA.getLocMemOffset();
04423 
04424       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
04425       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
04426 
04427       // Create a copy of the argument in the local area of the current
04428       // stack frame.
04429       SDValue MemcpyCall =
04430         CreateCopyOfByValArgument(Arg, PtrOff,
04431                                   CallSeqStart.getNode()->getOperand(0),
04432                                   Flags, DAG, dl);
04433 
04434       // This must go outside the CALLSEQ_START..END.
04435       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
04436                            CallSeqStart.getNode()->getOperand(1),
04437                            SDLoc(MemcpyCall));
04438       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
04439                              NewCallSeqStart.getNode());
04440       Chain = CallSeqStart = NewCallSeqStart;
04441 
04442       // Pass the address of the aggregate copy on the stack either in a
04443       // physical register or in the parameter list area of the current stack
04444       // frame to the callee.
04445       Arg = PtrOff;
04446     }
04447 
04448     if (VA.isRegLoc()) {
04449       if (Arg.getValueType() == MVT::i1)
04450         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
04451 
04452       seenFloatArg |= VA.getLocVT().isFloatingPoint();
04453       // Put argument in a physical register.
04454       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
04455     } else {
04456       // Put argument in the parameter list area of the current stack frame.
04457       assert(VA.isMemLoc());
04458       unsigned LocMemOffset = VA.getLocMemOffset();
04459 
04460       if (!isTailCall) {
04461         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
04462         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
04463 
04464         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
04465                                            MachinePointerInfo(),
04466                                            false, false, 0));
04467       } else {
04468         // Calculate and remember argument location.
04469         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
04470                                  TailCallArguments);
04471       }
04472     }
04473   }
04474 
04475   if (!MemOpChains.empty())
04476     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
04477 
04478   // Build a sequence of copy-to-reg nodes chained together with token chain
04479   // and flag operands which copy the outgoing args into the appropriate regs.
04480   SDValue InFlag;
04481   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
04482     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
04483                              RegsToPass[i].second, InFlag);
04484     InFlag = Chain.getValue(1);
04485   }
04486 
04487   // Set CR bit 6 to true if this is a vararg call with floating args passed in
04488   // registers.
04489   if (isVarArg) {
04490     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
04491     SDValue Ops[] = { Chain, InFlag };
04492 
04493     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
04494                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
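          // (For instance, a vararg call such as printf("%f", x) passes x in an
          //  FPR, so CR6 is set and the callee's prologue knows it must spill the
          //  FP argument registers for va_start; an all-integer call clears it.)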
04495 
04496     InFlag = Chain.getValue(1);
04497   }
04498 
04499   if (isTailCall)
04500     PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
04501                     false, TailCallArguments);
04502 
04503   return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
04504                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
04505                     NumBytes, Ins, InVals, CS);
04506 }
04507 
04508 // Copy an argument into memory, being careful to do this outside the
04509 // call sequence for the call to which the argument belongs.
04510 SDValue
04511 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
04512                                               SDValue CallSeqStart,
04513                                               ISD::ArgFlagsTy Flags,
04514                                               SelectionDAG &DAG,
04515                                               SDLoc dl) const {
04516   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
04517                         CallSeqStart.getNode()->getOperand(0),
04518                         Flags, DAG, dl);
04519   // The MEMCPY must go outside the CALLSEQ_START..END.
04520   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
04521                              CallSeqStart.getNode()->getOperand(1),
04522                              SDLoc(MemcpyCall));
04523   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
04524                          NewCallSeqStart.getNode());
04525   return NewCallSeqStart;
04526 }
04527 
04528 SDValue
04529 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
04530                                     CallingConv::ID CallConv, bool isVarArg,
04531                                     bool isTailCall, bool IsPatchPoint,
04532                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
04533                                     const SmallVectorImpl<SDValue> &OutVals,
04534                                     const SmallVectorImpl<ISD::InputArg> &Ins,
04535                                     SDLoc dl, SelectionDAG &DAG,
04536                                     SmallVectorImpl<SDValue> &InVals,
04537                                     ImmutableCallSite *CS) const {
04538 
04539   bool isELFv2ABI = Subtarget.isELFv2ABI();
04540   bool isLittleEndian = Subtarget.isLittleEndian();
04541   unsigned NumOps = Outs.size();
04542 
04543   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
04544   unsigned PtrByteSize = 8;
04545 
04546   MachineFunction &MF = DAG.getMachineFunction();
04547 
04548   // Mark this function as potentially containing a tail call. As a
04549   // consequence the frame pointer will be used for dynamic alloca and for
04550   // restoring the caller's stack pointer in this function's epilog. This is
04551   // done because the tail-called function might overwrite the value in this
04552   // function's (MF) stack pointer stack slot 0(SP).
04553   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
04554       CallConv == CallingConv::Fast)
04555     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
04556 
04557   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
04558          "fastcc not supported on varargs functions");
04559 
04560   // Count how many bytes are to be pushed on the stack, including the linkage
04561   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
04562   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
04563   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
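        // (Sketch of the reserved doublewords, ELFv1:
        //    SP+0  back chain   SP+8  CR save    SP+16 LR save
        //    SP+24 reserved     SP+32 reserved   SP+40 TOC save
        //  ELFv2 drops the two reserved slots and moves the TOC save to SP+24.)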
04564   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
04565   unsigned NumBytes = LinkageSize;
04566   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
04567   unsigned &QFPR_idx = FPR_idx; // QPX args are counted against the FPRs.
04568 
04569   static const MCPhysReg GPR[] = {
04570     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
04571     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
04572   };
04573   static const MCPhysReg VR[] = {
04574     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
04575     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
04576   };
04577   static const MCPhysReg VSRH[] = {
04578     PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
04579     PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
04580   };
04581 
04582   const unsigned NumGPRs = array_lengthof(GPR);
04583   const unsigned NumFPRs = 13;
04584   const unsigned NumVRs  = array_lengthof(VR);
04585   const unsigned NumQFPRs = NumFPRs;
04586 
04587   // When using the fast calling convention, we don't provide backing for
04588   // arguments that will be in registers.
04589   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
04590 
04591   // Add up all the space actually used.
04592   for (unsigned i = 0; i != NumOps; ++i) {
04593     ISD::ArgFlagsTy Flags = Outs[i].Flags;
04594     EVT ArgVT = Outs[i].VT;
04595     EVT OrigVT = Outs[i].ArgVT;
04596 
04597     if (CallConv == CallingConv::Fast) {
04598       if (Flags.isByVal())
04599         NumGPRsUsed += (Flags.getByValSize()+7)/8;
04600       else
04601         switch (ArgVT.getSimpleVT().SimpleTy) {
04602         default: llvm_unreachable("Unexpected ValueType for argument!");
04603         case MVT::i1:
04604         case MVT::i32:
04605         case MVT::i64:
04606           if (++NumGPRsUsed <= NumGPRs)
04607             continue;
04608           break;
04609         case MVT::v4i32:
04610         case MVT::v8i16:
04611         case MVT::v16i8:
04612         case MVT::v2f64:
04613         case MVT::v2i64:
04614           if (++NumVRsUsed <= NumVRs)
04615             continue;
04616           break;
04617         case MVT::v4f32:
04618           // When using QPX, this is handled like a FP register; otherwise,
04619           // it is an Altivec register.
04620           if (Subtarget.hasQPX()) {
04621             if (++NumFPRsUsed <= NumFPRs)
04622               continue;
04623           } else {
04624             if (++NumVRsUsed <= NumVRs)
04625               continue;
04626           }
04627           break;
04628         case MVT::f32:
04629         case MVT::f64:
04630         case MVT::v4f64: // QPX
04631         case MVT::v4i1:  // QPX
04632           if (++NumFPRsUsed <= NumFPRs)
04633             continue;
04634           break;
04635         }
04636     }
04637 
04638     // Respect alignment of argument on the stack.
04639     unsigned Align =
04640       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
04641     NumBytes = ((NumBytes + Align - 1) / Align) * Align;
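          // (e.g. NumBytes = 52 followed by a 16-byte-aligned vector argument
          //  rounds up to 64 before the argument's own size is added.)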
04642 
04643     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
04644     if (Flags.isInConsecutiveRegsLast())
04645       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
04646   }
04647 
04648   unsigned NumBytesActuallyUsed = NumBytes;
04649 
04650   // The prolog code of the callee may store up to 8 GPR argument registers to
04651   // the stack, allowing va_start to index over them in memory if it is varargs.
04652   // Because we cannot tell if this is needed on the caller side, we have to
04653   // conservatively assume that it is needed.  As such, make sure we have at
04654   // least enough stack space for the caller to store the 8 GPRs.
04655   // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
04656   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
04657 
04658   // Tail call needs the stack to be aligned.
04659   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
04660       CallConv == CallingConv::Fast)
04661     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
04662 
04663   // Calculate by how many bytes the stack has to be adjusted in case of tail
04664   // call optimization.
04665   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
04666 
04667   // To protect arguments on the stack from being clobbered in a tail call,
04668   // force all the loads to happen before doing any other lowering.
04669   if (isTailCall)
04670     Chain = DAG.getStackArgumentTokenFactor(Chain);
04671 
04672   // Adjust the stack pointer for the new arguments...
04673   // These operations are automatically eliminated by the prolog/epilog pass
04674   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
04675                                dl);
04676   SDValue CallSeqStart = Chain;
04677 
04678   // Load the return address and frame pointer so they can be moved somewhere
04679   // else later.
04680   SDValue LROp, FPOp;
04681   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
04682                                        dl);
04683 
04684   // Set up a copy of the stack pointer for use in loading and storing any
04685   // arguments that may not fit in the registers available for argument
04686   // passing.
04687   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
04688 
04689   // Figure out which arguments are going to go in registers, and which in
04690   // memory.  Also, if this is a vararg function, floating point arguments
04691   // must be stored to our stack, and loaded into integer regs as well, if
04692   // any integer regs are available for argument passing.
04693   unsigned ArgOffset = LinkageSize;
04694 
04695   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
04696   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
04697 
04698   SmallVector<SDValue, 8> MemOpChains;
04699   for (unsigned i = 0; i != NumOps; ++i) {
04700     SDValue Arg = OutVals[i];
04701     ISD::ArgFlagsTy Flags = Outs[i].Flags;
04702     EVT ArgVT = Outs[i].VT;
04703     EVT OrigVT = Outs[i].ArgVT;
04704 
04705     // PtrOff will be used to store the current argument to the stack if a
04706     // register cannot be found for it.
04707     SDValue PtrOff;
04708 
04709     // We re-align the argument offset for each argument, except under the
04710     // fast calling convention, where we re-align only when the argument will
04711     // actually use a stack slot.
04712     auto ComputePtrOff = [&]() {
04713       // Respect alignment of argument on the stack.
04714       unsigned Align =
04715         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
04716       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
04717 
04718       PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
04719 
04720       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
04721     };
04722 
04723     if (CallConv != CallingConv::Fast) {
04724       ComputePtrOff();
04725 
04726       // Compute GPR index associated with argument offset.
04727       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
04728       GPR_idx = std::min(GPR_idx, NumGPRs);
04729     }
04730 
04731     // Promote integers to 64-bit values.
04732     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
04733       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
04734       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
04735       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
04736     }
04737 
04738     // FIXME memcpy is used way more than necessary.  Correctness first.
04739     // Note: "by value" is code for passing a structure by value, not
04740     // basic types.
04741     if (Flags.isByVal()) {
04742       // Note: Size includes alignment padding, so
04743       //   struct x { short a; char b; }
04744       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
04745       // These are the proper values we need for right-justifying the
04746       // aggregate in a parameter register.
04747       unsigned Size = Flags.getByValSize();
04748 
04749       // An empty aggregate parameter takes up no storage and no
04750       // registers.
04751       if (Size == 0)
04752         continue;
04753 
04754       if (CallConv == CallingConv::Fast)
04755         ComputePtrOff();
04756 
04757       // All aggregates smaller than 8 bytes must be passed right-justified.
04758       if (Size==1 || Size==2 || Size==4) {
04759         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
04760         if (GPR_idx != NumGPRs) {
04761           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
04762                                         MachinePointerInfo(), VT,
04763                                         false, false, false, 0);
04764           MemOpChains.push_back(Load.getValue(1));
04765           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
04766 
04767           ArgOffset += PtrByteSize;
04768           continue;
04769         }
04770       }
04771 
04772       if (GPR_idx == NumGPRs && Size < 8) {
04773         SDValue AddPtr = PtrOff;
04774         if (!isLittleEndian) {
04775           SDValue Const = DAG.getConstant(PtrByteSize - Size,
04776                                           PtrOff.getValueType());
04777           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
04778         }
04779         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
04780                                                           CallSeqStart,
04781                                                           Flags, DAG, dl);
04782         ArgOffset += PtrByteSize;
04783         continue;
04784       }
04785       // Copy entire object into memory.  There are cases where gcc-generated
04786       // code assumes it is there, even if it could be put entirely into
04787       // registers.  (This is not what the doc says.)
04788 
04789       // FIXME: The above statement is likely due to a misunderstanding of the
04790       // documents.  All arguments must be copied into the parameter area BY
04791       // THE CALLEE in the event that the callee takes the address of any
04792       // formal argument.  That has not yet been implemented.  However, it is
04793       // reasonable to use the stack area as a staging area for the register
04794       // load.
04795 
04796       // Skip this for small aggregates, as we will use the same slot for a
04797       // right-justified copy, below.
04798       if (Size >= 8)
04799         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
04800                                                           CallSeqStart,
04801                                                           Flags, DAG, dl);
04802 
04803       // When a register is available, pass a small aggregate right-justified.
04804       if (Size < 8 && GPR_idx != NumGPRs) {
04805         // The easiest way to get this right-justified in a register
04806         // is to copy the structure into the rightmost portion of a
04807         // local variable slot, then load the whole slot into the
04808         // register.
04809         // FIXME: The memcpy seems to produce pretty awful code for
04810         // small aggregates, particularly for packed ones.
04811         // FIXME: It would be preferable to use the slot in the
04812         // parameter save area instead of a new local variable.
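              // Sketch of the effect for a hypothetical Size = 3 on a
              // big-endian target: AddPtr = PtrOff + (8 - 3), so the memcpy
              // below fills bytes 5..7 of the doubleword slot and the
              // 64-bit load then yields a right-justified value.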
04813         SDValue AddPtr = PtrOff;
04814         if (!isLittleEndian) {
04815           SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
04816           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
04817         }
04818         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
04819                                                           CallSeqStart,
04820                                                           Flags, DAG, dl);
04821 
04822         // Load the slot into the register.
04823         SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
04824                                    MachinePointerInfo(),
04825                                    false, false, false, 0);
04826         MemOpChains.push_back(Load.getValue(1));
04827         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
04828 
04829         // Done with this argument.
04830         ArgOffset += PtrByteSize;
04831         continue;
04832       }
04833 
04834       // For aggregates larger than PtrByteSize, copy the pieces of the
04835       // object that fit into registers from the parameter save area.
04836       for (unsigned j=0; j<Size; j+=PtrByteSize) {
04837         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
04838         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
04839         if (GPR_idx != NumGPRs) {
04840           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
04841                                      MachinePointerInfo(),
04842                                      false, false, false, 0);
04843           MemOpChains.push_back(Load.getValue(1));
04844           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
04845           ArgOffset += PtrByteSize;
04846         } else {
04847           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
04848           break;
04849         }
04850       }
04851       continue;
04852     }
04853 
04854     switch (Arg.getSimpleValueType().SimpleTy) {
04855     default: llvm_unreachable("Unexpected ValueType for argument!");
04856     case MVT::i1:
04857     case MVT::i32:
04858     case MVT::i64:
04859       // These can be scalar arguments or elements of an integer array type
04860       // passed directly.  Clang may use those instead of "byval" aggregate
04861       // types to avoid forcing arguments to memory unnecessarily.
04862       if (GPR_idx != NumGPRs) {
04863         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
04864       } else {
04865         if (CallConv == CallingConv::Fast)
04866           ComputePtrOff();
04867 
04868         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
04869                          true, isTailCall, false, MemOpChains,
04870                          TailCallArguments, dl);
04871         if (CallConv == CallingConv::Fast)
04872           ArgOffset += PtrByteSize;
04873       }
04874       if (CallConv != CallingConv::Fast)
04875         ArgOffset += PtrByteSize;
04876       break;
04877     case MVT::f32:
04878     case MVT::f64: {
04879       // These can be scalar arguments or elements of a float array type
04880       // passed directly.  The latter are used to implement ELFv2 homogeneous
04881       // float aggregates.
04882 
04883       // Named arguments go into FPRs first, and once they overflow, the
04884       // remaining arguments go into GPRs and then the parameter save area.
04885       // Unnamed arguments for vararg functions always go to GPRs and
04886       // then the parameter save area.  For now, put all arguments to vararg
04887       // routines always in both locations (FPR *and* GPR or stack slot).
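            // Illustrative example: a double passed to a varargs routine is
            // copied both into the next FPR and into a GPR or stack slot, so
            // the value is available whether the callee reads it as a named
            // (FPR) or unnamed (GPR/memory) argument.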
04888       bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
04889       bool NeededLoad = false;
04890 
04891       // First load the argument into the next available FPR.
04892       if (FPR_idx != NumFPRs)
04893         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
04894 
04895       // Next, load the argument into GPR or stack slot if needed.
04896       if (!NeedGPROrStack)
04897         ;
04898       else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
04899         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
04900         // once we support fp <-> gpr moves.
04901 
04902         // In the non-vararg case, this can only ever happen in the
04903         // presence of f32 array types, since otherwise we never run
04904         // out of FPRs before running out of GPRs.
04905         SDValue ArgVal;
04906 
04907         // Double values are always passed in a single GPR.
04908         if (Arg.getValueType() != MVT::f32) {
04909           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
04910 
04911         // Non-array float values are extended and passed in a GPR.
04912         } else if (!Flags.isInConsecutiveRegs()) {
04913           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
04914           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
04915 
04916         // If we have an array of floats, we collect every odd element
04917         // together with its predecessor into one GPR.
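              // Illustrative example: for a float array f[3] on a big-endian
              // target, f[0] and f[1] are bitcast to i32 and combined via
              // BUILD_PAIR into a single i64 (f[0] ends up in the high word
              // after the swap below); f[2] is handled by the
              // isInConsecutiveRegsLast case that follows.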
04918         } else if (ArgOffset % PtrByteSize != 0) {
04919           SDValue Lo, Hi;
04920           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
04921           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
04922           if (!isLittleEndian)
04923             std::swap(Lo, Hi);
04924           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
04925 
04926         // The final element, if at an even index, goes into the first half of a GPR.
04927         } else if (Flags.isInConsecutiveRegsLast()) {
04928           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
04929           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
04930           if (!isLittleEndian)
04931             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
04932                                  DAG.getConstant(32, MVT::i32));
04933 
04934         // Non-final even elements are skipped; they will be handled
04935         // together with the subsequent argument on the next go-around.
04936         } else
04937           ArgVal = SDValue();
04938 
04939         if (ArgVal.getNode())
04940           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
04941       } else {
04942         if (CallConv == CallingConv::Fast)
04943           ComputePtrOff();
04944 
04945         // Single-precision floating-point values are mapped to the
04946         // second (rightmost) word of the stack doubleword.
04947         if (Arg.getValueType() == MVT::f32 &&
04948             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
04949           SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
04950           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
04951         }
04952 
04953         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
04954                          true, isTailCall, false, MemOpChains,
04955                          TailCallArguments, dl);
04956 
04957         NeededLoad = true;
04958       }
04959       // When passing an array of floats, the array occupies consecutive
04960       // space in the argument area; only round up to the next doubleword
04961       // at the end of the array.  Otherwise, each float takes 8 bytes.
04962       if (CallConv != CallingConv::Fast || NeededLoad) {
04963         ArgOffset += (Arg.getValueType() == MVT::f32 &&
04964                       Flags.isInConsecutiveRegs()) ? 4 : 8;
04965         if (Flags.isInConsecutiveRegsLast())
04966           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
04967       }
04968       break;
04969     }
04970     case MVT::v4f32:
04971     case MVT::v4i32:
04972     case MVT::v8i16:
04973     case MVT::v16i8:
04974     case MVT::v2f64:
04975     case MVT::v2i64:
04976       if (!Subtarget.hasQPX()) {
04977       // These can be scalar arguments or elements of a vector array type
04978       // passed directly.  The latter are used to implement ELFv2 homogeneous
04979       // vector aggregates.
04980 
04981       // For a varargs call, named arguments go into VRs or on the stack as
04982       // usual; unnamed arguments always go to the stack or the corresponding
04983       // GPRs when within range.  For now, we always put the value in both
04984       // locations (or even all three).
04985       if (isVarArg) {
04986         // We could elide this store in the case where the object fits
04987         // entirely in R registers.  Maybe later.
04988         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
04989                                      MachinePointerInfo(), false, false, 0);
04990         MemOpChains.push_back(Store);
04991         if (VR_idx != NumVRs) {
04992           SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
04993                                      MachinePointerInfo(),
04994                                      false, false, false, 0);
04995           MemOpChains.push_back(Load.getValue(1));
04996 
04997           unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
04998                            Arg.getSimpleValueType() == MVT::v2i64) ?
04999                           VSRH[VR_idx] : VR[VR_idx];
05000           ++VR_idx;
05001 
05002           RegsToPass.push_back(std::make_pair(VReg, Load));
05003         }
05004         ArgOffset += 16;
05005         for (unsigned i=0; i<16; i+=PtrByteSize) {
05006           if (GPR_idx == NumGPRs)
05007             break;
05008           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
05009                                   DAG.getConstant(i, PtrVT));
05010           SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
05011                                      false, false, false, 0);
05012           MemOpChains.push_back(Load.getValue(1));
05013           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05014         }
05015         break;
05016       }
05017 
05018       // Non-varargs Altivec params go into VRs or on the stack.
05019       if (VR_idx != NumVRs) {
05020         unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
05021                          Arg.getSimpleValueType() == MVT::v2i64) ?
05022                         VSRH[VR_idx] : VR[VR_idx];
05023         ++VR_idx;
05024 
05025         RegsToPass.push_back(std::make_pair(VReg, Arg));
05026       } else {
05027         if (CallConv == CallingConv::Fast)
05028           ComputePtrOff();
05029 
05030         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
05031                          true, isTailCall, true, MemOpChains,
05032                          TailCallArguments, dl);
05033         if (CallConv == CallingConv::Fast)
05034           ArgOffset += 16;
05035       }
05036 
05037       if (CallConv != CallingConv::Fast)
05038         ArgOffset += 16;
05039       break;
05040       } // not QPX
05041 
05042       assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
05043              "Invalid QPX parameter type");
05044 
05045       /* fall through */
05046     case MVT::v4f64:
05047     case MVT::v4i1: {
05048       bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
05049       if (isVarArg) {
05050         // We could elide this store in the case where the object fits
05051         // entirely in R registers.  Maybe later.
05052         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
05053                                      MachinePointerInfo(), false, false, 0);
05054         MemOpChains.push_back(Store);
05055         if (QFPR_idx != NumQFPRs) {
05056           SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
05057                                      Store, PtrOff, MachinePointerInfo(),
05058                                      false, false, false, 0);
05059           MemOpChains.push_back(Load.getValue(1));
05060           RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
05061         }
05062         ArgOffset += (IsF32 ? 16 : 32);
05063         for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
05064           if (GPR_idx == NumGPRs)
05065             break;
05066           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
05067                                   DAG.getConstant(i, PtrVT));
05068           SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
05069                                      false, false, false, 0);
05070           MemOpChains.push_back(Load.getValue(1));
05071           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05072         }
05073         break;
05074       }
05075 
05076       // Non-varargs QPX params go into registers or on the stack.
05077       if (QFPR_idx != NumQFPRs) {
05078         RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
05079       } else {
05080         if (CallConv == CallingConv::Fast)
05081           ComputePtrOff();
05082 
05083         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
05084                          true, isTailCall, true, MemOpChains,
05085                          TailCallArguments, dl);
05086         if (CallConv == CallingConv::Fast)
05087           ArgOffset += (IsF32 ? 16 : 32);
05088       }
05089 
05090       if (CallConv != CallingConv::Fast)
05091         ArgOffset += (IsF32 ? 16 : 32);
05092       break;
05093       }
05094     }
05095   }
05096 
05097   assert(NumBytesActuallyUsed == ArgOffset);
05098   (void)NumBytesActuallyUsed;
05099 
05100   if (!MemOpChains.empty())
05101     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
05102 
05103   // Check if this is an indirect call (MTCTR/BCTRL).
05104   // See PrepareCall() for more information about calls through function
05105   // pointers in the 64-bit SVR4 ABI.
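        // A rough sketch of the eventual sequence for such a call
        // (illustrative only, not emitted literally here):
        //   std   r2, TOCSaveOffset(r1)   # save this function's TOC pointer
        //   mtctr <target>; bctrl         # indirect call through CTR
        //   ld    r2, TOCSaveOffset(r1)   # reload the TOC pointer on return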
05106   if (!isTailCall && !IsPatchPoint &&
05107       !isFunctionGlobalAddress(Callee) &&
05108       !isa<ExternalSymbolSDNode>(Callee)) {
05109     // Load r2 into a virtual register and store it to the TOC save area.
05110     setUsesTOCBasePtr(DAG);
05111     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
05112     // TOC save area offset.
05113     unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
05114     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
05115     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
05116     Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
05117                          MachinePointerInfo::getStack(TOCSaveOffset),
05118                          false, false, 0);
05119     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
05120     // This does not mean the MTCTR instruction must use R12; it's easier
05121     // to model this as an extra parameter, so do that.
05122     if (isELFv2ABI && !IsPatchPoint)
05123       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
05124   }
05125 
05126   // Build a sequence of copy-to-reg nodes chained together with token chain
05127   // and flag operands which copy the outgoing args into the appropriate regs.
05128   SDValue InFlag;
05129   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
05130     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
05131                              RegsToPass[i].second, InFlag);
05132     InFlag = Chain.getValue(1);
05133   }
05134 
05135   if (isTailCall)
05136     PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
05137                     FPOp, true, TailCallArguments);
05138 
05139   return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
05140                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
05141                     NumBytes, Ins, InVals, CS);
05142 }
05143 
05144 SDValue
05145 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
05146                                     CallingConv::ID CallConv, bool isVarArg,
05147                                     bool isTailCall, bool IsPatchPoint,
05148                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
05149                                     const SmallVectorImpl<SDValue> &OutVals,
05150                                     const SmallVectorImpl<ISD::InputArg> &Ins,
05151                                     SDLoc dl, SelectionDAG &DAG,
05152                                     SmallVectorImpl<SDValue> &InVals,
05153                                     ImmutableCallSite *CS) const {
05154 
05155   unsigned NumOps = Outs.size();
05156 
05157   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
05158   bool isPPC64 = PtrVT == MVT::i64;
05159   unsigned PtrByteSize = isPPC64 ? 8 : 4;
05160 
05161   MachineFunction &MF = DAG.getMachineFunction();
05162 
05163   // Mark this function as potentially containing a call that may be tail-call
05164   // optimized. As a consequence, the frame pointer will be used for dynamic
05165   // stack allocation and for restoring the caller's stack pointer in this
05166   // function's epilogue. This is done because the tail-called function might
05167   // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
05168   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
05169       CallConv == CallingConv::Fast)
05170     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
05171 
05172   // Count how many bytes are to be pushed on the stack, including the linkage
05173   // area and the parameter passing area.  We start with 24/48 bytes, which is
05174   // pre-reserved space for [SP][CR][LR][3 x unused].
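        // In 4-byte words on 32-bit (doubled on 64-bit), that is roughly:
        // back chain at 0(SP), saved CR at 4(SP), saved LR at 8(SP), and
        // three reserved words, for 24 bytes total (48 on PPC64).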
05175   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
05176   unsigned NumBytes = LinkageSize;
05177 
05178   // Add up all the space actually used.
05179   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
05180   // they all go in registers, but we must reserve stack space for them for
05181   // possible use by the caller.  In varargs or 64-bit calls, parameters are
05182   // assigned stack space in order, with padding so Altivec parameters are
05183   // 16-byte aligned.
05184   unsigned nAltivecParamsAtEnd = 0;
05185   for (unsigned i = 0; i != NumOps; ++i) {
05186     ISD::ArgFlagsTy Flags = Outs[i].Flags;
05187     EVT ArgVT = Outs[i].VT;
05188     // Varargs Altivec parameters are padded to a 16 byte boundary.
05189     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
05190         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
05191         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
05192       if (!isVarArg && !isPPC64) {
05193         // Non-varargs Altivec parameters go after all the non-Altivec
05194         // parameters; handle those later so we know how much padding we need.
05195         nAltivecParamsAtEnd++;
05196         continue;
05197       }
05198       // Varargs and 64-bit Altivec parameters are padded to a 16-byte boundary.
05199       NumBytes = ((NumBytes+15)/16)*16;
05200     }
05201     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
05202   }
05203 
05204   // Allow for Altivec parameters at the end, if needed.
05205   if (nAltivecParamsAtEnd) {
05206     NumBytes = ((NumBytes+15)/16)*16;
05207     NumBytes += 16*nAltivecParamsAtEnd;
05208   }
05209 
05210   // The prologue code of the callee may store up to 8 GPR argument registers to
05211   // the stack, allowing va_start to index over them in memory if it is varargs.
05212   // Because we cannot tell if this is needed on the caller side, we have to
05213   // conservatively assume that it is needed.  As such, make sure we have at
05214   // least enough stack space for the caller to store the 8 GPRs.
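        // For example: on 32-bit this reserves at least 24 + 8*4 = 56 bytes;
        // on 64-bit, at least 48 + 8*8 = 112 bytes.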
05215   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
05216 
05217   // Tail call needs the stack to be aligned.
05218   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
05219       CallConv == CallingConv::Fast)
05220     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
05221 
05222   // Calculate by how many bytes the stack has to be adjusted in case of tail
05223   // call optimization.
05224   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
05225 
05226   // To protect arguments on the stack from being clobbered in a tail call,
05227   // force all the loads to happen before doing any other lowering.
05228   if (isTailCall)
05229     Chain = DAG.getStackArgumentTokenFactor(Chain);
05230 
05231   // Adjust the stack pointer for the new arguments...
05232   // These operations are automatically eliminated by the prolog/epilog pass
05233   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
05234                                dl);
05235   SDValue CallSeqStart = Chain;
05236 
05237   // Load the return address and frame pointer so they can be moved somewhere
05238   // else later.
05239   SDValue LROp, FPOp;
05240   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
05241                                        dl);
05242 
05243   // Set up a copy of the stack pointer for use loading and storing any
05244   // arguments that may not fit in the registers available for argument
05245   // passing.
05246   SDValue StackPtr;
05247   if (isPPC64)
05248     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
05249   else
05250     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
05251 
05252   // Figure out which arguments are going to go in registers, and which in
05253   // memory.  Also, if this is a vararg function, floating point operations
05254   // must be stored to our stack, and loaded into integer regs as well, if
05255   // any integer regs are available for argument passing.
05256   unsigned ArgOffset = LinkageSize;
05257   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
05258 
05259   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
05260     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
05261     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
05262   };
05263   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
05264     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
05265     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
05266   };
05267   static const MCPhysReg VR[] = {
05268     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
05269     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
05270   };
05271   const unsigned NumGPRs = array_lengthof(GPR_32);
05272   const unsigned NumFPRs = 13;
05273   const unsigned NumVRs  = array_lengthof(VR);
05274 
05275   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
05276 
05277   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
05278   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
05279 
05280   SmallVector<SDValue, 8> MemOpChains;
05281   for (unsigned i = 0; i != NumOps; ++i) {
05282     SDValue Arg = OutVals[i];
05283     ISD::ArgFlagsTy Flags = Outs[i].Flags;
05284 
05285     // PtrOff will be used to store the current argument to the stack if a
05286     // register cannot be found for it.
05287     SDValue PtrOff;
05288 
05289     PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
05290 
05291     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
05292 
05293     // On PPC64, promote integers to 64-bit values.
05294     if (isPPC64 && Arg.getValueType() == MVT::i32) {
05295       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
05296       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
05297       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
05298     }
05299 
05300     // FIXME memcpy is used way more than necessary.  Correctness first.
05301     // Note: "by value" refers to passing a structure by value, not to
05302     // basic types.
05303     if (Flags.isByVal()) {
05304       unsigned Size = Flags.getByValSize();
05305       // Very small objects are passed right-justified.  Everything else is
05306       // passed left-justified.
05307       if (Size==1 || Size==2) {
05308         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
05309         if (GPR_idx != NumGPRs) {
05310           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
05311                                         MachinePointerInfo(), VT,
05312                                         false, false, false, 0);
05313           MemOpChains.push_back(Load.getValue(1));
05314           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05315 
05316           ArgOffset += PtrByteSize;
05317         } else {
05318           SDValue Const = DAG.getConstant(PtrByteSize - Size,
05319                                           PtrOff.getValueType());
05320           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
05321           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
05322                                                             CallSeqStart,
05323                                                             Flags, DAG, dl);
05324           ArgOffset += PtrByteSize;
05325         }
05326         continue;
05327       }
05328       // Copy entire object into memory.  There are cases where gcc-generated
05329       // code assumes it is there, even if it could be put entirely into
05330       // registers.  (This is not what the doc says.)
05331       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
05332                                                         CallSeqStart,
05333                                                         Flags, DAG, dl);
05334 
05335       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
05336       // copy the pieces of the object that fit into registers from the
05337       // parameter save area.
05338       for (unsigned j=0; j<Size; j+=PtrByteSize) {
05339         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
05340         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
05341         if (GPR_idx != NumGPRs) {
05342           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
05343                                      MachinePointerInfo(),
05344                                      false, false, false, 0);
05345           MemOpChains.push_back(Load.getValue(1));
05346           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05347           ArgOffset += PtrByteSize;
05348         } else {
05349           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
05350           break;
05351         }
05352       }
05353       continue;
05354     }
05355 
05356     switch (Arg.getSimpleValueType().SimpleTy) {
05357     default: llvm_unreachable("Unexpected ValueType for argument!");
05358     case MVT::i1:
05359     case MVT::i32:
05360     case MVT::i64:
05361       if (GPR_idx != NumGPRs) {
05362         if (Arg.getValueType() == MVT::i1)
05363           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
05364 
05365         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
05366       } else {
05367         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
05368                          isPPC64, isTailCall, false, MemOpChains,
05369                          TailCallArguments, dl);
05370       }
05371       ArgOffset += PtrByteSize;
05372       break;
05373     case MVT::f32:
05374     case MVT::f64:
05375       if (FPR_idx != NumFPRs) {
05376         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
05377 
05378         if (isVarArg) {
05379           SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
05380                                        MachinePointerInfo(), false, false, 0);
05381           MemOpChains.push_back(Store);
05382 
05383           // Float varargs are always shadowed in available integer registers
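                // Illustrative example: an f64 vararg on 32-bit is stored to
                // its stack slot above and then reloaded into up to two i32
                // GPRs (the second load, at PtrOff + 4, is the block below),
                // so the callee's va_arg can read it from integer registers.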
05384           if (GPR_idx != NumGPRs) {
05385             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
05386                                        MachinePointerInfo(), false, false,
05387                                        false, 0);
05388             MemOpChains.push_back(Load.getValue(1));
05389             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05390           }
05391           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
05392             SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
05393             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
05394             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
05395                                        MachinePointerInfo(),
05396                                        false, false, false, 0);
05397             MemOpChains.push_back(Load.getValue(1));
05398             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05399           }
05400         } else {
05401           // If we have any FPRs remaining, we may also have GPRs remaining.
05402           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
05403           // GPRs.
05404           if (GPR_idx != NumGPRs)
05405             ++GPR_idx;
05406           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
05407               !isPPC64)  // PPC64 has 64-bit GPRs obviously :)
05408             ++GPR_idx;
05409         }
05410       } else
05411         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
05412                          isPPC64, isTailCall, false, MemOpChains,
05413                          TailCallArguments, dl);
05414       if (isPPC64)
05415         ArgOffset += 8;
05416       else
05417         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
05418       break;
05419     case MVT::v4f32:
05420     case MVT::v4i32:
05421     case MVT::v8i16:
05422     case MVT::v16i8:
05423       if (isVarArg) {
05424         // These go aligned on the stack, or in the corresponding R registers
05425         // when within range.  The Darwin PPC ABI doc claims they also go in
05426         // V registers; in fact gcc does this only for arguments that are
05427         // prototyped, not for those that match the "...".  We do it for all
05428         // arguments, which seems to work.
05429         while (ArgOffset % 16 !=0) {
05430           ArgOffset += PtrByteSize;
05431           if (GPR_idx != NumGPRs)
05432             GPR_idx++;
05433         }
05434         // We could elide this store in the case where the object fits
05435         // entirely in R registers.  Maybe later.
05436         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
05437                             DAG.getConstant(ArgOffset, PtrVT));
05438         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
05439                                      MachinePointerInfo(), false, false, 0);
05440         MemOpChains.push_back(Store);
05441         if (VR_idx != NumVRs) {
05442           SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
05443                                      MachinePointerInfo(),
05444                                      false, false, false, 0);
05445           MemOpChains.push_back(Load.getValue(1));
05446           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
05447         }
05448         ArgOffset += 16;
05449         for (unsigned i=0; i<16; i+=PtrByteSize) {
05450           if (GPR_idx == NumGPRs)
05451             break;
05452           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
05453                                   DAG.getConstant(i, PtrVT));
05454           SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
05455                                      false, false, false, 0);
05456           MemOpChains.push_back(Load.getValue(1));
05457           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05458         }
05459         break;
05460       }
05461 
05462       // Non-varargs Altivec params generally go in registers, but have
05463       // stack space allocated at the end.
05464       if (VR_idx != NumVRs) {
05465         // Doesn't have GPR space allocated.
05466         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
05467       } else if (nAltivecParamsAtEnd==0) {
05468         // We are emitting Altivec params in order.
05469         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
05470                          isPPC64, isTailCall, true, MemOpChains,
05471                          TailCallArguments, dl);
05472         ArgOffset += 16;
05473       }
05474       break;
05475     }
05476   }
05477   // If all Altivec parameters fit in registers, as they usually do,
05478   // they get stack space following the non-Altivec parameters.  We
05479   // don't track this here because nobody below needs it.
05480   // If there are more Altivec parameters than fit in registers, emit
05481   // the stores here.
05482   if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
05483     unsigned j = 0;
05484     // Offset is aligned; skip the first 12 params, which go in V registers.
05485     ArgOffset = ((ArgOffset+15)/16)*16;
05486     ArgOffset += 12*16;
05487     for (unsigned i = 0; i != NumOps; ++i) {
05488       SDValue Arg = OutVals[i];
05489       EVT ArgType = Outs[i].VT;
05490       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
05491           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
05492         if (++j > NumVRs) {
05493           SDValue PtrOff;
05494           // We are emitting Altivec params in order.
05495           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
05496                            isPPC64, isTailCall, true, MemOpChains,
05497                            TailCallArguments, dl);
05498           ArgOffset += 16;
05499         }
05500       }
05501     }
05502   }
05503 
05504   if (!MemOpChains.empty())
05505     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
05506 
05507   // On Darwin, R12 must contain the address of an indirect callee.  This does
05508   // not mean the MTCTR instruction must use R12; it's easier to model this as
05509   // an extra parameter, so do that.
05510   if (!isTailCall &&
05511       !isFunctionGlobalAddress(Callee) &&
05512       !isa<ExternalSymbolSDNode>(Callee) &&
05513       !isBLACompatibleAddress(Callee, DAG))
05514     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
05515                                                    PPC::R12), Callee));
05516 
05517   // Build a sequence of copy-to-reg nodes chained together with token chain
05518   // and flag operands which copy the outgoing args into the appropriate regs.
05519   SDValue InFlag;
05520   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
05521     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
05522                              RegsToPass[i].second, InFlag);
05523     InFlag = Chain.getValue(1);
05524   }
05525 
05526   if (isTailCall)
05527     PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
05528                     FPOp, true, TailCallArguments);
05529 
05530   return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
05531                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
05532                     NumBytes, Ins, InVals, CS);
05533 }
05534 
05535 bool
05536 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
05537                                   MachineFunction &MF, bool isVarArg,
05538                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
05539                                   LLVMContext &Context) const {
05540   SmallVector<CCValAssign, 16> RVLocs;
05541   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
05542   return CCInfo.CheckReturn(Outs, RetCC_PPC);
05543 }
05544 
05545 SDValue
05546 PPCTargetLowering::LowerReturn(SDValue Chain,
05547                                CallingConv::ID CallConv, bool isVarArg,
05548                                const SmallVectorImpl<ISD::OutputArg> &Outs,
05549                                const SmallVectorImpl<SDValue> &OutVals,
05550                                SDLoc dl, SelectionDAG &DAG) const {
05551 
05552   SmallVector<CCValAssign, 16> RVLocs;
05553   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
05554                  *DAG.getContext());
05555   CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
05556 
05557   SDValue Flag;
05558   SmallVector<SDValue, 4> RetOps(1, Chain);
05559 
05560   // Copy the result values into the output registers.
05561   for (unsigned i = 0; i != RVLocs.size(); ++i) {
05562     CCValAssign &VA = RVLocs[i];
05563     assert(VA.isRegLoc() && "Can only return in registers!");
05564 
05565     SDValue Arg = OutVals[i];
05566 
05567     switch (VA.getLocInfo()) {
05568     default: llvm_unreachable("Unknown loc info!");
05569     case CCValAssign::Full: break;
05570     case CCValAssign::AExt:
05571       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
05572       break;
05573     case CCValAssign::ZExt:
05574       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
05575       break;
05576     case CCValAssign::SExt:
05577       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
05578       break;
05579     }
05580 
05581     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
05582     Flag = Chain.getValue(1);
05583     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
05584   }
05585 
05586   RetOps[0] = Chain;  // Update chain.
05587 
05588   // Add the flag if we have it.
05589   if (Flag.getNode())
05590     RetOps.push_back(Flag);
05591 
05592   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
05593 }
05594 
05595 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
05596                                    const PPCSubtarget &Subtarget) const {
05597   // When we pop the dynamic allocation we need to restore the SP link.
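        // In outline (a sketch of the steps below): load the saved back-chain
        // word from 0(SP), copy SaveSP into the stack pointer, then store the
        // back-chain word at 0(new SP) so the SP linkage stays intact.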
05598   SDLoc dl(Op);
05599 
05600   // Get the correct type for pointers.
05601   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
05602 
05603   // Construct the stack pointer operand.
05604   bool isPPC64 = Subtarget.isPPC64();
05605   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
05606   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
05607 
05608   // Get the operands for the STACKRESTORE.
05609   SDValue Chain = Op.getOperand(0);
05610   SDValue SaveSP = Op.getOperand(1);
05611 
05612   // Load the old link SP.
05613   SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
05614                                    MachinePointerInfo(),
05615                                    false, false, false, 0);
05616 
05617   // Restore the stack pointer.
05618   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
05619 
05620   // Store the old link SP.
05621   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
05622                       false, false, 0);
05623 }
05624 
05625 
05626 
05627 SDValue
05628 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
05629   MachineFunction &MF = DAG.getMachineFunction();
05630   bool isPPC64 = Subtarget.isPPC64();
05631   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
05632 
05633   // Get the current return address save index.  The users of this index
05634   // will be primarily the RETURNADDR lowering code.
05635   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
05636   int RASI = FI->getReturnAddrSaveIndex();
05637 
05638   // If the return address save index hasn't been defined yet.
05639   if (!RASI) {
05640     // Find the fixed offset of the return address save area.
05641     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
05642     // Allocate the frame index for the return address save area.
05643     RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
05644     // Save the result.
05645     FI->setReturnAddrSaveIndex(RASI);
05646   }
05647   return DAG.getFrameIndex(RASI, PtrVT);
05648 }
05649 
05650 SDValue
05651 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
05652   MachineFunction &MF = DAG.getMachineFunction();
05653   bool isPPC64 = Subtarget.isPPC64();
05654   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
05655 
05656   // Get current frame pointer save index.  The users of this index will be
05657   // primarily DYNALLOC instructions.
05658   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
05659   int FPSI = FI->getFramePointerSaveIndex();
05660 
05661   // If the frame pointer save index hasn't been defined yet.
05662   if (!FPSI) {
05663     // Find the fixed offset of the frame pointer save area.
05664     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
05665     // Allocate the frame index for the frame pointer save area.
05666     FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
05667     // Save the result.
05668     FI->setFramePointerSaveIndex(FPSI);
05669   }
05670   return DAG.getFrameIndex(FPSI, PtrVT);
05671 }
05672 
05673 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
05674                                          SelectionDAG &DAG,
05675                                          const PPCSubtarget &Subtarget) const {
05676   // Get the inputs.
05677   SDValue Chain = Op.getOperand(0);
05678   SDValue Size  = Op.getOperand(1);
05679   SDLoc dl(Op);
05680 
05681   // Get the correct type for pointers.
05682   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
05683   // Negate the size.
05684   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
05685                                   DAG.getConstant(0, PtrVT), Size);
05686   // Construct a node for the frame pointer save index.
05687   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
05688   // Build a DYNALLOC node.
05689   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
05690   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
05691   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
05692 }
05693 
05694 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
05695                                                SelectionDAG &DAG) const {
05696   SDLoc DL(Op);
05697   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
05698                      DAG.getVTList(MVT::i32, MVT::Other),
05699                      Op.getOperand(0), Op.getOperand(1));
05700 }
05701 
05702 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
05703                                                 SelectionDAG &DAG) const {
05704   SDLoc DL(Op);
05705   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
05706                      Op.getOperand(0), Op.getOperand(1));
05707 }
05708 
05709 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
05710   if (Op.getValueType().isVector())
05711     return LowerVectorLoad(Op, DAG);
05712 
05713   assert(Op.getValueType() == MVT::i1 &&
05714          "Custom lowering only for i1 loads");
05715 
05716   // First, load 8 bits into 32 bits, then truncate to 1 bit.
05717 
05718   SDLoc dl(Op);
05719   LoadSDNode *LD = cast<LoadSDNode>(Op);
05720 
05721   SDValue Chain = LD->getChain();
05722   SDValue BasePtr = LD->getBasePtr();
05723   MachineMemOperand *MMO = LD->getMemOperand();
05724 
05725   SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
05726                                  BasePtr, MVT::i8, MMO);
05727   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
05728 
05729   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
05730   return DAG.getMergeValues(Ops, dl);
05731 }
05732 
05733 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
05734   if (Op.getOperand(1).getValueType().isVector())
05735     return LowerVectorStore(Op, DAG);
05736 
05737   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
05738          "Custom lowering only for i1 stores");
05739 
05740   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
05741 
05742   SDLoc dl(Op);
05743   StoreSDNode *ST = cast<StoreSDNode>(Op);
05744 
05745   SDValue Chain = ST->getChain();
05746   SDValue BasePtr = ST->getBasePtr();
05747   SDValue Value = ST->getValue();
05748   MachineMemOperand *MMO = ST->getMemOperand();
05749 
05750   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
05751   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
05752 }
05753 
05754 // FIXME: Remove this once the ANDI glue bug is fixed:
05755 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
05756   assert(Op.getValueType() == MVT::i1 &&
05757          "Custom lowering only for i1 results");
05758 
05759   SDLoc DL(Op);
05760   return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
05761                      Op.getOperand(0));
05762 }
05763 
05764 /// LowerSELECT_CC - Lower floating-point select_cc's into an fsel instruction
05765 /// when possible.
05766 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
05767   // Not FP? Not an fsel.
05768   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
05769       !Op.getOperand(2).getValueType().isFloatingPoint())
05770     return Op;
05771 
05772   // We might be able to do better than this under some circumstances, but in
05773   // general, fsel-based lowering of select is a finite-math-only optimization.
05774   // For more information, see section F.3 of the 2.06 ISA specification.
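        // A sketch of the transform used below: (x < y ? t : f) becomes
        // fsel(x - y, f, t), since fsel selects its second operand when its
        // first operand is >= 0.0.  The explicit subtraction is what makes
        // NaNs and infinities problematic.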
05775   if (!DAG.getTarget().Options.NoInfsFPMath ||
05776       !DAG.getTarget().Options.NoNaNsFPMath)
05777     return Op;
05778 
05779   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
05780 
05781   EVT ResVT = Op.getValueType();
05782   EVT CmpVT = Op.getOperand(0).getValueType();
05783   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
05784   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
05785   SDLoc dl(Op);
05786 
05787   // If the RHS of the comparison is a 0.0, we don't need to do the
05788   // subtraction at all.
05789   SDValue Sel1;
05790   if (isFloatingPointZero(RHS))
05791     switch (CC) {
05792     default: break;       // SETUO etc aren't handled by fsel.
05793     case ISD::SETNE:
05794       std::swap(TV, FV);
05795     case ISD::SETEQ:
05796       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
05797         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
05798       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
05799       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
05800         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
05801       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
05802                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
05803     case ISD::SETULT:
05804     case ISD::SETLT:
05805       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
05806     case ISD::SETOGE:
05807     case ISD::SETGE:
05808       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
05809         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
05810       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
05811     case ISD::SETUGT:
05812     case ISD::SETGT:
05813       std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
05814     case ISD::SETOLE:
05815     case ISD::SETLE:
05816       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
05817         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
05818       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
05819                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
05820     }
05821 
05822   SDValue Cmp;
05823   switch (CC) {
05824   default: break;       // SETUO etc aren't handled by fsel.
05825   case ISD::SETNE:
05826     std::swap(TV, FV);
05827   case ISD::SETEQ:
05828     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
05829     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
05830       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
05831     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
05832     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
05833       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
05834     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
05835                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
05836   case ISD::SETULT:
05837   case ISD::SETLT:
05838     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
05839     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
05840       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
05841     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
05842   case ISD::SETOGE:
05843   case ISD::SETGE:
05844     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
05845     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
05846       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
05847     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
05848   case ISD::SETUGT:
05849   case ISD::SETGT:
05850     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
05851     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
05852       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
05853     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
05854   case ISD::SETOLE:
05855   case ISD::SETLE:
05856     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
05857     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
05858       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
05859     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
05860   }
05861   return Op;
05862 }
05863 
05864 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
05865                                                SelectionDAG &DAG,
05866                                                SDLoc dl) const {
05867   assert(Op.getOperand(0).getValueType().isFloatingPoint());
05868   SDValue Src = Op.getOperand(0);
05869   if (Src.getValueType() == MVT::f32)
05870     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
05871 
05872   SDValue Tmp;
05873   switch (Op.getSimpleValueType().SimpleTy) {
05874   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
05875   case MVT::i32:
05876     Tmp = DAG.getNode(
05877         Op.getOpcode() == ISD::FP_TO_SINT
05878             ? PPCISD::FCTIWZ
05879             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
05880         dl, MVT::f64, Src);
05881     break;
05882   case MVT::i64:
05883     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
05884            "i64 FP_TO_UINT is supported only with FPCVT");
05885     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
05886                                                         PPCISD::FCTIDUZ,
05887                       dl, MVT::f64, Src);
05888     break;
05889   }
05890 
05891   // Convert the FP value to an int value through memory.
05892   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
05893     (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
05894   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
05895   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
05896   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
05897 
05898   // Emit a store to the stack slot.
05899   SDValue Chain;
05900   if (i32Stack) {
05901     MachineFunction &MF = DAG.getMachineFunction();
05902     MachineMemOperand *MMO =
05903       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
05904     SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
05905     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
05906               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
05907   } else
05908     Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
05909                          MPI, false, false, 0);
05910 
05911   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
05912   // add in a bias.
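        // Illustratively: on a big-endian target the 32-bit fctiwz result
        // occupies the low-order word of the f64 stack slot (bytes 4..7),
        // hence the +4 below.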
05913   if (Op.getValueType() == MVT::i32 && !i32Stack) {
05914     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
05915                         DAG.getConstant(4, FIPtr.getValueType()));
05916     MPI = MPI.getWithOffset(4);
05917   }
05918 
05919   RLI.Chain = Chain;
05920   RLI.Ptr = FIPtr;
05921   RLI.MPI = MPI;
05922 }
05923 
05924 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
05925                                           SDLoc dl) const {
05926   ReuseLoadInfo RLI;
05927   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
05928 
05929   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
05930                      false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
05931                      RLI.Ranges);
05932 }
05933 
05934 // We're trying to insert a regular store, S, and then a load, L. If the
05935 // incoming value, O, is a load, we might just be able to have our load use the
05936 // address used by O. However, we don't know if anything else will store to
05937 // that address before we can load from it. To prevent this situation, we need
05938 // to insert our load, L, into the chain as a peer of O. To do this, we give L
05939 // the same chain operand as O, we create a token factor from the chain results
05940 // of O and L, and we replace all uses of O's chain result with that token
05941 // factor (see spliceIntoChain below for this last part).
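      // A small sketch of the splice (illustrative):
      //
      //   before:  Chain -> O -> (users of O's chain result)
      //   after:   O and L both chain from Chain, and TokenFactor(O, L)
      //            feeds the former users of O's chain result.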
05942 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
05943                                             ReuseLoadInfo &RLI,
05944                                             SelectionDAG &DAG,
05945                                             ISD::LoadExtType ET) const {
05946   SDLoc dl(Op);
05947   if (ET == ISD::NON_EXTLOAD &&
05948       (Op.getOpcode() == ISD::FP_TO_UINT ||
05949        Op.getOpcode() == ISD::FP_TO_SINT) &&
05950       isOperationLegalOrCustom(Op.getOpcode(),
05951                                Op.getOperand(0).getValueType())) {
05952 
05953     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
05954     return true;
05955   }
05956 
05957   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
05958   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
05959       LD->isNonTemporal())
05960     return false;
05961   if (LD->getMemoryVT() != MemVT)
05962     return false;
05963 
05964   RLI.Ptr = LD->getBasePtr();
05965   if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) {
05966     assert(LD->getAddressingMode() == ISD::PRE_INC &&
05967            "Non-pre-inc AM on PPC?");
05968     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
05969                           LD->getOffset());
05970   }
05971 
05972   RLI.Chain = LD->