//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: Remove this once soft-float is supported.
static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
    cl::desc("disable saving float registers for va_start on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
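
// Usage note (hedged): these are hidden cl::opt flags, so they do not show
// up in -help, but they can still be passed to llc directly
// (e.g. `llc -disable-ppc-preinc ...`) or, when driving codegen through
// clang, via `-mllvm -disable-ppc-preinc`.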

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }
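
  // A note on the semantics (hedged sketch): 'Promote' widens the i1
  // extending load to a wider legal memory type, while 'Expand' generally
  // rewrites the i8 SEXTLOAD as a plain extending load followed by an
  // explicit in-register sign extension.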

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
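
  // For illustration: these pre-increment forms correspond to the PowerPC
  // "update" instructions (lbzu, lhzu, lwzu, ldu, lfsu, lfdu and the
  // matching stores), which write the computed effective address back into
  // the base register as a side effect.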

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence.  Note it has different
  // semantics from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
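
  // A hedged sketch of what 'Expand' means here: with SREM/UREM and the
  // combined div/rem nodes all expanded, an i32 signed remainder becomes
  // the classic divide/multiply/subtract sequence:
  //   divw  rD, rA, rB      ; rD = rA / rB
  //   mullw rD, rD, rB      ; rD = (rA / rB) * rB
  //   subf  rD, rD, rA      ; rD = rA - (rA / rB) * rB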

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // Expand FSQRT unless we have a hardware square-root instruction, or can
  // form a reciprocal-estimate sequence (frsqrte/fre) under unsafe-fp-math.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
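
  // For context (hedged): when FSQRT is left non-expanded under
  // unsafe-fp-math, the reciprocal-estimate DAG combines registered below
  // refine the frsqrte/fre hardware estimates with Newton-Raphson
  // iterations rather than emitting a library call or a full-precision
  // divide.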

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
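
  // Note (hedged): plain CTLZ is left Legal because cntlzw/cntlzd exist;
  // the *_ZERO_UNDEF variants simply expand to the plain forms, and CTTZ
  // is expanded generically, e.g. cttz(x) = 32 - ctlz(~x & (x - 1)) for i32.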

  if (Subtarget.hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select.
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
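
  // For illustration (hedged): the generic expansion of sextinreg(i1) on an
  // i32 value is a shift pair, slwi r, r, 31 followed by srawi r, r, 31,
  // replicating bit 0 across the whole register.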

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling, but rather to act as a light-weight
  // setjmp/longjmp replacement for continuations, user-level threading, and
  // the like. As a result, no other SjLj exception interfaces are
  // implemented; please don't build your own exception handling on top of
  // them. LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
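
  // A hedged example of the materialization this enables: a static 32-bit
  // global address is typically formed as a high/low pair,
  //   lis  r3, ga@ha
  //   addi r3, r3, ga@l
  // with the PPCISD::Hi/PPCISD::Lo nodes below standing in for the two
  // relocated halves.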

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
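
  // Background (hedged): under the 32-bit SVR4 ABI, va_list is a structure
  // (register-use counters plus overflow and register-save-area pointers)
  // rather than a simple pointer, so va_copy must copy the whole struct;
  // hence the custom VACOPY lowering above.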

  // Use the default implementation.
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
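
  // For illustration (hedged): expanding a two-condition predicate splits it
  // into two setcc nodes joined by logic, e.g.
  //   setueq(a, b)  ==>  or(seteq(a, b), setuo(a, b))
  // since "unordered or equal" holds when the operands compare equal or
  // either one is a NaN.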

  if (Subtarget.has64BitSupport()) {
    // 64-bit-capable implementations also have instructions for converting
    // between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }
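
  // Background (hedged): the *_PARTS nodes represent a double-width shift
  // split across two registers. On a 32-bit target, shl_parts(lo, hi, amt)
  // yields roughly hi' = (hi << amt) | (lo >> (32 - amt)) and
  // lo' = lo << amt, with extra care for amt >= 32; the custom lowering
  // builds that carry logic out of ordinary shifts and or's.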

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // Vector instructions introduced in P8.
      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL,  VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT,  VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
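
    // Rationale (hedged): the promotions above work by bitcasting. A v16i8
    // AND, for example, is bitcast to v4i32, performed as a single vand,
    // and bitcast back, since bitwise operations are type-agnostic on the
    // 128-bit Altivec registers.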

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);
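
    // A hedged sketch of the custom MUL lowerings: v8i16 multiplies can use
    // vmladduhm with a zero addend, while v16i8 multiplies are typically
    // built from the even/odd byte products (vmuleub/vmuloub) recombined
    // with a vperm.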

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
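
      // Note (hedged): promoting v2i64 memory ops to v2f64 lets one set of
      // patterns (the VSX lxvd2x/stxvd2x forms) serve both element types,
      // with bitcasts inserted around the access.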

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec())
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }
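
  // For clarity (hedged): setting a libcall name to nullptr marks it as
  // unavailable, so the legalizer must expand i128 shifts inline on 32-bit
  // targets rather than emitting calls to __ashlti3 and friends.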

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  setInsertFencesForAtomic(true);

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
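
  // Interpretation (hedged): these thresholds cap how many stores the
  // SelectionDAG will emit inline before falling back to a library call;
  // e.g. on the e500mc, a memset that fits in 32 word stores (128 bytes)
  // stays inline.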
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  // Darwin passes everything on a 4-byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // Everything else gets an 8-byte boundary on PPC64 and a 4-byte boundary
  // on PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return nullptr;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
}

EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
        return false;
  }
  return true;
}
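
// Worked example (hedged): for the big-endian two-input case (ShuffleKind 0),
// vpkuhum keeps the odd-numbered byte of every halfword across both inputs,
// so the accepted v16i8 mask is {1,3,5,...,29,31}; undef elements (-1) are
// allowed anywhere via isConstantOrUndef.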

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2).  For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2).  For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}
01214 
01215 
01216 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
01217 /// amount, otherwise return -1.
01218 /// The ShuffleKind distinguishes between big-endian operations with two 
01219 /// different inputs (0), either-endian operations with two identical inputs
01220 /// (1), and little-endian operations with two different inputs (2).  For the
01221 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
01222 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
01223                              SelectionDAG &DAG) {
01224   if (N->getValueType(0) != MVT::v16i8)
01225     return -1;
01226 
01227   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
01228 
01229   // Find the first non-undef value in the shuffle mask.
01230   unsigned i;
01231   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
01232     /*search*/;
01233 
01234   if (i == 16) return -1;  // all undef.
01235 
01236   // Otherwise, check to see if the rest of the elements are consecutively
01237   // numbered from this value.
01238   unsigned ShiftAmt = SVOp->getMaskElt(i);
01239   if (ShiftAmt < i) return -1;
01240 
01241   ShiftAmt -= i;
01242   bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();
01243 
01244   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
01245     // Check the rest of the elements to see if they are consecutive.
01246     for (++i; i != 16; ++i)
01247       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
01248         return -1;
01249   } else if (ShuffleKind == 1) {
01250     // Check the rest of the elements to see if they are consecutive.
01251     for (++i; i != 16; ++i)
01252       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
01253         return -1;
01254   } else
01255     return -1;
01256 
01257   if (ShuffleKind == 2 && isLE)
01258     ShiftAmt = 16 - ShiftAmt;
01259 
01260   return ShiftAmt;
01261 }
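
// Worked example (illustration only): on a big-endian target with
// ShuffleKind 0, the mask <3,4,5,...,17,18> is accepted with ShiftAmt == 3:
// element i selects byte ShiftAmt+i of the 32-byte concatenation of the two
// inputs, matching vsldoi with a shift count of 3.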
01262 
01263 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
01264 /// specifies a splat of a single element that is suitable for input to
01265 /// VSPLTB/VSPLTH/VSPLTW.
01266 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
01267   assert(N->getValueType(0) == MVT::v16i8 &&
01268          (EltSize == 1 || EltSize == 2 || EltSize == 4));
01269 
01270   // This is a splat operation if each element of the permute is the same, and
01271   // if the value doesn't reference the second vector.
01272   unsigned ElementBase = N->getMaskElt(0);
01273 
01274   // FIXME: Handle UNDEF elements too!
01275   if (ElementBase >= 16)
01276     return false;
01277 
01278   // Check that the indices are consecutive, in the case of a multi-byte element
01279   // splatted with a v16i8 mask.
01280   for (unsigned i = 1; i != EltSize; ++i)
01281     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
01282       return false;
01283 
01284   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
01285     if (N->getMaskElt(i) < 0) continue;
01286     for (unsigned j = 0; j != EltSize; ++j)
01287       if (N->getMaskElt(i+j) != N->getMaskElt(j))
01288         return false;
01289   }
01290   return true;
01291 }
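
// Worked example (illustration only): with EltSize == 4, the mask
//   <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7>
// is accepted: ElementBase is 4, bytes 1..3 are consecutive from it, and
// every later 4-byte group repeats the first, so this splats word element 1.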
01292 
01293 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
01294 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
01295 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
01296                                 SelectionDAG &DAG) {
01297   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
01298   assert(isSplatShuffleMask(SVOp, EltSize));
01299   if (DAG.getTarget().getDataLayout()->isLittleEndian())
01300     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
01301   else
01302     return SVOp->getMaskElt(0) / EltSize;
01303 }
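
// Worked example (illustration only): continuing the mask above (EltSize ==
// 4, getMaskElt(0) == 4), big-endian returns 4/4 == 1 while little-endian
// returns (16/4) - 1 - (4/4) == 2, reflecting the reversed vector element
// numbering between the two byte orders.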
01304 
01305 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
01306 /// by using a vspltis[bhw] instruction of the specified element size, return
01307 /// the constant being splatted.  The ByteSize field indicates the number of
01308 /// bytes of each element [124] -> [bhw].
01309 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
01310   SDValue OpVal(nullptr, 0);
01311 
01312   // If ByteSize of the splat is bigger than the element size of the
01313   // build_vector, then we have a case where we are checking for a splat where
01314   // multiple elements of the buildvector are folded together into a single
01315   // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
01316   unsigned EltSize = 16/N->getNumOperands();
01317   if (EltSize < ByteSize) {
01318     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
01319     SDValue UniquedVals[4];
01320     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
01321 
01322     // See if the buildvector elements agree at each position within a chunk.
01323     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
01324       if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
01325       // If the element isn't a constant, bail fully out.
01326       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
01327 
01328 
01329       if (!UniquedVals[i&(Multiple-1)].getNode())
01330         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
01331       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
01332         return SDValue();  // no match.
01333     }
01334 
01335     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
01336     // either constant or undef values that are identical for each chunk.  See
01337     // if these chunks can form into a larger vspltis*.
01338 
01339     // Check to see if all of the leading entries are either 0 or -1.  If
01340     // neither, then this won't fit into the immediate field.
01341     bool LeadingZero = true;
01342     bool LeadingOnes = true;
01343     for (unsigned i = 0; i != Multiple-1; ++i) {
01344       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
01345 
01346       LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
01347       LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
01348     }
01349     // Finally, check the least significant entry.
01350     if (LeadingZero) {
01351       if (!UniquedVals[Multiple-1].getNode())
01352         return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
01353       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
01354       if (Val < 16)
01355         return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
01356     }
01357     if (LeadingOnes) {
01358       if (!UniquedVals[Multiple-1].getNode())
01359         return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
01360       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
01361       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
01362         return DAG.getTargetConstant(Val, MVT::i32);
01363     }
01364 
01365     return SDValue();
01366   }
01367 
01368   // Check to see if this buildvec has a single non-undef value in its elements.
01369   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
01370     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
01371     if (!OpVal.getNode())
01372       OpVal = N->getOperand(i);
01373     else if (OpVal != N->getOperand(i))
01374       return SDValue();
01375   }
01376 
01377   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
01378 
01379   unsigned ValSizeInBytes = EltSize;
01380   uint64_t Value = 0;
01381   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
01382     Value = CN->getZExtValue();
01383   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
01384     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
01385     Value = FloatToBits(CN->getValueAPF().convertToFloat());
01386   }
01387 
01388   // If the splat value is larger than the element value, then we can never do
01389   // this splat.  The only replicated value that could fit into the immediate
01390   // field would be zero, and we prefer to use vxor for that.
01391   if (ValSizeInBytes < ByteSize) return SDValue();
01392 
01393   // If the element value is larger than the splat value, check if it consists
01394   // of a repeated bit pattern of size ByteSize.
01395   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
01396     return SDValue();
01397 
01398   // Properly sign extend the value.
01399   int MaskVal = SignExtend32(Value, ByteSize * 8);
01400 
01401   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
01402   if (MaskVal == 0) return SDValue();
01403 
01404   // Finally, if this value fits in a 5 bit sext field, return it
01405   if (SignExtend32<5>(MaskVal) == MaskVal)
01406     return DAG.getTargetConstant(MaskVal, MVT::i32);
01407   return SDValue();
01408 }
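
// Worked example (illustration only): a v8i16 build_vector of
// <0,1,0,1,0,1,0,1> queried with ByteSize == 4 gives EltSize == 2 and
// Multiple == 2; the chunks uniquify to {0, 1}, the leading entry is zero,
// and the final entry (1) is < 16, so the constant 1 is returned and the
// splat can be materialized as vspltisw 1 (big-endian element order).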
01409 
01410 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
01411 /// amount, otherwise return -1.
01412 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
01413   EVT VT = N->getValueType(0);
01414   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
01415     return -1;
01416 
01417   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
01418 
01419   // Find the first non-undef value in the shuffle mask.
01420   unsigned i;
01421   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
01422     /*search*/;
01423 
01424   if (i == 4) return -1;  // all undef.
01425 
01426   // Otherwise, check to see if the rest of the elements are consecutively
01427   // numbered from this value.
01428   unsigned ShiftAmt = SVOp->getMaskElt(i);
01429   if (ShiftAmt < i) return -1;
01430   ShiftAmt -= i;
01431 
01432   // Check the rest of the elements to see if they are consecutive.
01433   for (++i; i != 4; ++i)
01434     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
01435       return -1;
01436 
01437   return ShiftAmt;
01438 }
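
// Worked example (illustration only): a v4f64 shuffle with mask <1,2,3,4>
// returns ShiftAmt == 1, since element i selects element ShiftAmt+i of the
// concatenated inputs, matching a qvaligni shift of 1.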
01439 
01440 //===----------------------------------------------------------------------===//
01441 //  Addressing Mode Selection
01442 //===----------------------------------------------------------------------===//
01443 
01444 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
01445 /// or 64-bit constant, and if the value can be accurately represented as a
01446 /// sign extension from a 16-bit value.  If so, this returns true and sets Imm
01447 /// to the immediate.
01448 static bool isIntS16Immediate(SDNode *N, short &Imm) {
01449   if (!isa<ConstantSDNode>(N))
01450     return false;
01451 
01452   Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
01453   if (N->getValueType(0) == MVT::i32)
01454     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
01455   else
01456     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
01457 }
01458 static bool isIntS16Immediate(SDValue Op, short &Imm) {
01459   return isIntS16Immediate(Op.getNode(), Imm);
01460 }
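
// Worked example (illustration only): for an i32 constant 0xFFFF8000, Imm
// becomes (short)0x8000 == -32768, and comparing against the full value
// -32768 succeeds; for 0x00018000 the truncated -32768 differs from the
// full value 98304, so the function returns false.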
01461 
01462 
01463 /// SelectAddressRegReg - Given the specified address, check to see if it
01464 /// can be represented as an indexed [r+r] operation.  Returns false if it
01465 /// can be more efficiently represented with [r+imm].
01466 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
01467                                             SDValue &Index,
01468                                             SelectionDAG &DAG) const {
01469   short imm = 0;
01470   if (N.getOpcode() == ISD::ADD) {
01471     if (isIntS16Immediate(N.getOperand(1), imm))
01472       return false;    // r+i
01473     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
01474       return false;    // r+i
01475 
01476     Base = N.getOperand(0);
01477     Index = N.getOperand(1);
01478     return true;
01479   } else if (N.getOpcode() == ISD::OR) {
01480     if (isIntS16Immediate(N.getOperand(1), imm))
01481       return false;    // r+i; the [r+imm] form can fold the immediate.
01482 
01483     // If this is an or of disjoint bitfields, we can codegen this as an add
01484     // (for better address arithmetic) if the LHS and RHS of the OR are provably
01485     // disjoint.
01486     APInt LHSKnownZero, LHSKnownOne;
01487     APInt RHSKnownZero, RHSKnownOne;
01488     DAG.computeKnownBits(N.getOperand(0),
01489                          LHSKnownZero, LHSKnownOne);
01490 
01491     if (LHSKnownZero.getBoolValue()) {
01492       DAG.computeKnownBits(N.getOperand(1),
01493                            RHSKnownZero, RHSKnownOne);
01494       // If all of the bits are known zero on the LHS or RHS, the add won't
01495       // carry.
01496       if (~(LHSKnownZero | RHSKnownZero) == 0) {
01497         Base = N.getOperand(0);
01498         Index = N.getOperand(1);
01499         return true;
01500       }
01501     }
01502   }
01503 
01504   return false;
01505 }
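
// Worked example (illustration only): for (or x, y) where x is known to
// have its low 8 bits clear (e.g. x = shl a, 8) and y = and z, 255 has its
// upper bits clear, every bit is known zero in at least one operand, so the
// OR cannot carry and is selected as the indexed form [x + y].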
01506 
01507 // If we happen to be doing an i64 load or store into a stack slot that has
01508 // less than a 4-byte alignment, then the frame-index elimination may need to
01509 // use an indexed load or store instruction (because the offset may not be a
01510 // multiple of 4). The extra register needed to hold the offset comes from the
01511 // register scavenger, and it is possible that the scavenger will need to use
01512 // an emergency spill slot. As a result, we need to make sure that a spill slot
01513 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
01514 // stack slot.
01515 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
01516   // FIXME: This does not handle the LWA case.
01517   if (VT != MVT::i64)
01518     return;
01519 
01520   // NOTE: We'll exclude negative FIs here, which come from argument
01521   // lowering, because there are no known test cases triggering this problem
01522   // using packed structures (or similar). We can remove this exclusion if
01523   // we find such a test case. The reason why this is so test-case driven is
01524   // because this entire 'fixup' is only to prevent crashes (from the
01525   // register scavenger) on not-really-valid inputs. For example, if we have:
01526   //   %a = alloca i1
01527   //   %b = bitcast i1* %a to i64*
01528   //   store i64 0, i64* %b
01529   // then the store should really be marked as 'align 1', but is not. If it
01530   // were marked as 'align 1' then the indexed form would have been
01531   // instruction-selected initially, and the problem this 'fixup' is preventing
01532   // won't happen regardless.
01533   if (FrameIdx < 0)
01534     return;
01535 
01536   MachineFunction &MF = DAG.getMachineFunction();
01537   MachineFrameInfo *MFI = MF.getFrameInfo();
01538 
01539   unsigned Align = MFI->getObjectAlignment(FrameIdx);
01540   if (Align >= 4)
01541     return;
01542 
01543   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
01544   FuncInfo->setHasNonRISpills();
01545 }
01546 
01547 /// Returns true if the address N can be represented by a base register plus
01548 /// a signed 16-bit displacement [r+imm], and if it is not better
01549 /// represented as reg+reg.  If Aligned is true, only accept displacements
01550 /// suitable for STD and friends, i.e. multiples of 4.
01551 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
01552                                             SDValue &Base,
01553                                             SelectionDAG &DAG,
01554                                             bool Aligned) const {
01555   // FIXME dl should come from parent load or store, not from address
01556   SDLoc dl(N);
01557   // If this can be more profitably realized as r+r, fail.
01558   if (SelectAddressRegReg(N, Disp, Base, DAG))
01559     return false;
01560 
01561   if (N.getOpcode() == ISD::ADD) {
01562     short imm = 0;
01563     if (isIntS16Immediate(N.getOperand(1), imm) &&
01564         (!Aligned || (imm & 3) == 0)) {
01565       Disp = DAG.getTargetConstant(imm, N.getValueType());
01566       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
01567         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
01568         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
01569       } else {
01570         Base = N.getOperand(0);
01571       }
01572       return true; // [r+i]
01573     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
01574       // Match LOAD (ADD (X, Lo(G))).
01575       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
01576              && "Cannot handle constant offsets yet!");
01577       Disp = N.getOperand(1).getOperand(0);  // The global address.
01578       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
01579              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
01580              Disp.getOpcode() == ISD::TargetConstantPool ||
01581              Disp.getOpcode() == ISD::TargetJumpTable);
01582       Base = N.getOperand(0);
01583       return true;  // [&g+r]
01584     }
01585   } else if (N.getOpcode() == ISD::OR) {
01586     short imm = 0;
01587     if (isIntS16Immediate(N.getOperand(1), imm) &&
01588         (!Aligned || (imm & 3) == 0)) {
01589       // If this is an or of disjoint bitfields, we can codegen this as an add
01590       // (for better address arithmetic) if the LHS and RHS of the OR are
01591       // provably disjoint.
01592       APInt LHSKnownZero, LHSKnownOne;
01593       DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
01594 
01595       if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
01596         // If all of the bits are known zero on the LHS or RHS, the add won't
01597         // carry.
01598         if (FrameIndexSDNode *FI =
01599               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
01600           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
01601           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
01602         } else {
01603           Base = N.getOperand(0);
01604         }
01605         Disp = DAG.getTargetConstant(imm, N.getValueType());
01606         return true;
01607       }
01608     }
01609   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
01610     // Loading from a constant address.
01611 
01612     // If this address fits entirely in a 16-bit sext immediate field, codegen
01613     // this as "d, 0"
01614     short Imm;
01615     if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
01616       Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
01617       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
01618                              CN->getValueType(0));
01619       return true;
01620     }
01621 
01622     // Handle 32-bit sext immediates with LIS + addr mode.
01623     if ((CN->getValueType(0) == MVT::i32 ||
01624          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
01625         (!Aligned || (CN->getZExtValue() & 3) == 0)) {
01626       int Addr = (int)CN->getZExtValue();
01627 
01628       // Otherwise, break this down into an LIS + disp.
01629       Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
01630 
01631       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
01632       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
01633       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
01634       return true;
01635     }
01636   }
01637 
01638   Disp = DAG.getTargetConstant(0, getPointerTy());
01639   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
01640     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
01641     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
01642   } else
01643     Base = N;
01644   return true;      // [r+0]
01645 }
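
// Worked example (illustration only): for the constant address 0x12348000,
// (short)Addr == -32768, so Disp becomes -32768 and Base becomes an LIS of
// (Addr - (short)Addr) >> 16 == 0x12350000 >> 16 == 0x1235; the hardware
// then computes 0x12350000 + (-32768) == 0x12348000, as intended.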
01646 
01647 /// SelectAddressRegRegOnly - Given the specified address, force it to be
01648 /// represented as an indexed [r+r] operation.
01649 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
01650                                                 SDValue &Index,
01651                                                 SelectionDAG &DAG) const {
01652   // Check to see if we can easily represent this as an [r+r] address.  This
01653   // will fail if it thinks that the address is more profitably represented as
01654   // reg+imm, e.g. where imm = 0.
01655   if (SelectAddressRegReg(N, Base, Index, DAG))
01656     return true;
01657 
01658   // If the operand is an addition, always emit this as [r+r], since this is
01659   // better (for code size, and execution, as the memop does the add for free)
01660   // than emitting an explicit add.
01661   if (N.getOpcode() == ISD::ADD) {
01662     Base = N.getOperand(0);
01663     Index = N.getOperand(1);
01664     return true;
01665   }
01666 
01667   // Otherwise, do it the hard way, using R0 as the base register.
01668   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
01669                          N.getValueType());
01670   Index = N;
01671   return true;
01672 }
01673 
01674 /// getPreIndexedAddressParts - Returns true if the node's address can be
01675 /// legally represented as a pre-indexed load / store address; if so, the base
01676 /// pointer, offset pointer, and addressing mode are returned by reference.
01677 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
01678                                                   SDValue &Offset,
01679                                                   ISD::MemIndexedMode &AM,
01680                                                   SelectionDAG &DAG) const {
01681   if (DisablePPCPreinc) return false;
01682 
01683   bool isLoad = true;
01684   SDValue Ptr;
01685   EVT VT;
01686   unsigned Alignment;
01687   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
01688     Ptr = LD->getBasePtr();
01689     VT = LD->getMemoryVT();
01690     Alignment = LD->getAlignment();
01691   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
01692     Ptr = ST->getBasePtr();
01693     VT  = ST->getMemoryVT();
01694     Alignment = ST->getAlignment();
01695     isLoad = false;
01696   } else
01697     return false;
01698 
01699   // PowerPC doesn't have preinc load/store instructions for vectors (except
01700   // for QPX, which does have preinc r+r forms).
01701   if (VT.isVector()) {
01702     if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
01703       return false;
01704     } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
01705       AM = ISD::PRE_INC;
01706       return true;
01707     }
01708   }
01709 
01710   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
01711 
01712     // Common code will reject creating a pre-inc form if the base pointer
01713     // is a frame index, or if N is a store and the base pointer is either
01714     // the same as or a predecessor of the value being stored.  Check for
01715     // those situations here, and try with swapped Base/Offset instead.
01716     bool Swap = false;
01717 
01718     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
01719       Swap = true;
01720     else if (!isLoad) {
01721       SDValue Val = cast<StoreSDNode>(N)->getValue();
01722       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
01723         Swap = true;
01724     }
01725 
01726     if (Swap)
01727       std::swap(Base, Offset);
01728 
01729     AM = ISD::PRE_INC;
01730     return true;
01731   }
01732 
01733   // LDU/STDU (DS-form) can only handle immediates that are a multiple of 4.
01734   if (VT != MVT::i64) {
01735     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
01736       return false;
01737   } else {
01738     // LDU/STU need an address with at least 4-byte alignment.
01739     if (Alignment < 4)
01740       return false;
01741 
01742     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
01743       return false;
01744   }
01745 
01746   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
01747     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
01748     // sext i32 to i64 when addr mode is r+i.
01749     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
01750         LD->getExtensionType() == ISD::SEXTLOAD &&
01751         isa<ConstantSDNode>(Offset))
01752       return false;
01753   }
01754 
01755   AM = ISD::PRE_INC;
01756   return true;
01757 }
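
// Illustrative note (not in the original source): a store through a pointer
// that is then advanced by a small constant can be selected as a single
// update-form instruction such as stwu, which both performs the store and
// writes the incremented address back into the base register; the pre-inc
// address formed above is what enables that selection.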
01758 
01759 //===----------------------------------------------------------------------===//
01760 //  LowerOperation implementation
01761 //===----------------------------------------------------------------------===//
01762 
01763 /// GetLabelAccessInfo - Return true if we should reference labels using a
01764 /// PICBase, and set HiOpFlags and LoOpFlags to the target MO flags.
01765 static bool GetLabelAccessInfo(const TargetMachine &TM,
01766                                const PPCSubtarget &Subtarget,
01767                                unsigned &HiOpFlags, unsigned &LoOpFlags,
01768                                const GlobalValue *GV = nullptr) {
01769   HiOpFlags = PPCII::MO_HA;
01770   LoOpFlags = PPCII::MO_LO;
01771 
01772   // Don't use the pic base if not in PIC relocation model.
01773   bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
01774 
01775   if (isPIC) {
01776     HiOpFlags |= PPCII::MO_PIC_FLAG;
01777     LoOpFlags |= PPCII::MO_PIC_FLAG;
01778   }
01779 
01780   // If this is a reference to a global value that requires a non-lazy-ptr, make
01781   // sure that instruction lowering adds it.
01782   if (GV && Subtarget.hasLazyResolverStub(GV)) {
01783     HiOpFlags |= PPCII::MO_NLP_FLAG;
01784     LoOpFlags |= PPCII::MO_NLP_FLAG;
01785 
01786     if (GV->hasHiddenVisibility()) {
01787       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
01788       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
01789     }
01790   }
01791 
01792   return isPIC;
01793 }
01794 
01795 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
01796                              SelectionDAG &DAG) {
01797   EVT PtrVT = HiPart.getValueType();
01798   SDValue Zero = DAG.getConstant(0, PtrVT);
01799   SDLoc DL(HiPart);
01800 
01801   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
01802   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
01803 
01804   // With PIC, the first instruction is actually "GR+hi(&G)".
01805   if (isPIC)
01806     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
01807                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
01808 
01809   // Generate non-pic code that has direct accesses to the constant pool.
01810   // The address of the global is just (hi(&g)+lo(&g)).
01811   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
01812 }
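
// Illustrative note (not in the original source): for a non-PIC reference
// to a symbol g, the Hi/Lo nodes above typically select to a pair such as
//   lis  r, g@ha
//   addi r, r, g@l
// with PPCISD::Hi and PPCISD::Lo becoming the @ha and @l relocations.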
01813 
01814 static void setUsesTOCBasePtr(MachineFunction &MF) {
01815   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
01816   FuncInfo->setUsesTOCBasePtr();
01817 }
01818 
01819 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
01820   setUsesTOCBasePtr(DAG.getMachineFunction());
01821 }
01822 
01823 static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
01824                            SDValue GA) {
01825   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
01826   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
01827                 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
01828 
01829   SDValue Ops[] = { GA, Reg };
01830   return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
01831                                  DAG.getVTList(VT, MVT::Other), Ops, VT,
01832                                  MachinePointerInfo::getGOT(), 0, false, true,
01833                                  false, 0);
01834 }
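
// Illustrative note (not in the original source): on 64-bit ELF the
// TOC_ENTRY node built here typically selects to a TOC-relative load such
// as "ld r, sym@toc(r2)", with X2 serving as the TOC base pointer.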
01835 
01836 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
01837                                              SelectionDAG &DAG) const {
01838   EVT PtrVT = Op.getValueType();
01839   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
01840   const Constant *C = CP->getConstVal();
01841 
01842   // 64-bit SVR4 ABI code is always position-independent.
01843   // The actual address of the GlobalValue is stored in the TOC.
01844   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
01845     setUsesTOCBasePtr(DAG);
01846     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
01847     return getTOCEntry(DAG, SDLoc(CP), true, GA);
01848   }
01849 
01850   unsigned MOHiFlag, MOLoFlag;
01851   bool isPIC =
01852       GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
01853 
01854   if (isPIC && Subtarget.isSVR4ABI()) {
01855     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
01856                                            PPCII::MO_PIC_FLAG);
01857     return getTOCEntry(DAG, SDLoc(CP), false, GA);
01858   }
01859 
01860   SDValue CPIHi =
01861     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
01862   SDValue CPILo =
01863     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
01864   return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
01865 }
01866 
01867 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
01868   EVT PtrVT = Op.getValueType();
01869   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
01870 
01871   // 64-bit SVR4 ABI code is always position-independent.
01872   // The actual address of the GlobalValue is stored in the TOC.
01873   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
01874     setUsesTOCBasePtr(DAG);
01875     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
01876     return getTOCEntry(DAG, SDLoc(JT), true, GA);
01877   }
01878 
01879   unsigned MOHiFlag, MOLoFlag;
01880   bool isPIC =
01881       GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
01882 
01883   if (isPIC && Subtarget.isSVR4ABI()) {
01884     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
01885                                         PPCII::MO_PIC_FLAG);
01886     return getTOCEntry(DAG, SDLoc(GA), false, GA);
01887   }
01888 
01889   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
01890   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
01891   return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
01892 }
01893 
01894 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
01895                                              SelectionDAG &DAG) const {
01896   EVT PtrVT = Op.getValueType();
01897   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
01898   const BlockAddress *BA = BASDN->getBlockAddress();
01899 
01900   // 64-bit SVR4 ABI code is always position-independent.
01901   // The actual BlockAddress is stored in the TOC.
01902   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
01903     setUsesTOCBasePtr(DAG);
01904     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
01905     return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
01906   }
01907 
01908   unsigned MOHiFlag, MOLoFlag;
01909   bool isPIC =
01910       GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
01911   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
01912   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
01913   return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
01914 }
01915 
01916 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
01917                                               SelectionDAG &DAG) const {
01918 
01919   // FIXME: TLS addresses currently use medium model code sequences,
01920   // which is the most useful form.  Eventually support for small and
01921   // large models could be added if users need it, at the cost of
01922   // additional complexity.
01923   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
01924   SDLoc dl(GA);
01925   const GlobalValue *GV = GA->getGlobal();
01926   EVT PtrVT = getPointerTy();
01927   bool is64bit = Subtarget.isPPC64();
01928   const Module *M = DAG.getMachineFunction().getFunction()->getParent();
01929   PICLevel::Level picLevel = M->getPICLevel();
01930 
01931   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
01932 
01933   if (Model == TLSModel::LocalExec) {
01934     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
01935                                                PPCII::MO_TPREL_HA);
01936     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
01937                                                PPCII::MO_TPREL_LO);
01938     SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
01939                                      is64bit ? MVT::i64 : MVT::i32);
01940     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
01941     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
01942   }
01943 
01944   if (Model == TLSModel::InitialExec) {
01945     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
01946     SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
01947                                                 PPCII::MO_TLS);
01948     SDValue GOTPtr;
01949     if (is64bit) {
01950       setUsesTOCBasePtr(DAG);
01951       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
01952       GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
01953                            PtrVT, GOTReg, TGA);
01954     } else
01955       GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
01956     SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
01957                                    PtrVT, TGA, GOTPtr);
01958     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
01959   }
01960 
01961   if (Model == TLSModel::GeneralDynamic) {
01962     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
01963     SDValue GOTPtr;
01964     if (is64bit) {
01965       setUsesTOCBasePtr(DAG);
01966       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
01967       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
01968                                    GOTReg, TGA);
01969     } else {
01970       if (picLevel == PICLevel::Small)
01971         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
01972       else
01973         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
01974     }
01975     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
01976                        GOTPtr, TGA, TGA);
01977   }
01978 
01979   if (Model == TLSModel::LocalDynamic) {
01980     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
01981     SDValue GOTPtr;
01982     if (is64bit) {
01983       setUsesTOCBasePtr(DAG);
01984       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
01985       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
01986                            GOTReg, TGA);
01987     } else {
01988       if (picLevel == PICLevel::Small)
01989         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
01990       else
01991         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
01992     }
01993     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
01994                                   PtrVT, GOTPtr, TGA, TGA);
01995     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
01996                                       PtrVT, TLSAddr, TGA);
01997     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
01998   }
01999 
02000   llvm_unreachable("Unknown TLS model!");
02001 }
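
// Illustrative note (not in the original source): for the local-exec model
// on 64-bit targets, the Hi/Lo nodes above typically select to
//   addis r, r13, v@tprel@ha
//   addi  r, r,   v@tprel@l
// with r13 as the thread pointer.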
02002 
02003 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
02004                                               SelectionDAG &DAG) const {
02005   EVT PtrVT = Op.getValueType();
02006   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
02007   SDLoc DL(GSDN);
02008   const GlobalValue *GV = GSDN->getGlobal();
02009 
02010   // 64-bit SVR4 ABI code is always position-independent.
02011   // The actual address of the GlobalValue is stored in the TOC.
02012   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
02013     setUsesTOCBasePtr(DAG);
02014     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
02015     return getTOCEntry(DAG, DL, true, GA);
02016   }
02017 
02018   unsigned MOHiFlag, MOLoFlag;
02019   bool isPIC =
02020       GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV);
02021 
02022   if (isPIC && Subtarget.isSVR4ABI()) {
02023     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
02024                                             GSDN->getOffset(),
02025                                             PPCII::MO_PIC_FLAG);
02026     return getTOCEntry(DAG, DL, false, GA);
02027   }
02028 
02029   SDValue GAHi =
02030     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
02031   SDValue GALo =
02032     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
02033 
02034   SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
02035 
02036   // If the global reference is actually to a non-lazy-pointer, we have to do an
02037   // extra load to get the address of the global.
02038   if (MOHiFlag & PPCII::MO_NLP_FLAG)
02039     Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
02040                       false, false, false, 0);
02041   return Ptr;
02042 }
02043 
02044 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
02045   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
02046   SDLoc dl(Op);
02047 
02048   if (Op.getValueType() == MVT::v2i64) {
02049     // When the operands themselves are v2i64 values, we need to do something
02050     // special because VSX has no underlying comparison operations for these.
02051     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
02052       // Equality can be handled by casting to the legal type for Altivec
02053       // comparisons, everything else needs to be expanded.
02054       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
02055         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
02056                  DAG.getSetCC(dl, MVT::v4i32,
02057                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
02058                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
02059                    CC));
02060       }
02061 
02062       return SDValue();
02063     }
02064 
02065     // We handle most of these in the usual way.
02066     return Op;
02067   }
02068 
02069   // If we're comparing for equality to zero, expose the fact that this is
02070   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
02071   // fold the new nodes.
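  // Worked example (illustration only): for an i32 "x == 0", the nodes
  // built below typically select to
  //   cntlzw r, x     ; yields 32 iff x == 0, otherwise a value < 32
  //   srwi   r, r, 5  ; Log2_32(32) == 5, so r becomes 1 iff x == 0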
02072   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
02073     if (C->isNullValue() && CC == ISD::SETEQ) {
02074       EVT VT = Op.getOperand(0).getValueType();
02075       SDValue Zext = Op.getOperand(0);
02076       if (VT.bitsLT(MVT::i32)) {
02077         VT = MVT::i32;
02078         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
02079       }
02080       unsigned Log2b = Log2_32(VT.getSizeInBits());
02081       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
02082       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
02083                                 DAG.getConstant(Log2b, MVT::i32));
02084       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
02085     }
02086     // Leave comparisons against 0 and -1 alone for now, since they're usually
02087     // optimized.  FIXME: revisit this when we can custom lower all setcc
02088     // optimizations.
02089     if (C->isAllOnesValue() || C->isNullValue())
02090       return SDValue();
02091   }
02092 
02093   // If we have an integer seteq/setne, turn it into a compare against zero
02094   // by xor'ing the rhs with the lhs, which is faster than setting a
02095   // condition register, reading it back out, and masking the correct bit.  The
02096   // normal approach here uses sub to do this instead of xor.  Using xor exposes
02097   // the result to other bit-twiddling opportunities.
02098   EVT LHSVT = Op.getOperand(0).getValueType();
02099   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
02100     EVT VT = Op.getValueType();
02101     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
02102                                 Op.getOperand(1));
02103     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
02104   }
02105   return SDValue();
02106 }
02107 
02108 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
02109                                       const PPCSubtarget &Subtarget) const {
02110   SDNode *Node = Op.getNode();
02111   EVT VT = Node->getValueType(0);
02112   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02113   SDValue InChain = Node->getOperand(0);
02114   SDValue VAListPtr = Node->getOperand(1);
02115   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
02116   SDLoc dl(Node);
02117 
02118   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
02119 
02120   // gpr_index
02121   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
02122                                     VAListPtr, MachinePointerInfo(SV), MVT::i8,
02123                                     false, false, false, 0);
02124   InChain = GprIndex.getValue(1);
02125 
02126   if (VT == MVT::i64) {
02127     // Check if GprIndex is odd (i64 args must start at an even GPR index)
02128     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
02129                                  DAG.getConstant(1, MVT::i32));
02130     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
02131                                 DAG.getConstant(0, MVT::i32), ISD::SETNE);
02132     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
02133                                           DAG.getConstant(1, MVT::i32));
02134     // Align GprIndex to be even if it isn't
02135     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
02136                            GprIndex);
02137   }
02138 
02139   // fpr index is 1 byte after gpr
02140   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
02141                                DAG.getConstant(1, MVT::i32));
02142 
02143   // fpr
02144   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
02145                                     FprPtr, MachinePointerInfo(SV), MVT::i8,
02146                                     false, false, false, 0);
02147   InChain = FprIndex.getValue(1);
02148 
02149   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
02150                                        DAG.getConstant(8, MVT::i32));
02151 
02152   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
02153                                         DAG.getConstant(4, MVT::i32));
02154 
02155   // areas
02156   SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
02157                                      MachinePointerInfo(), false, false,
02158                                      false, 0);
02159   InChain = OverflowArea.getValue(1);
02160 
02161   SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
02162                                     MachinePointerInfo(), false, false,
02163                                     false, 0);
02164   InChain = RegSaveArea.getValue(1);
02165 
02166   // select overflow_area if index >= 8
02167   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
02168                             DAG.getConstant(8, MVT::i32), ISD::SETLT);
02169 
02170   // adjustment constant gpr_index * 4/8
02171   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
02172                                     VT.isInteger() ? GprIndex : FprIndex,
02173                                     DAG.getConstant(VT.isInteger() ? 4 : 8,
02174                                                     MVT::i32));
02175 
02176   // OurReg = RegSaveArea + RegConstant
02177   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
02178                                RegConstant);
02179 
02180   // Floating types are 32 bytes into RegSaveArea
02181   if (VT.isFloatingPoint())
02182     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
02183                          DAG.getConstant(32, MVT::i32));
02184 
02185   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
02186   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
02187                                    VT.isInteger() ? GprIndex : FprIndex,
02188                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1,
02189                                                    MVT::i32));
02190 
02191   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
02192                               VT.isInteger() ? VAListPtr : FprPtr,
02193                               MachinePointerInfo(SV),
02194                               MVT::i8, false, false, 0);
02195 
02196   // determine if we should load from reg_save_area or overflow_area
02197   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
02198 
02199   // increase overflow_area by 4/8 if gpr/fpr index >= 8
02200   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
02201                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
02202                                           MVT::i32));
02203 
02204   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
02205                              OverflowAreaPlusN);
02206 
02207   InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
02208                               OverflowAreaPtr,
02209                               MachinePointerInfo(),
02210                               MVT::i32, false, false, 0);
02211 
02212   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
02213                      false, false, false, 0);
02214 }
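
// Illustrative sketch (not in the original source) of the 32-bit SVR4
// va_list layout this lowering assumes:
//   offset 0: char gpr;               // next GPR index, 0..8
//   offset 1: char fpr;               // next FPR index, 0..8
//   offset 4: char *overflow_arg_area;
//   offset 8: char *reg_save_area;    // r3..r10, then f1..f8 at +32
// matching the +1, +4 and +8 pointer adjustments computed above.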
02215 
02216 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
02217                                        const PPCSubtarget &Subtarget) const {
02218   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
02219 
02220   // We have to copy the entire va_list struct:
02221   // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
02222   return DAG.getMemcpy(Op.getOperand(0), Op,
02223                        Op.getOperand(1), Op.getOperand(2),
02224                        DAG.getConstant(12, MVT::i32), 8, false, true, false,
02225                        MachinePointerInfo(), MachinePointerInfo());
02226 }
02227 
02228 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
02229                                                   SelectionDAG &DAG) const {
02230   return Op.getOperand(0);
02231 }
02232 
02233 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
02234                                                 SelectionDAG &DAG) const {
02235   SDValue Chain = Op.getOperand(0);
02236   SDValue Trmp = Op.getOperand(1); // trampoline
02237   SDValue FPtr = Op.getOperand(2); // nested function
02238   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
02239   SDLoc dl(Op);
02240 
02241   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02242   bool isPPC64 = (PtrVT == MVT::i64);
02243   Type *IntPtrTy =
02244     DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
02245                                                              *DAG.getContext());
02246 
02247   TargetLowering::ArgListTy Args;
02248   TargetLowering::ArgListEntry Entry;
02249 
02250   Entry.Ty = IntPtrTy;
02251   Entry.Node = Trmp; Args.push_back(Entry);
02252 
02253   // TrampSize == (isPPC64 ? 48 : 40);
02254   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
02255                                isPPC64 ? MVT::i64 : MVT::i32);
02256   Args.push_back(Entry);
02257 
02258   Entry.Node = FPtr; Args.push_back(Entry);
02259   Entry.Node = Nest; Args.push_back(Entry);
02260 
02261   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
02262   TargetLowering::CallLoweringInfo CLI(DAG);
02263   CLI.setDebugLoc(dl).setChain(Chain)
02264     .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
02265                DAG.getExternalSymbol("__trampoline_setup", PtrVT),
02266                std::move(Args), 0);
02267 
02268   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
02269   return CallResult.second;
02270 }
02271 
02272 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
02273                                         const PPCSubtarget &Subtarget) const {
02274   MachineFunction &MF = DAG.getMachineFunction();
02275   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
02276 
02277   SDLoc dl(Op);
02278 
02279   if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
02280     // vastart just stores the address of the VarArgsFrameIndex slot into the
02281     // memory location argument.
02282     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02283     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
02284     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
02285     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
02286                         MachinePointerInfo(SV),
02287                         false, false, 0);
02288   }
02289 
02290   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
02291   // We suppose the given va_list is already allocated.
02292   //
02293   // typedef struct {
02294   //  char gpr;     /* index into the array of 8 GPRs
02295   //                 * stored in the register save area
02296   //                 * gpr=0 corresponds to r3,
02297   //                 * gpr=1 to r4, etc.
02298   //                 */
02299   //  char fpr;     /* index into the array of 8 FPRs
02300   //                 * stored in the register save area
02301   //                 * fpr=0 corresponds to f1,
02302   //                 * fpr=1 to f2, etc.
02303   //                 */
02304   //  char *overflow_arg_area;
02305   //                /* location on stack that holds
02306   //                 * the next overflow argument
02307   //                 */
02308   //  char *reg_save_area;
02309   //               /* where r3:r10 and f1:f8 (if saved)
02310   //                * are stored
02311   //                */
02312   // } va_list[1];
02313 
02314 
02315   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
02316   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
02317 
02318 
02319   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02320 
02321   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
02322                                             PtrVT);
02323   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
02324                                  PtrVT);
02325 
02326   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
02327   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
02328 
02329   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
02330   SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
02331 
02332   uint64_t FPROffset = 1;
02333   SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
02334 
02335   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
02336 
02337   // Store first byte : number of int regs
02338   SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
02339                                          Op.getOperand(1),
02340                                          MachinePointerInfo(SV),
02341                                          MVT::i8, false, false, 0);
02342   uint64_t nextOffset = FPROffset;
02343   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
02344                                   ConstFPROffset);
02345 
02346   // Store second byte : number of float regs
02347   SDValue secondStore =
02348     DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
02349                       MachinePointerInfo(SV, nextOffset), MVT::i8,
02350                       false, false, 0);
02351   nextOffset += StackOffset;
02352   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
02353 
02354   // Store second word : arguments given on stack
02355   SDValue thirdStore =
02356     DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
02357                  MachinePointerInfo(SV, nextOffset),
02358                  false, false, 0);
02359   nextOffset += FrameOffset;
02360   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
02361 
02362   // Store third word : arguments given in registers
02363   return DAG.getStore(thirdStore, dl, FR, nextPtr,
02364                       MachinePointerInfo(SV, nextOffset),
02365                       false, false, 0);
02366 
02367 }
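
// Worked example (illustration only): with 32-bit pointers the stores above
// land at offsets 0 (gpr count), 1 (fpr count), 1 + (4-1) == 4
// (overflow_arg_area) and 4 + 4 == 8 (reg_save_area), filling in the
// va_list struct exactly as laid out in the comment above.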
02368 
02369 #include "PPCGenCallingConv.inc"
02370 
02371 // Function whose sole purpose is to kill compiler warnings 
02372 // stemming from unused functions included from PPCGenCallingConv.inc.
02373 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
02374   return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
02375 }
02376 
02377 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
02378                                       CCValAssign::LocInfo &LocInfo,
02379                                       ISD::ArgFlagsTy &ArgFlags,
02380                                       CCState &State) {
02381   return true;
02382 }
02383 
02384 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
02385                                              MVT &LocVT,
02386                                              CCValAssign::LocInfo &LocInfo,
02387                                              ISD::ArgFlagsTy &ArgFlags,
02388                                              CCState &State) {
02389   static const MCPhysReg ArgRegs[] = {
02390     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
02391     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
02392   };
02393   const unsigned NumArgRegs = array_lengthof(ArgRegs);
02394 
02395   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
02396 
02397   // Skip one register if the first unallocated register has an even register
02398   // number and there are still argument registers available which have not been
02399   // allocated yet. RegNum is actually an index into ArgRegs, which means we
02400   // need to skip a register if RegNum is odd.
02401   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
02402     State.AllocateReg(ArgRegs[RegNum]);
02403   }
02404 
02405   // Always return false here, as this function only makes sure that the first
02406   // unallocated register has an odd register number and does not actually
02407   // allocate a register for the current argument.
02408   return false;
02409 }
02410 
02411 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
02412                                                MVT &LocVT,
02413                                                CCValAssign::LocInfo &LocInfo,
02414                                                ISD::ArgFlagsTy &ArgFlags,
02415                                                CCState &State) {
02416   static const MCPhysReg ArgRegs[] = {
02417     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
02418     PPC::F8
02419   };
02420 
02421   const unsigned NumArgRegs = array_lengthof(ArgRegs);
02422 
02423   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
02424 
02425   // If there is only one floating-point register left, we need to put both
02426   // f64 values of a split ppc_fp128 value on the stack.
02427   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
02428     State.AllocateReg(ArgRegs[RegNum]);
02429   }
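        // For example (a sketch): if F1-F7 are already taken when a ppc_fp128
        // argument arrives, only F8 would remain for its two f64 halves, so F8
        // is claimed above and both halves travel on the stack together.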
02430 
02431   // Always return false here, as this function only makes sure that the two
02432   // f64 values into which a ppc_fp128 value is split are either both passed
02433   // in registers or both passed on the stack; it does not actually allocate
02434   // a register for the current argument.
02435   return false;
02436 }
02437 
02438 /// FPR - The set of FP registers that should be allocated for arguments,
02439 /// on Darwin.
02440 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
02441                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
02442                                 PPC::F11, PPC::F12, PPC::F13};
02443 
02444 /// QFPR - The set of QPX registers that should be allocated for arguments.
02445 static const MCPhysReg QFPR[] = {
02446     PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
02447     PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
02448 
02449 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
02450 /// the stack.
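      /// For example, a 13-byte byval argument with PtrByteSize == 8 reserves
      /// ((13 + 7) / 8) * 8 == 16 bytes, while an f32 element of a packed
      /// argument array (isInConsecutiveRegs) reserves only its 4-byte store
      /// size.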
02451 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
02452                                        unsigned PtrByteSize) {
02453   unsigned ArgSize = ArgVT.getStoreSize();
02454   if (Flags.isByVal())
02455     ArgSize = Flags.getByValSize();
02456 
02457   // Round up to multiples of the pointer size, except for array members,
02458   // which are always packed.
02459   if (!Flags.isInConsecutiveRegs())
02460     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
02461 
02462   return ArgSize;
02463 }
02464 
02465 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
02466 /// on the stack.
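      /// For example, an MVT::v4i32 argument yields an alignment of 16 and a
      /// byval argument requesting 32-byte alignment yields 32, while a plain
      /// i64 on a 64-bit target gets the default PtrByteSize alignment of 8.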
02467 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
02468                                             ISD::ArgFlagsTy Flags,
02469                                             unsigned PtrByteSize) {
02470   unsigned Align = PtrByteSize;
02471 
02472   // Altivec parameters are padded to a 16 byte boundary.
02473   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
02474       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
02475       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
02476     Align = 16;
02477   // QPX vector types stored in double-precision are padded to a 32 byte
02478   // boundary.
02479   else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
02480     Align = 32;
02481 
02482   // ByVal parameters are aligned as requested.
02483   if (Flags.isByVal()) {
02484     unsigned BVAlign = Flags.getByValAlign();
02485     if (BVAlign > PtrByteSize) {
02486       if (BVAlign % PtrByteSize != 0)
02487         llvm_unreachable(
02488             "ByVal alignment is not a multiple of the pointer size");
02489 
02490       Align = BVAlign;
02491     }
02492   }
02493 
02494   // Array members are always packed to their original alignment.
02495   if (Flags.isInConsecutiveRegs()) {
02496     // If the array member was split into multiple registers, the first
02497     // needs to be aligned to the size of the full type.  (Except for
02498     // ppcf128, which is only aligned as its f64 components.)
02499     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
02500       Align = OrigVT.getStoreSize();
02501     else
02502       Align = ArgVT.getStoreSize();
02503   }
02504 
02505   return Align;
02506 }
02507 
02508 /// CalculateStackSlotUsed - Return whether this argument will use its
02509 /// stack slot (instead of being passed in registers).  ArgOffset,
02510 /// AvailableFPRs, and AvailableVRs must hold the current argument
02511 /// position, and will be updated to account for this argument.
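      /// For example, with an 8-doubleword parameter area on 64-bit ELF, a
      /// ninth i64 argument starts at LinkageSize + 64 and so returns true,
      /// whereas an f64 at the same offset still returns false as long as an
      /// FPR remains available.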
02512 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
02513                                    ISD::ArgFlagsTy Flags,
02514                                    unsigned PtrByteSize,
02515                                    unsigned LinkageSize,
02516                                    unsigned ParamAreaSize,
02517                                    unsigned &ArgOffset,
02518                                    unsigned &AvailableFPRs,
02519                                    unsigned &AvailableVRs, bool HasQPX) {
02520   bool UseMemory = false;
02521 
02522   // Respect alignment of argument on the stack.
02523   unsigned Align =
02524     CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
02525   ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
02526   // If there's no space left in the argument save area, we must
02527   // use memory (this check also catches zero-sized arguments).
02528   if (ArgOffset >= LinkageSize + ParamAreaSize)
02529     UseMemory = true;
02530 
02531   // Allocate argument on the stack.
02532   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
02533   if (Flags.isInConsecutiveRegsLast())
02534     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
02535   // If we overran the argument save area, we must use memory
02536   // (this check catches arguments passed partially in memory).
02537   if (ArgOffset > LinkageSize + ParamAreaSize)
02538     UseMemory = true;
02539 
02540   // However, if the argument is actually passed in an FPR or a VR,
02541   // we don't use memory after all.
02542   if (!Flags.isByVal()) {
02543     if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
02544         // QPX registers overlap with the scalar FP registers.
02545         (HasQPX && (ArgVT == MVT::v4f32 ||
02546                     ArgVT == MVT::v4f64 ||
02547                     ArgVT == MVT::v4i1)))
02548       if (AvailableFPRs > 0) {
02549         --AvailableFPRs;
02550         return false;
02551       }
02552     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
02553         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
02554         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
02555       if (AvailableVRs > 0) {
02556         --AvailableVRs;
02557         return false;
02558       }
02559   }
02560 
02561   return UseMemory;
02562 }
02563 
02564 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
02565 /// ensure the minimum alignment required for the target.
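      /// For example, with a 16-byte target stack alignment, NumBytes == 52
      /// becomes (52 + 15) & ~15 == 64; already-aligned sizes pass through
      /// unchanged.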
02566 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
02567                                      unsigned NumBytes) {
02568   unsigned TargetAlign = Lowering->getStackAlignment();
02569   unsigned AlignMask = TargetAlign - 1;
02570   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
02571   return NumBytes;
02572 }
02573 
02574 SDValue
02575 PPCTargetLowering::LowerFormalArguments(SDValue Chain,
02576                                         CallingConv::ID CallConv, bool isVarArg,
02577                                         const SmallVectorImpl<ISD::InputArg>
02578                                           &Ins,
02579                                         SDLoc dl, SelectionDAG &DAG,
02580                                         SmallVectorImpl<SDValue> &InVals)
02581                                           const {
02582   if (Subtarget.isSVR4ABI()) {
02583     if (Subtarget.isPPC64())
02584       return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
02585                                          dl, DAG, InVals);
02586     else
02587       return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
02588                                          dl, DAG, InVals);
02589   } else {
02590     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
02591                                        dl, DAG, InVals);
02592   }
02593 }
02594 
02595 SDValue
02596 PPCTargetLowering::LowerFormalArguments_32SVR4(
02597                                       SDValue Chain,
02598                                       CallingConv::ID CallConv, bool isVarArg,
02599                                       const SmallVectorImpl<ISD::InputArg>
02600                                         &Ins,
02601                                       SDLoc dl, SelectionDAG &DAG,
02602                                       SmallVectorImpl<SDValue> &InVals) const {
02603 
02604   // 32-bit SVR4 ABI Stack Frame Layout:
02605   //              +-----------------------------------+
02606   //        +-->  |            Back chain             |
02607   //        |     +-----------------------------------+
02608   //        |     | Floating-point register save area |
02609   //        |     +-----------------------------------+
02610   //        |     |    General register save area     |
02611   //        |     +-----------------------------------+
02612   //        |     |          CR save word             |
02613   //        |     +-----------------------------------+
02614   //        |     |         VRSAVE save word          |
02615   //        |     +-----------------------------------+
02616   //        |     |         Alignment padding         |
02617   //        |     +-----------------------------------+
02618   //        |     |     Vector register save area     |
02619   //        |     +-----------------------------------+
02620   //        |     |       Local variable space        |
02621   //        |     +-----------------------------------+
02622   //        |     |        Parameter list area        |
02623   //        |     +-----------------------------------+
02624   //        |     |           LR save word            |
02625   //        |     +-----------------------------------+
02626   // SP-->  +---  |            Back chain             |
02627   //              +-----------------------------------+
02628   //
02629   // Specifications:
02630   //   System V Application Binary Interface PowerPC Processor Supplement
02631   //   AltiVec Technology Programming Interface Manual
02632 
02633   MachineFunction &MF = DAG.getMachineFunction();
02634   MachineFrameInfo *MFI = MF.getFrameInfo();
02635   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
02636 
02637   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02638   // Potential tail calls could cause overwriting of argument stack slots.
02639   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
02640                        (CallConv == CallingConv::Fast));
02641   unsigned PtrByteSize = 4;
02642 
02643   // Assign locations to all of the incoming arguments.
02644   SmallVector<CCValAssign, 16> ArgLocs;
02645   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
02646                  *DAG.getContext());
02647 
02648   // Reserve space for the linkage area on the stack.
02649   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
02650   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
02651 
02652   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
02653 
02654   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
02655     CCValAssign &VA = ArgLocs[i];
02656 
02657     // Arguments stored in registers.
02658     if (VA.isRegLoc()) {
02659       const TargetRegisterClass *RC;
02660       EVT ValVT = VA.getValVT();
02661 
02662       switch (ValVT.getSimpleVT().SimpleTy) {
02663         default:
02664           llvm_unreachable("ValVT not supported by formal arguments Lowering");
02665         case MVT::i1:
02666         case MVT::i32:
02667           RC = &PPC::GPRCRegClass;
02668           break;
02669         case MVT::f32:
02670           RC = &PPC::F4RCRegClass;
02671           break;
02672         case MVT::f64:
02673           if (Subtarget.hasVSX())
02674             RC = &PPC::VSFRCRegClass;
02675           else
02676             RC = &PPC::F8RCRegClass;
02677           break;
02678         case MVT::v16i8:
02679         case MVT::v8i16:
02680         case MVT::v4i32:
02681           RC = &PPC::VRRCRegClass;
02682           break;
02683         case MVT::v4f32:
02684           RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
02685           break;
02686         case MVT::v2f64:
02687         case MVT::v2i64:
02688           RC = &PPC::VSHRCRegClass;
02689           break;
02690         case MVT::v4f64:
02691           RC = &PPC::QFRCRegClass;
02692           break;
02693         case MVT::v4i1:
02694           RC = &PPC::QBRCRegClass;
02695           break;
02696       }
02697 
02698       // Transform the arguments stored in physical registers into virtual ones.
02699       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
02700       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
02701                                             ValVT == MVT::i1 ? MVT::i32 : ValVT);
02702 
02703       if (ValVT == MVT::i1)
02704         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
02705 
02706       InVals.push_back(ArgValue);
02707     } else {
02708       // Argument stored in memory.
02709       assert(VA.isMemLoc());
02710 
02711       unsigned ArgSize = VA.getLocVT().getStoreSize();
02712       int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
02713                                       isImmutable);
02714 
02715       // Create load nodes to retrieve arguments from the stack.
02716       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
02717       InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
02718                                    MachinePointerInfo(),
02719                                    false, false, false, 0));
02720     }
02721   }
02722 
02723   // Assign locations to all of the incoming aggregate by value arguments.
02724   // Aggregates passed by value are stored in the local variable space of the
02725   // caller's stack frame, right above the parameter list area.
02726   SmallVector<CCValAssign, 16> ByValArgLocs;
02727   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
02728                       ByValArgLocs, *DAG.getContext());
02729 
02730   // Reserve stack space for the allocations in CCInfo.
02731   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
02732 
02733   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
02734 
02735   // Area that is at least reserved in the caller of this function.
02736   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
02737   MinReservedArea = std::max(MinReservedArea, LinkageSize);
02738 
02739   // Set the size that is at least reserved in caller of this function.  Tail
02740   // call optimized function's reserved stack space needs to be aligned so that
02741   // taking the difference between two stack areas will result in an aligned
02742   // stack.
02743   MinReservedArea =
02744       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
02745   FuncInfo->setMinReservedArea(MinReservedArea);
02746 
02747   SmallVector<SDValue, 8> MemOps;
02748 
02749   // If the function takes a variable number of arguments, make a frame index
02750   // for the start of the first vararg value... for expansion of llvm.va_start.
02751   if (isVarArg) {
02752     static const MCPhysReg GPArgRegs[] = {
02753       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
02754       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
02755     };
02756     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
02757 
02758     static const MCPhysReg FPArgRegs[] = {
02759       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
02760       PPC::F8
02761     };
02762     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
02763     if (DisablePPCFloatInVariadic)
02764       NumFPArgRegs = 0;
02765 
02766     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
02767     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
02768 
02769     // Make room for NumGPArgRegs and NumFPArgRegs.
02770     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
02771                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
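          // For example, with FP saving enabled this reserves 8*4 + 8*8 == 96
          // bytes: room for all eight GPRs and all eight FPRs.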
02772 
02773     FuncInfo->setVarArgsStackOffset(
02774       MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
02775                              CCInfo.getNextStackOffset(), true));
02776 
02777     FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
02778     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
02779 
02780     // The fixed integer arguments of a variadic function are stored to the
02781     // VarArgsFrameIndex on the stack so that they may be loaded by
02782     // dereferencing the result of va_next.
02783     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
02784       // Get an existing live-in vreg, or add a new one.
02785       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
02786       if (!VReg)
02787         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
02788 
02789       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
02790       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
02791                                    MachinePointerInfo(), false, false, 0);
02792       MemOps.push_back(Store);
02793       // Increment the address by four for the next argument to store
02794       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
02795       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
02796     }
02797 
02798     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
02799     // is set.
02800     // The double arguments are stored to the VarArgsFrameIndex
02801     // on the stack.
02802     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
02803       // Get an existing live-in vreg, or add a new one.
02804       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
02805       if (!VReg)
02806         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
02807 
02808       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
02809       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
02810                                    MachinePointerInfo(), false, false, 0);
02811       MemOps.push_back(Store);
02812       // Increment the address by eight for the next argument to store
02813       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
02814                                          PtrVT);
02815       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
02816     }
02817   }
02818 
02819   if (!MemOps.empty())
02820     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
02821 
02822   return Chain;
02823 }
02824 
02825 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
02826 // value to MVT::i64 and then truncate to the correct register size.
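      // For example, a signed i32 argument arrives in the low half of an i64
      // GPR; AssertSext records that the upper 32 bits are sign bits, so the
      // truncation to i32 (and any later re-extension) can fold away.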
02827 SDValue
02828 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
02829                                      SelectionDAG &DAG, SDValue ArgVal,
02830                                      SDLoc dl) const {
02831   if (Flags.isSExt())
02832     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
02833                          DAG.getValueType(ObjectVT));
02834   else if (Flags.isZExt())
02835     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
02836                          DAG.getValueType(ObjectVT));
02837 
02838   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
02839 }
02840 
02841 SDValue
02842 PPCTargetLowering::LowerFormalArguments_64SVR4(
02843                                       SDValue Chain,
02844                                       CallingConv::ID CallConv, bool isVarArg,
02845                                       const SmallVectorImpl<ISD::InputArg>
02846                                         &Ins,
02847                                       SDLoc dl, SelectionDAG &DAG,
02848                                       SmallVectorImpl<SDValue> &InVals) const {
02849   // TODO: add description of PPC stack frame format, or at least some docs.
02850   //
02851   bool isELFv2ABI = Subtarget.isELFv2ABI();
02852   bool isLittleEndian = Subtarget.isLittleEndian();
02853   MachineFunction &MF = DAG.getMachineFunction();
02854   MachineFrameInfo *MFI = MF.getFrameInfo();
02855   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
02856 
02857   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
02858          "fastcc not supported on varargs functions");
02859 
02860   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
02861   // Potential tail calls could cause overwriting of argument stack slots.
02862   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
02863                        (CallConv == CallingConv::Fast));
02864   unsigned PtrByteSize = 8;
02865   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
02866 
02867   static const MCPhysReg GPR[] = {
02868     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
02869     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
02870   };
02871   static const MCPhysReg VR[] = {
02872     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
02873     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
02874   };
02875   static const MCPhysReg VSRH[] = {
02876     PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
02877     PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
02878   };
02879 
02880   const unsigned Num_GPR_Regs = array_lengthof(GPR);
02881   const unsigned Num_FPR_Regs = 13;
02882   const unsigned Num_VR_Regs  = array_lengthof(VR);
02883   const unsigned Num_QFPR_Regs = Num_FPR_Regs;
02884 
02885   // Do a first pass over the arguments to determine whether the ABI
02886   // guarantees that our caller has allocated the parameter save area
02887   // on its stack frame.  In the ELFv1 ABI, this is always the case;
02888   // in the ELFv2 ABI, it is true if this is a vararg function or if
02889   // any parameter is located in a stack slot.
02890 
02891   bool HasParameterArea = !isELFv2ABI || isVarArg;
02892   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
02893   unsigned NumBytes = LinkageSize;
02894   unsigned AvailableFPRs = Num_FPR_Regs;
02895   unsigned AvailableVRs = Num_VR_Regs;
02896   for (unsigned i = 0, e = Ins.size(); i != e; ++i)
02897     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
02898                                PtrByteSize, LinkageSize, ParamAreaSize,
02899                                NumBytes, AvailableFPRs, AvailableVRs,
02900                                Subtarget.hasQPX()))
02901       HasParameterArea = true;
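        // For example, under ELFv2 a function taking nine i64 arguments has
        // its ninth argument assigned a stack slot, so the loop above forces
        // HasParameterArea to true even though the ABI would otherwise let
        // the caller omit the save area.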
02902 
02903   // Add DAG nodes to load the arguments or copy them out of registers.  On
02904   // entry to a function on PPC, the arguments start after the linkage area,
02905   // although the first ones are often in registers.
02906 
02907   unsigned ArgOffset = LinkageSize;
02908   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
02909   unsigned &QFPR_idx = FPR_idx;
02910   SmallVector<SDValue, 8> MemOps;
02911   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
02912   unsigned CurArgIdx = 0;
02913   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
02914     SDValue ArgVal;
02915     bool needsLoad = false;
02916     EVT ObjectVT = Ins[ArgNo].VT;
02917     EVT OrigVT = Ins[ArgNo].ArgVT;
02918     unsigned ObjSize = ObjectVT.getStoreSize();
02919     unsigned ArgSize = ObjSize;
02920     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
02921     if (Ins[ArgNo].isOrigArg()) {
02922       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
02923       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
02924     }
02925     // We re-align the argument offset for each argument, except under the
02926     // fast calling convention, where we must take care to do so only when
02927     // the argument will actually use a stack slot.
02928     unsigned CurArgOffset, Align;
02929     auto ComputeArgOffset = [&]() {
02930       /* Respect alignment of argument on the stack.  */
02931       Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
02932       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
02933       CurArgOffset = ArgOffset;
02934     };
02935 
02936     if (CallConv != CallingConv::Fast) {
02937       ComputeArgOffset();
02938 
02939       /* Compute GPR index associated with argument offset.  */
02940       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
02941       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
02942     }
02943 
02944     // FIXME the codegen can be much improved in some cases.
02945     // We do not have to keep everything in memory.
02946     if (Flags.isByVal()) {
02947       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
02948 
02949       if (CallConv == CallingConv::Fast)
02950         ComputeArgOffset();
02951 
02952       // ObjSize is the true size; ArgSize is that rounded up to whole registers.
02953       ObjSize = Flags.getByValSize();
02954       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
02955       // Empty aggregate parameters do not take up registers.  Examples:
02956       //   struct { } a;
02957       //   union  { } b;
02958       //   int c[0];
02959       // etc.  However, we have to provide a place-holder in InVals, so
02960       // pretend we have an 8-byte item at the current address for that
02961       // purpose.
02962       if (!ObjSize) {
02963         int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
02964         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
02965         InVals.push_back(FIN);
02966         continue;
02967       }
02968 
02969       // Create a stack object covering all stack doublewords occupied
02970       // by the argument.  If the argument is (fully or partially) on
02971       // the stack, or if the argument is fully in registers but the
02972       // caller has allocated the parameter save area anyway, we can refer
02973       // directly to the caller's stack frame.  Otherwise, create a
02974       // local copy in our own frame.
02975       int FI;
02976       if (HasParameterArea ||
02977           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
02978         FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
02979       else
02980         FI = MFI->CreateStackObject(ArgSize, Align, false);
02981       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
02982 
02983       // Handle aggregates smaller than 8 bytes.
02984       if (ObjSize < PtrByteSize) {
02985         // The value of the object is its address, which differs from the
02986         // address of the enclosing doubleword on big-endian systems.
02987         SDValue Arg = FIN;
02988         if (!isLittleEndian) {
02989           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
02990           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
02991         }
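              // For example, a 3-byte struct on a big-endian target occupies
              // the high-order bytes of its doubleword, so its address is
              // FIN + 5.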
02992         InVals.push_back(Arg);
02993 
02994         if (GPR_idx != Num_GPR_Regs) {
02995           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
02996           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
02997           SDValue Store;
02998 
02999           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
03000             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
03001                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
03002             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
03003                                       MachinePointerInfo(FuncArg),
03004                                       ObjType, false, false, 0);
03005           } else {
03006             // For sizes that don't fit a truncating store (3, 5, 6, 7),
03007             // store the whole register as-is to the parameter save area
03008             // slot.
03009             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
03010                                  MachinePointerInfo(FuncArg),
03011                                  false, false, 0);
03012           }
03013 
03014           MemOps.push_back(Store);
03015         }
03016         // Whether we copied from a register or not, advance the offset
03017         // into the parameter save area by a full doubleword.
03018         ArgOffset += PtrByteSize;
03019         continue;
03020       }
03021 
03022       // The value of the object is its address, which is the address of
03023       // its first stack doubleword.
03024       InVals.push_back(FIN);
03025 
03026       // Store whatever pieces of the object are in registers to memory.
03027       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
03028         if (GPR_idx == Num_GPR_Regs)
03029           break;
03030 
03031         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03032         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03033         SDValue Addr = FIN;
03034         if (j) {
03035           SDValue Off = DAG.getConstant(j, PtrVT);
03036           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
03037         }
03038         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
03039                                      MachinePointerInfo(FuncArg, j),
03040                                      false, false, 0);
03041         MemOps.push_back(Store);
03042         ++GPR_idx;
03043       }
03044       ArgOffset += ArgSize;
03045       continue;
03046     }
03047 
03048     switch (ObjectVT.getSimpleVT().SimpleTy) {
03049     default: llvm_unreachable("Unhandled argument type!");
03050     case MVT::i1:
03051     case MVT::i32:
03052     case MVT::i64:
03053       // These can be scalar arguments or elements of an integer array type
03054       // passed directly.  Clang may use those instead of "byval" aggregate
03055       // types to avoid forcing arguments to memory unnecessarily.
03056       if (GPR_idx != Num_GPR_Regs) {
03057         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
03058         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
03059 
03060         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
03061           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
03062           // value to MVT::i64 and then truncate to the correct register size.
03063           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
03064       } else {
03065         if (CallConv == CallingConv::Fast)
03066           ComputeArgOffset();
03067 
03068         needsLoad = true;
03069         ArgSize = PtrByteSize;
03070       }
03071       if (CallConv != CallingConv::Fast || needsLoad)
03072         ArgOffset += 8;
03073       break;
03074 
03075     case MVT::f32:
03076     case MVT::f64:
03077       // These can be scalar arguments or elements of a float array type
03078       // passed directly.  The latter are used to implement ELFv2 homogeneous
03079       // float aggregates.
03080       if (FPR_idx != Num_FPR_Regs) {
03081         unsigned VReg;
03082 
03083         if (ObjectVT == MVT::f32)
03084           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
03085         else
03086           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
03087                                                 ? &PPC::VSFRCRegClass
03088                                                 : &PPC::F8RCRegClass);
03089 
03090         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03091         ++FPR_idx;
03092       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
03093         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
03094         // once we support fp <-> gpr moves.
03095 
03096         // This can only ever happen in the presence of f32 array types,
03097         // since otherwise we never run out of FPRs before running out
03098         // of GPRs.
03099         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
03100         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
03101 
03102         if (ObjectVT == MVT::f32) {
03103           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
03104             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
03105                                  DAG.getConstant(32, MVT::i32));
03106           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
03107         }
03108 
03109         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
03110       } else {
03111         if (CallConv == CallingConv::Fast)
03112           ComputeArgOffset();
03113 
03114         needsLoad = true;
03115       }
03116 
03117       // When passing an array of floats, the array occupies consecutive
03118       // space in the argument area; only round up to the next doubleword
03119       // at the end of the array.  Otherwise, each float takes 8 bytes.
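            // For example, a homogeneous aggregate of three floats occupies
            // offsets 0, 4, and 8 of its parameter-area slot, and the total of
            // 12 bytes is then rounded up to 16.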
03120       if (CallConv != CallingConv::Fast || needsLoad) {
03121         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
03122         ArgOffset += ArgSize;
03123         if (Flags.isInConsecutiveRegsLast())
03124           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
03125       }
03126       break;
03127     case MVT::v4f32:
03128     case MVT::v4i32:
03129     case MVT::v8i16:
03130     case MVT::v16i8:
03131     case MVT::v2f64:
03132     case MVT::v2i64:
03133       if (!Subtarget.hasQPX()) {
03134         // These can be scalar arguments or elements of a vector array type
03135         // passed directly.  The latter are used to implement ELFv2
03136         // homogeneous vector aggregates.
03137         if (VR_idx != Num_VR_Regs) {
03138           unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
03139                           MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
03140                           MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
03141           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03142           ++VR_idx;
03143         } else {
03144           if (CallConv == CallingConv::Fast)
03145             ComputeArgOffset();
03146 
03147           needsLoad = true;
03148         }
03149         if (CallConv != CallingConv::Fast || needsLoad)
03150           ArgOffset += 16;
03151         break;
03152       } // not QPX
03153 
03154       assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
03155              "Invalid QPX parameter type");
03156       /* fall through */
03157 
03158     case MVT::v4f64:
03159     case MVT::v4i1:
03160       // QPX vectors are treated like their scalar floating-point subregisters
03161       // (except that they're larger).
03162       unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
03163       if (QFPR_idx != Num_QFPR_Regs) {
03164         const TargetRegisterClass *RC;
03165         switch (ObjectVT.getSimpleVT().SimpleTy) {
03166         case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
03167         case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
03168         default:         RC = &PPC::QBRCRegClass; break;
03169         }
03170 
03171         unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
03172         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03173         ++QFPR_idx;
03174       } else {
03175         if (CallConv == CallingConv::Fast)
03176           ComputeArgOffset();
03177         needsLoad = true;
03178       }
03179       if (CallConv != CallingConv::Fast || needsLoad)
03180         ArgOffset += Sz;
03181       break;
03182     }
03183 
03184     // We need to load the argument to a virtual register if we determined
03185     // above that we ran out of physical registers of the appropriate type.
03186     if (needsLoad) {
03187       if (ObjSize < ArgSize && !isLittleEndian)
03188         CurArgOffset += ArgSize - ObjSize;
03189       int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
03190       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03191       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
03192                            false, false, false, 0);
03193     }
03194 
03195     InVals.push_back(ArgVal);
03196   }
03197 
03198   // Area that is at least reserved in the caller of this function.
03199   unsigned MinReservedArea;
03200   if (HasParameterArea)
03201     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
03202   else
03203     MinReservedArea = LinkageSize;
03204 
03205   // Set the size that is at least reserved in caller of this function.  Tail
03206   // call optimized functions' reserved stack space needs to be aligned so that
03207   // taking the difference between two stack areas will result in an aligned
03208   // stack.
03209   MinReservedArea =
03210       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
03211   FuncInfo->setMinReservedArea(MinReservedArea);
03212 
03213   // If the function takes a variable number of arguments, make a frame index
03214   // for the start of the first vararg value... for expansion of llvm.va_start.
03215   if (isVarArg) {
03216     int Depth = ArgOffset;
03217 
03218     FuncInfo->setVarArgsFrameIndex(
03219       MFI->CreateFixedObject(PtrByteSize, Depth, true));
03220     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
03221 
03222     // If this function is vararg, store any remaining integer argument regs
03223     // to their spots on the stack so that they may be loaded by dereferencing
03224     // the result of va_next.
03225     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
03226          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
03227       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03228       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03229       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
03230                                    MachinePointerInfo(), false, false, 0);
03231       MemOps.push_back(Store);
03232       // Increment the address by eight for the next argument to store
03233       SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
03234       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
03235     }
03236   }
03237 
03238   if (!MemOps.empty())
03239     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
03240 
03241   return Chain;
03242 }
03243 
03244 SDValue
03245 PPCTargetLowering::LowerFormalArguments_Darwin(
03246                                       SDValue Chain,
03247                                       CallingConv::ID CallConv, bool isVarArg,
03248                                       const SmallVectorImpl<ISD::InputArg>
03249                                         &Ins,
03250                                       SDLoc dl, SelectionDAG &DAG,
03251                                       SmallVectorImpl<SDValue> &InVals) const {
03252   // TODO: add description of PPC stack frame format, or at least some docs.
03253   //
03254   MachineFunction &MF = DAG.getMachineFunction();
03255   MachineFrameInfo *MFI = MF.getFrameInfo();
03256   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
03257 
03258   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
03259   bool isPPC64 = PtrVT == MVT::i64;
03260   // Potential tail calls could cause overwriting of argument stack slots.
03261   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
03262                        (CallConv == CallingConv::Fast));
03263   unsigned PtrByteSize = isPPC64 ? 8 : 4;
03264   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
03265   unsigned ArgOffset = LinkageSize;
03266   // Area that is at least reserved in caller of this function.
03267   unsigned MinReservedArea = ArgOffset;
03268 
03269   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
03270     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
03271     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
03272   };
03273   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
03274     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
03275     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
03276   };
03277   static const MCPhysReg VR[] = {
03278     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
03279     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
03280   };
03281 
03282   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
03283   const unsigned Num_FPR_Regs = 13;
03284   const unsigned Num_VR_Regs  = array_lengthof(VR);
03285 
03286   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
03287 
03288   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
03289 
03290   // In 32-bit non-varargs functions, the stack space for vectors is after the
03291   // stack space for non-vectors.  We do not use this space unless we have
03292   // too many vectors to fit in registers, something that only occurs in
03293   // constructed examples, but we have to walk the arglist to figure
03294   // that out... for the pathological case, compute VecArgOffset as the
03295   // start of the vector parameter area.  Computing VecArgOffset is the
03296   // entire point of the following loop.
03297   unsigned VecArgOffset = ArgOffset;
03298   if (!isVarArg && !isPPC64) {
03299     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
03300          ++ArgNo) {
03301       EVT ObjectVT = Ins[ArgNo].VT;
03302       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
03303 
03304       if (Flags.isByVal()) {
03305         // ObjSize is the true size; ArgSize is that rounded up to whole regs.
03306         unsigned ObjSize = Flags.getByValSize();
03307         unsigned ArgSize =
03308                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
03309         VecArgOffset += ArgSize;
03310         continue;
03311       }
03312 
03313       switch(ObjectVT.getSimpleVT().SimpleTy) {
03314       default: llvm_unreachable("Unhandled argument type!");
03315       case MVT::i1:
03316       case MVT::i32:
03317       case MVT::f32:
03318         VecArgOffset += 4;
03319         break;
03320       case MVT::i64:  // PPC64
03321       case MVT::f64:
03322         // FIXME: We are guaranteed to be !isPPC64 at this point.
03323         // Does MVT::i64 apply?
03324         VecArgOffset += 8;
03325         break;
03326       case MVT::v4f32:
03327       case MVT::v4i32:
03328       case MVT::v8i16:
03329       case MVT::v16i8:
03330         // Nothing to do; we're only looking at non-vector args here.
03331         break;
03332       }
03333     }
03334   }
03335   // We've found where the vector parameter area in memory is.  Skip the
03336   // first 12 parameters; these don't use that memory.
03337   VecArgOffset = ((VecArgOffset+15)/16)*16;
03338   VecArgOffset += 12*16;
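        // For example, if the scalar arguments of a 32-bit non-varargs
        // function end at offset 40, the vector area starts at
        // ((40 + 15) / 16) * 16 + 12*16 == 240.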
03339 
03340   // Add DAG nodes to load the arguments or copy them out of registers.  On
03341   // entry to a function on PPC, the arguments start after the linkage area,
03342   // although the first ones are often in registers.
03343 
03344   SmallVector<SDValue, 8> MemOps;
03345   unsigned nAltivecParamsAtEnd = 0;
03346   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
03347   unsigned CurArgIdx = 0;
03348   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
03349     SDValue ArgVal;
03350     bool needsLoad = false;
03351     EVT ObjectVT = Ins[ArgNo].VT;
03352     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
03353     unsigned ArgSize = ObjSize;
03354     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
03355     if (Ins[ArgNo].isOrigArg()) {
03356       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
03357       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
03358     }
03359     unsigned CurArgOffset = ArgOffset;
03360 
03361     // Varargs or 64-bit Altivec parameters are padded to a 16-byte boundary.
03362     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
03363         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
03364       if (isVarArg || isPPC64) {
03365         MinReservedArea = ((MinReservedArea+15)/16)*16;
03366         MinReservedArea += CalculateStackSlotSize(ObjectVT,
03367                                                   Flags,
03368                                                   PtrByteSize);
03369       } else nAltivecParamsAtEnd++;
03370     } else
03371       // Calculate min reserved area.
03372       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
03373                                                 Flags,
03374                                                 PtrByteSize);
03375 
03376     // FIXME the codegen can be much improved in some cases.
03377     // We do not have to keep everything in memory.
03378     if (Flags.isByVal()) {
03379       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
03380 
03381       // ObjSize is the true size; ArgSize is that rounded up to whole registers.
03382       ObjSize = Flags.getByValSize();
03383       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
03384       // Objects of size 1 and 2 are right-justified; everything else is
03385       // left-justified.  This means the memory address is adjusted forwards.
03386       if (ObjSize==1 || ObjSize==2) {
03387         CurArgOffset = CurArgOffset + (4 - ObjSize);
03388       }
03389       // The value of the object is its address.
03390       int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
03391       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03392       InVals.push_back(FIN);
03393       if (ObjSize==1 || ObjSize==2) {
03394         if (GPR_idx != Num_GPR_Regs) {
03395           unsigned VReg;
03396           if (isPPC64)
03397             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03398           else
03399             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
03400           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03401           EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
03402           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
03403                                             MachinePointerInfo(FuncArg),
03404                                             ObjType, false, false, 0);
03405           MemOps.push_back(Store);
03406           ++GPR_idx;
03407         }
03408 
03409         ArgOffset += PtrByteSize;
03410 
03411         continue;
03412       }
03413       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
03414         // Store whatever pieces of the object are in registers
03415         // to memory.  ArgOffset will be the address of the beginning
03416         // of the object.
03417         if (GPR_idx != Num_GPR_Regs) {
03418           unsigned VReg;
03419           if (isPPC64)
03420             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03421           else
03422             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
03423           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
03424           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03425           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03426           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
03427                                        MachinePointerInfo(FuncArg, j),
03428                                        false, false, 0);
03429           MemOps.push_back(Store);
03430           ++GPR_idx;
03431           ArgOffset += PtrByteSize;
03432         } else {
03433           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
03434           break;
03435         }
03436       }
03437       continue;
03438     }
03439 
03440     switch (ObjectVT.getSimpleVT().SimpleTy) {
03441     default: llvm_unreachable("Unhandled argument type!");
03442     case MVT::i1:
03443     case MVT::i32:
03444       if (!isPPC64) {
03445         if (GPR_idx != Num_GPR_Regs) {
03446           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
03447           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
03448 
03449           if (ObjectVT == MVT::i1)
03450             ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
03451 
03452           ++GPR_idx;
03453         } else {
03454           needsLoad = true;
03455           ArgSize = PtrByteSize;
03456         }
03457         // All int arguments reserve stack space in the Darwin ABI.
03458         ArgOffset += PtrByteSize;
03459         break;
03460       }
03461       // FALLTHROUGH
03462     case MVT::i64:  // PPC64
03463       if (GPR_idx != Num_GPR_Regs) {
03464         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03465         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
03466 
03467         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
03468           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
03469           // value to MVT::i64 and then truncate to the correct register size.
03470           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
03471 
03472         ++GPR_idx;
03473       } else {
03474         needsLoad = true;
03475         ArgSize = PtrByteSize;
03476       }
03477       // All int arguments reserve stack space in the Darwin ABI.
03478       ArgOffset += 8;
03479       break;
03480 
03481     case MVT::f32:
03482     case MVT::f64:
03483       // Every 4 bytes of argument space consumes one of the GPRs available for
03484       // argument passing.
03485       if (GPR_idx != Num_GPR_Regs) {
03486         ++GPR_idx;
03487         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
03488           ++GPR_idx;
03489       }
03490       if (FPR_idx != Num_FPR_Regs) {
03491         unsigned VReg;
03492 
03493         if (ObjectVT == MVT::f32)
03494           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
03495         else
03496           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
03497 
03498         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03499         ++FPR_idx;
03500       } else {
03501         needsLoad = true;
03502       }
03503 
03504       // All FP arguments reserve stack space in the Darwin ABI.
03505       ArgOffset += isPPC64 ? 8 : ObjSize;
03506       break;
03507     case MVT::v4f32:
03508     case MVT::v4i32:
03509     case MVT::v8i16:
03510     case MVT::v16i8:
03511       // Note that vector arguments in registers don't reserve stack space,
03512       // except in varargs functions.
03513       if (VR_idx != Num_VR_Regs) {
03514         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
03515         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
03516         if (isVarArg) {
03517           while ((ArgOffset % 16) != 0) {
03518             ArgOffset += PtrByteSize;
03519             if (GPR_idx != Num_GPR_Regs)
03520               GPR_idx++;
03521           }
03522           ArgOffset += 16;
03523           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
03524         }
03525         ++VR_idx;
03526       } else {
03527         if (!isVarArg && !isPPC64) {
03528           // Vectors go after all the nonvectors.
03529           CurArgOffset = VecArgOffset;
03530           VecArgOffset += 16;
03531         } else {
03532           // Vectors are aligned.
03533           ArgOffset = ((ArgOffset+15)/16)*16;
03534           CurArgOffset = ArgOffset;
03535           ArgOffset += 16;
03536         }
03537         needsLoad = true;
03538       }
03539       break;
03540     }
03541 
03542     // We need to load the argument to a virtual register if we determined above
03543     // that we ran out of physical registers of the appropriate type.
03544     if (needsLoad) {
03545       int FI = MFI->CreateFixedObject(ObjSize,
03546                                       CurArgOffset + (ArgSize - ObjSize),
03547                                       isImmutable);
03548       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
03549       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
03550                            false, false, false, 0);
03551     }
03552 
03553     InVals.push_back(ArgVal);
03554   }
03555 
03556   // Allow for Altivec parameters at the end, if needed.
03557   if (nAltivecParamsAtEnd) {
03558     MinReservedArea = ((MinReservedArea+15)/16)*16;
03559     MinReservedArea += 16*nAltivecParamsAtEnd;
03560   }
03561 
03562   // Area that is at least reserved in the caller of this function.
03563   MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
03564 
03565   // Set the size that is at least reserved in caller of this function.  Tail
03566   // call optimized functions' reserved stack space needs to be aligned so that
03567   // taking the difference between two stack areas will result in an aligned
03568   // stack.
03569   MinReservedArea =
03570       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
03571   FuncInfo->setMinReservedArea(MinReservedArea);
03572 
03573   // If the function takes a variable number of arguments, make a frame index
03574   // for the start of the first vararg value... for expansion of llvm.va_start.
03575   if (isVarArg) {
03576     int Depth = ArgOffset;
03577 
03578     FuncInfo->setVarArgsFrameIndex(
03579       MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
03580                              Depth, true));
03581     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
03582 
03583     // If this function is vararg, store any remaining integer argument regs
03584     // to their spots on the stack so that they may be loaded by dereferencing
03585     // the result of va_next.
03586     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
03587       unsigned VReg;
03588 
03589       if (isPPC64)
03590         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
03591       else
03592         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
03593 
03594       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
03595       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
03596                                    MachinePointerInfo(), false, false, 0);
03597       MemOps.push_back(Store);
03598       // Increment the address by the pointer size for the next argument to store
03599       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
03600       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
03601     }
03602   }
03603 
03604   if (!MemOps.empty())
03605     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
03606 
03607   return Chain;
03608 }
03609 
03610 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
03611 /// adjusted to accommodate the arguments for the tailcall.
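      /// For example, if the caller reserved 112 bytes of argument area but
      /// the tail call needs 176, SPDiff is -64 and the frame must grow; the
      /// most negative delta seen is remembered in PPCFunctionInfo.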
03612 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
03613                                    unsigned ParamSize) {
03614 
03615   if (!isTailCall) return 0;
03616 
03617   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
03618   unsigned CallerMinReservedArea = FI->getMinReservedArea();
03619   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
03620   // Remember only if the new adjustment is bigger.
03621   if (SPDiff < FI->getTailCallSPDelta())
03622     FI->setTailCallSPDelta(SPDiff);
03623 
03624   return SPDiff;
03625 }
03626 
03627 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
03628 /// for tail call optimization. Targets which want to do tail call
03629 /// optimization should implement this function.
03630 bool
03631 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
03632                                                      CallingConv::ID CalleeCC,
03633                                                      bool isVarArg,
03634                                       const SmallVectorImpl<ISD::InputArg> &Ins,
03635                                                      SelectionDAG& DAG) const {
03636   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
03637     return false;
03638 
03639   // Variable argument functions are not supported.
03640   if (isVarArg)
03641     return false;
03642 
03643   MachineFunction &MF = DAG.getMachineFunction();
03644   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
03645   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
03646     // Functions containing byval parameters are not supported.
03647     for (unsigned i = 0; i != Ins.size(); i++) {
03648        ISD::ArgFlagsTy Flags = Ins[i].Flags;
03649        if (Flags.isByVal()) return false;
03650     }
03651 
03652     // Non-PIC/GOT tail calls are supported.
03653     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
03654       return true;
03655 
03656     // At the moment we can only do local tail calls (in same module, hidden
03657     // or protected) if we are generating PIC.
03658     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
03659       return G->getGlobal()->hasHiddenVisibility()
03660           || G->getGlobal()->hasProtectedVisibility();
03661   }
03662 
03663   return false;
03664 }
03665 
03666 /// isBLACompatibleAddress - Return the immediate to use if the specified
03667 /// 32-bit value is representable in the immediate field of a BxA instruction.
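/// For example (illustrative values): 0x400 is word-aligned and sign-extends
/// from 26 bits, so it yields the immediate 0x400 >> 2 == 0x100; 0x2 fails the
/// alignment check, and 0x4000000 fails the SignExtend32<26> check, so both
/// return null.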
03668 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
03669   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
03670   if (!C) return nullptr;
03671 
03672   int Addr = C->getZExtValue();
03673   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
03674       SignExtend32<26>(Addr) != Addr)
03675     return nullptr;  // Top 6 bits have to be sext of immediate.
03676 
03677   return DAG.getConstant((int)C->getZExtValue() >> 2,
03678                          DAG.getTargetLoweringInfo().getPointerTy()).getNode();
03679 }
03680 
03681 namespace {
03682 
03683 struct TailCallArgumentInfo {
03684   SDValue Arg;
03685   SDValue FrameIdxOp;
03686   int       FrameIdx;
03687 
03688   TailCallArgumentInfo() : FrameIdx(0) {}
03689 };
03690 
03691 }
03692 
03693 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
03694 static void
03695 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
03696                                            SDValue Chain,
03697                    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
03698                    SmallVectorImpl<SDValue> &MemOpChains,
03699                    SDLoc dl) {
03700   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
03701     SDValue Arg = TailCallArgs[i].Arg;
03702     SDValue FIN = TailCallArgs[i].FrameIdxOp;
03703     int FI = TailCallArgs[i].FrameIdx;
03704     // Store relative to the frame pointer.
03705     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
03706                                        MachinePointerInfo::getFixedStack(FI),
03707                                        false, false, 0));
03708   }
03709 }
03710 
03711 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
03712 /// the appropriate stack slot for the tail call optimized function call.
03713 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
03714                                                MachineFunction &MF,
03715                                                SDValue Chain,
03716                                                SDValue OldRetAddr,
03717                                                SDValue OldFP,
03718                                                int SPDiff,
03719                                                bool isPPC64,
03720                                                bool isDarwinABI,
03721                                                SDLoc dl) {
03722   if (SPDiff) {
03723     // Calculate the new stack slot for the return address.
03724     int SlotSize = isPPC64 ? 8 : 4;
03725     const PPCFrameLowering *FL =
03726         MF.getSubtarget<PPCSubtarget>().getFrameLowering();
03727     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
03728     int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
03729                                                           NewRetAddrLoc, true);
03730     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
03731     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
03732     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
03733                          MachinePointerInfo::getFixedStack(NewRetAddr),
03734                          false, false, 0);
03735 
03736     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
03737     // slot as the FP is never overwritten.
03738     if (isDarwinABI) {
03739       int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
03740       int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
03741                                                           true);
03742       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
03743       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
03744                            MachinePointerInfo::getFixedStack(NewFPIdx),
03745                            false, false, 0);
03746     }
03747   }
03748   return Chain;
03749 }
03750 
03751 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
03752 /// the position of the argument.
03753 static void
03754 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
03755                          SDValue Arg, int SPDiff, unsigned ArgOffset,
03756                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
03757   int Offset = ArgOffset + SPDiff;
03758   uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
03759   int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
03760   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
03761   SDValue FIN = DAG.getFrameIndex(FI, VT);
03762   TailCallArgumentInfo Info;
03763   Info.Arg = Arg;
03764   Info.FrameIdxOp = FIN;
03765   Info.FrameIdx = FI;
03766   TailCallArguments.push_back(Info);
03767 }
03768 
03769 /// EmitTailCallLoadFPAndRetAddr - Emit loads of the return address and frame
03770 /// pointer stack slots. Returns the chain as result and the loaded values in
03771 /// LROpOut/FPOpOut. Used when tail calling.
03772 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG,
03773                                                         int SPDiff,
03774                                                         SDValue Chain,
03775                                                         SDValue &LROpOut,
03776                                                         SDValue &FPOpOut,
03777                                                         bool isDarwinABI,
03778                                                         SDLoc dl) const {
03779   if (SPDiff) {
03780     // Load the LR and FP stack slot for later adjusting.
03781     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
03782     LROpOut = getReturnAddrFrameIndex(DAG);
03783     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
03784                           false, false, false, 0);
03785     Chain = SDValue(LROpOut.getNode(), 1);
03786 
03787     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
03788     // slot as the FP is never overwritten.
03789     if (isDarwinABI) {
03790       FPOpOut = getFramePointerFrameIndex(DAG);
03791       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
03792                             false, false, false, 0);
03793       Chain = SDValue(FPOpOut.getNode(), 1);
03794     }
03795   }
03796   return Chain;
03797 }
03798 
03799 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
03800 /// by "Src" to address "Dst" of size "Size".  Alignment information is
03801 /// specified by the specific parameter attribute. The copy will be passed as
03802 /// a byval function parameter.
03803 /// Sometimes what we are copying is the end of a larger object, the part that
03804 /// does not fit in registers.
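/// As a concrete sketch (the declaration is assumed): for a parameter
///   struct S { char Buf[12]; };
/// passed byval, Flags.getByValSize() == 12 and the call below expands to a
/// 12-byte memcpy from the caller's object to the outgoing argument area.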
03805 static SDValue
03806 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
03807                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
03808                           SDLoc dl) {
03809   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
03810   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
03811                        false, false, false, MachinePointerInfo(),
03812                        MachinePointerInfo());
03813 }
03814 
03815 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
03816 /// tail calls.
03817 static void
03818 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
03819                  SDValue Arg, SDValue PtrOff, int SPDiff,
03820                  unsigned ArgOffset, bool isPPC64, bool isTailCall,
03821                  bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
03822                  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
03823                  SDLoc dl) {
03824   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
03825   if (!isTailCall) {
03826     if (isVector) {
03827       SDValue StackPtr;
03828       if (isPPC64)
03829         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
03830       else
03831         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
03832       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
03833                            DAG.getConstant(ArgOffset, PtrVT));
03834     }
03835     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
03836                                        MachinePointerInfo(), false, false, 0));
03837   // Calculate and remember argument location.
03838   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
03839                                   TailCallArguments);
03840 }
03841 
03842 static
03843 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
03844                      SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
03845                      SDValue LROp, SDValue FPOp, bool isDarwinABI,
03846                      SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
03847   MachineFunction &MF = DAG.getMachineFunction();
03848 
03849   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
03850   // might overwrite each other in case of tail call optimization.
03851   SmallVector<SDValue, 8> MemOpChains2;
03852   // Do not flag preceding copytoreg stuff together with the following stuff.
03853   InFlag = SDValue();
03854   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
03855                                     MemOpChains2, dl);
03856   if (!MemOpChains2.empty())
03857     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
03858 
03859   // Store the return address to the appropriate stack slot.
03860   Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
03861                                         isPPC64, isDarwinABI, dl);
03862 
03863   // Emit callseq_end just before tailcall node.
03864   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
03865                              DAG.getIntPtrConstant(0, true), InFlag, dl);
03866   InFlag = Chain.getValue(1);
03867 }
03868 
03869 // Is this global address that of a function that can be called by name (as
03870 // opposed to something that must hold a descriptor for an indirect call)?
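// For instance, a GlobalAddress node referring to a function symbol passes
// this test, while a TLS symbol or a global variable that merely holds a
// function pointer does not; the latter take the indirect-call path.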
03871 static bool isFunctionGlobalAddress(SDValue Callee) {
03872   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
03873     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
03874         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
03875       return false;
03876 
03877     return G->getGlobal()->getType()->getElementType()->isFunctionTy();
03878   }
03879 
03880   return false;
03881 }
03882 
03883 static
03884 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
03885                      SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
03886                      bool isTailCall, bool IsPatchPoint,
03887                      SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
03888                      SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
03889                      ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
03890 
03891   bool isPPC64 = Subtarget.isPPC64();
03892   bool isSVR4ABI = Subtarget.isSVR4ABI();
03893   bool isELFv2ABI = Subtarget.isELFv2ABI();
03894 
03895   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
03896   NodeTys.push_back(MVT::Other);   // Returns a chain
03897   NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
03898 
03899   unsigned CallOpc = PPCISD::CALL;
03900 
03901   bool needIndirectCall = true;
03902   if (!isSVR4ABI || !isPPC64)
03903     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
03904       // If this is an absolute destination address, use the munged value.
03905       Callee = SDValue(Dest, 0);
03906       needIndirectCall = false;
03907     }
03908 
03909   if (isFunctionGlobalAddress(Callee)) {
03910     GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
03911     // A call to a TLS address is actually an indirect call to a
03912     // thread-specific pointer.
03913     unsigned OpFlags = 0;
03914     if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
03915          (Subtarget.getTargetTriple().isMacOSX() &&
03916           Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
03917          (G->getGlobal()->isDeclaration() ||
03918           G->getGlobal()->isWeakForLinker())) ||
03919         (Subtarget.isTargetELF() && !isPPC64 &&
03920          !G->getGlobal()->hasLocalLinkage() &&
03921          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
03922       // PC-relative references to external symbols should go through $stub,
03923       // unless we're building with the leopard linker or later, which
03924       // automatically synthesizes these stubs.
03925       OpFlags = PPCII::MO_PLT_OR_STUB;
03926     }
03927 
03928     // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
03929     // every direct call is), turn it into a TargetGlobalAddress /
03930     // TargetExternalSymbol node so that legalize doesn't hack it.
03931     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
03932                                         Callee.getValueType(), 0, OpFlags);
03933     needIndirectCall = false;
03934   }
03935 
03936   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
03937     unsigned char OpFlags = 0;
03938 
03939     if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
03940          (Subtarget.getTargetTriple().isMacOSX() &&
03941           Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
03942         (Subtarget.isTargetELF() && !isPPC64 &&
03943          DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
03944       // PC-relative references to external symbols should go through $stub,
03945       // unless we're building with the leopard linker or later, which
03946       // automatically synthesizes these stubs.
03947       OpFlags = PPCII::MO_PLT_OR_STUB;
03948     }
03949 
03950     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
03951                                          OpFlags);
03952     needIndirectCall = false;
03953   }
03954 
03955   if (IsPatchPoint) {
03956     // We'll form an invalid direct call when lowering a patchpoint; the full
03957     // sequence for an indirect call is complicated, and many of the
03958     // instructions introduced might have side effects (and, thus, can't be
03959     // removed later). The call itself will be removed as soon as the
03960     // argument/return lowering is complete, so the fact that it has the wrong
03961     // kind of operands should not really matter.
03962     needIndirectCall = false;
03963   }
03964 
03965   if (needIndirectCall) {
03966     // Otherwise, this is an indirect call.  We have to use an MTCTR/BCTRL pair
03967     // to do the call; we can't use PPCISD::CALL.
03968     SDValue MTCTROps[] = {Chain, Callee, InFlag};
03969 
03970     if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
03971       // Function pointers in the 64-bit SVR4 ABI do not point to the function
03972       // entry point, but to the function descriptor (the function entry point
03973       // address is part of the function descriptor though).
03974       // The function descriptor is a three doubleword structure with the
03975       // following fields: function entry point, TOC base address and
03976       // environment pointer.
03977       // Thus for a call through a function pointer, the following actions need
03978       // to be performed:
03979       //   1. Save the TOC of the caller in the TOC save area of its stack
03980       //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
03981       //   2. Load the address of the function entry point from the function
03982       //      descriptor.
03983       //   3. Load the TOC of the callee from the function descriptor into r2.
03984       //   4. Load the environment pointer from the function descriptor into
03985       //      r11.
03986       //   5. Branch to the function entry point address.
03987       //   6. On return of the callee, the TOC of the caller needs to be
03988       //      restored (this is done in FinishCall()).
03989       //
03990       // The loads are scheduled at the beginning of the call sequence, and the
03991       // register copies are flagged together to ensure that no other
03992       // operations can be scheduled in between. E.g. without flagging the
03993       // copies together, a TOC access in the caller could be scheduled between
03994       // the assignment of the callee TOC and the branch to the callee, which
03995       // results in the TOC access going through the TOC of the callee instead
03996       // of going through the TOC of the caller, which leads to incorrect code.
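      //
      // Schematically, the descriptor implied by the 0/8/16-byte offsets used
      // below is (a sketch, not a type that appears in this file):
      //
      //   struct FunctionDescriptor {
      //     void *EntryPoint;   // offset 0, moved to CTR
      //     void *TOCBase;      // offset 8, loaded into r2
      //     void *Environment;  // offset 16, loaded into r11
      //   };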
03997 
03998       // Load the address of the function entry point from the function
03999       // descriptor.
04000       SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
04001       if (LDChain.getValueType() == MVT::Glue)
04002         LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
04003 
04004       bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();
04005 
04006       MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
04007       SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
04008                                         false, false, LoadsInv, 8);
04009 
04010       // Load environment pointer into r11.
04011       SDValue PtrOff = DAG.getIntPtrConstant(16);
04012       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
04013       SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
04014                                        MPI.getWithOffset(16), false, false,
04015                                        LoadsInv, 8);
04016 
04017       SDValue TOCOff = DAG.getIntPtrConstant(8);
04018       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
04019       SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
04020                                    MPI.getWithOffset(8), false, false,
04021                                    LoadsInv, 8);
04022 
04023       setUsesTOCBasePtr(DAG);
04024       SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
04025                                         InFlag);
04026       Chain = TOCVal.getValue(0);
04027       InFlag = TOCVal.getValue(1);
04028 
04029       SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
04030                                         InFlag);
04031 
04032       Chain = EnvVal.getValue(0);
04033       InFlag = EnvVal.getValue(1);
04034 
04035       MTCTROps[0] = Chain;
04036       MTCTROps[1] = LoadFuncPtr;
04037       MTCTROps[2] = InFlag;
04038     }
04039 
04040     Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
04041                         makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
04042     InFlag = Chain.getValue(1);
04043 
04044     NodeTys.clear();
04045     NodeTys.push_back(MVT::Other);
04046     NodeTys.push_back(MVT::Glue);
04047     Ops.push_back(Chain);
04048     CallOpc = PPCISD::BCTRL;
04049     Callee.setNode(nullptr);
04050     // Add use of X11 (holding environment pointer)
04051     if (isSVR4ABI && isPPC64 && !isELFv2ABI)
04052       Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
04053     // Add CTR register as callee so a bctr can be emitted later.
04054     if (isTailCall)
04055       Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
04056   }
04057 
04058   // If this is a direct call, pass the chain and the callee.
04059   if (Callee.getNode()) {
04060     Ops.push_back(Chain);
04061     Ops.push_back(Callee);
04062   }
04063   // If this is a tail call, add the stack pointer delta.
04064   if (isTailCall)
04065     Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
04066 
04067   // Add argument registers to the end of the list so that they are known live
04068   // into the call.
04069   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
04070     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
04071                                   RegsToPass[i].second.getValueType()));
04072 
04073   // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
04074   // into the call.
04075   if (isSVR4ABI && isPPC64 && !IsPatchPoint) {
04076     setUsesTOCBasePtr(DAG);
04077     Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
04078   }
04079 
04080   return CallOpc;
04081 }
04082 
04083 static
04084 bool isLocalCall(const SDValue &Callee) {
04086   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
04087     return !G->getGlobal()->isDeclaration() &&
04088            !G->getGlobal()->isWeakForLinker();
04089   return false;
04090 }
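
// For instance, a call to a function defined in the same module with strong
// linkage is local here; a call to a declaration or to a weak definition is
// not, since the linker may resolve it to a copy with its own TOC.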
04091 
04092 SDValue
04093 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
04094                                    CallingConv::ID CallConv, bool isVarArg,
04095                                    const SmallVectorImpl<ISD::InputArg> &Ins,
04096                                    SDLoc dl, SelectionDAG &DAG,
04097                                    SmallVectorImpl<SDValue> &InVals) const {
04098 
04099   SmallVector<CCValAssign, 16> RVLocs;
04100   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
04101                     *DAG.getContext());
04102   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
04103 
04104   // Copy all of the result registers out of their specified physreg.
04105   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
04106     CCValAssign &VA = RVLocs[i];
04107     assert(VA.isRegLoc() && "Can only return in registers!");
04108 
04109     SDValue Val = DAG.getCopyFromReg(Chain, dl,
04110                                      VA.getLocReg(), VA.getLocVT(), InFlag);
04111     Chain = Val.getValue(1);
04112     InFlag = Val.getValue(2);
04113 
04114     switch (VA.getLocInfo()) {
04115     default: llvm_unreachable("Unknown loc info!");
04116     case CCValAssign::Full: break;
04117     case CCValAssign::AExt:
04118       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
04119       break;
04120     case CCValAssign::ZExt:
04121       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
04122                         DAG.getValueType(VA.getValVT()));
04123       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
04124       break;
04125     case CCValAssign::SExt:
04126       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
04127                         DAG.getValueType(VA.getValVT()));
04128       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
04129       break;
04130     }
04131 
04132     InVals.push_back(Val);
04133   }
04134 
04135   return Chain;
04136 }
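
// As an illustrative example of the promotion handling above (assumed
// prototype): for a callee declared "zeroext i8 @f()", the i8 result arrives
// zero-extended in a GPR; the AssertZext node records that guarantee, so the
// subsequent TRUNCATE back to i8 is known to be lossless.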
04137 
04138 SDValue
04139 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
04140                               bool isTailCall, bool isVarArg, bool IsPatchPoint,
04141                               SelectionDAG &DAG,
04142                               SmallVector<std::pair<unsigned, SDValue>, 8>
04143                                 &RegsToPass,
04144                               SDValue InFlag, SDValue Chain,
04145                               SDValue CallSeqStart, SDValue &Callee,
04146                               int SPDiff, unsigned NumBytes,
04147                               const SmallVectorImpl<ISD::InputArg> &Ins,
04148                               SmallVectorImpl<SDValue> &InVals,
04149                               ImmutableCallSite *CS) const {
04150 
04151   std::vector<EVT> NodeTys;
04152   SmallVector<SDValue, 8> Ops;
04153   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
04154                                  SPDiff, isTailCall, IsPatchPoint, RegsToPass,
04155                                  Ops, NodeTys, CS, Subtarget);
04156 
04157   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
04158   if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
04159     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
04160 
04161   // When performing tail call optimization the callee pops its arguments off
04162   // the stack. Account for this here so these bytes can be pushed back on in
04163   // PPCFrameLowering::eliminateCallFramePseudoInstr.
04164   int BytesCalleePops =
04165     (CallConv == CallingConv::Fast &&
04166      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
04167 
04168   // Add a register mask operand representing the call-preserved registers.
04169   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
04170   const uint32_t *Mask =
04171       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
04172   assert(Mask && "Missing call preserved mask for calling convention");
04173   Ops.push_back(DAG.getRegisterMask(Mask));
04174 
04175   if (InFlag.getNode())
04176     Ops.push_back(InFlag);
04177 
04178   // Emit tail call.
04179   if (isTailCall) {
04180     assert(((Callee.getOpcode() == ISD::Register &&
04181              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
04182             Callee.getOpcode() == ISD::TargetExternalSymbol ||
04183             Callee.getOpcode() == ISD::TargetGlobalAddress ||
04184             isa<ConstantSDNode>(Callee)) &&
04185     "Expecting an global address, external symbol, absolute value or register");
04186 
04187     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
04188   }
04189 
04190   // Add a NOP immediately after the branch instruction when using the 64-bit
04191 // SVR4 ABI. At link time, if caller and callee are in different modules and
04192   // thus have a different TOC, the call will be replaced with a call to a stub
04193   // function which saves the current TOC, loads the TOC of the callee and
04194   // branches to the callee. The NOP will be replaced with a load instruction
04195   // which restores the TOC of the caller from the TOC save slot of the current
04196   // stack frame. If caller and callee belong to the same module (and have the
04197   // same TOC), the NOP will remain unchanged.
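  //
  // Schematically (a sketch; the exact TOC save offset is ABI-dependent):
  //
  //   bl callee          // direct call
  //   nop                // same module: left unchanged
  //                      // cross-module: patched to ld r2, <tocsave>(r1)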
04198 
04199   if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
04200       !IsPatchPoint) {
04201     if (CallOpc == PPCISD::BCTRL) {
04202       // This is a call through a function pointer.
04203       // Restore the caller TOC from the save area into R2.
04204       // See PrepareCall() for more information about calls through function
04205       // pointers in the 64-bit SVR4 ABI.
04206       // We are using a target-specific load with r2 hard coded, because the
04207       // result of a target-independent load would never go directly into r2,
04208       // since r2 is a reserved register (which prevents the register allocator
04209       // from allocating it), resulting in an additional register being
04210       // allocated and an unnecessary move instruction being generated.
04211       CallOpc = PPCISD::BCTRL_LOAD_TOC;
04212 
04213       EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
04214       SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
04215       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
04216       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
04217       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
04218 
04219       // The address needs to go after the chain input but before the flag (or
04220       // any other variadic arguments).
04221       Ops.insert(std::next(Ops.begin()), AddTOC);
04222     } else if ((CallOpc == PPCISD::CALL) &&
04223                (!isLocalCall(Callee) ||
04224                 DAG.getTarget().getRelocationModel() == Reloc::PIC_))
04225       // Otherwise insert NOP for non-local calls.
04226       CallOpc = PPCISD::CALL_NOP;
04227   }
04228 
04229   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
04230   InFlag = Chain.getValue(1);
04231 
04232   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
04233                              DAG.getIntPtrConstant(BytesCalleePops, true),
04234                              InFlag, dl);
04235   if (!Ins.empty())
04236     InFlag = Chain.getValue(1);
04237 
04238   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
04239                          Ins, dl, DAG, InVals);
04240 }
04241 
04242 SDValue
04243 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
04244                              SmallVectorImpl<SDValue> &InVals) const {
04245   SelectionDAG &DAG                     = CLI.DAG;
04246   SDLoc &dl                             = CLI.DL;
04247   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
04248   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
04249   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
04250   SDValue Chain                         = CLI.Chain;
04251   SDValue Callee                        = CLI.Callee;
04252   bool &isTailCall                      = CLI.IsTailCall;
04253   CallingConv::ID CallConv              = CLI.CallConv;
04254   bool isVarArg                         = CLI.IsVarArg;
04255   bool IsPatchPoint                     = CLI.IsPatchPoint;
04256   ImmutableCallSite *CS                 = CLI.CS;
04257 
04258   if (isTailCall)
04259     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
04260                                                    Ins, DAG);
04261 
04262   if (!isTailCall && CS && CS->isMustTailCall())
04263     report_fatal_error("failed to perform tail call elimination on a call "
04264                        "site marked musttail");
04265 
04266   if (Subtarget.isSVR4ABI()) {
04267     if (Subtarget.isPPC64())
04268       return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
04269                               isTailCall, IsPatchPoint, Outs, OutVals, Ins,
04270                               dl, DAG, InVals, CS);
04271     else
04272       return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
04273                               isTailCall, IsPatchPoint, Outs, OutVals, Ins,
04274                               dl, DAG, InVals, CS);
04275   }
04276 
04277   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
04278                           isTailCall, IsPatchPoint, Outs, OutVals, Ins,
04279                           dl, DAG, InVals, CS);
04280 }
04281 
04282 SDValue
04283 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
04284                                     CallingConv::ID CallConv, bool isVarArg,
04285                                     bool isTailCall, bool IsPatchPoint,
04286                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
04287                                     const SmallVectorImpl<SDValue> &OutVals,
04288                                     const SmallVectorImpl<ISD::InputArg> &Ins,
04289                                     SDLoc dl, SelectionDAG &DAG,
04290                                     SmallVectorImpl<SDValue> &InVals,
04291                                     ImmutableCallSite *CS) const {
04292   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
04293   // of the 32-bit SVR4 ABI stack frame layout.
04294 
04295   assert((CallConv == CallingConv::C ||
04296           CallConv == CallingConv::Fast) && "Unknown calling convention!");
04297 
04298   unsigned PtrByteSize = 4;
04299 
04300   MachineFunction &MF = DAG.getMachineFunction();
04301 
04302   // Mark this function as potentially containing a tail call. As a
04303   // consequence, the frame pointer will be used for dynamic stack allocation
04304   // and for restoring the caller's stack pointer in this function's epilogue.
04305   // This is done because the tail-called function might overwrite the value
04306   // in this function's (MF) stack pointer stack slot 0(SP).
04307   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
04308       CallConv == CallingConv::Fast)
04309     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
04310 
04311   // Count how many bytes are to be pushed on the stack, including the linkage
04312   // area, parameter list area and the part of the local variable space which
04313   // contains copies of aggregates which are passed by value.
04314 
04315   // Assign locations to all of the outgoing arguments.
04316   SmallVector<CCValAssign, 16> ArgLocs;
04317   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
04318                  *DAG.getContext());
04319 
04320   // Reserve space for the linkage area on the stack.
04321   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
04322                        PtrByteSize);
04323 
04324   if (isVarArg) {
04325     // Handle fixed and variable vector arguments differently.
04326     // Fixed vector arguments go into registers as long as registers are
04327     // available. Variable vector arguments always go into memory.
04328     unsigned NumArgs = Outs.size();
04329 
04330     for (unsigned i = 0; i != NumArgs; ++i) {
04331       MVT ArgVT = Outs[i].VT;
04332       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
04333       bool Result;
04334 
04335       if (Outs[i].IsFixed) {
04336         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
04337                                CCInfo);
04338       } else {
04339         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
04340                                       ArgFlags, CCInfo);
04341       }
04342 
04343       if (Result) {
04344 #ifndef NDEBUG
04345         errs() << "Call operand #" << i << " has unhandled type "
04346              << EVT(ArgVT).getEVTString() << "\n";
04347 #endif
04348         llvm_unreachable(nullptr);
04349       }
04350     }
04351   } else {
04352     // All arguments are treated the same.
04353     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
04354   }
04355 
04356   // Assign locations to all of the outgoing aggregate by value arguments.
04357   SmallVector<CCValAssign, 16> ByValArgLocs;
04358   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
04359                       ByValArgLocs, *DAG.getContext());
04360 
04361   // Reserve stack space for the allocations in CCInfo.
04362   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
04363 
04364   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
04365 
04366   // Size of the linkage area, parameter list area and the part of the local
04367   // variable space where copies of aggregates which are passed by value are
04368   // stored.
04369   unsigned NumBytes = CCByValInfo.getNextStackOffset();
04370 
04371   // Calculate by how many bytes the stack has to be adjusted in case of tail
04372   // call optimization.
04373   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
04374 
04375   // Adjust the stack pointer for the new arguments...
04376   // These operations are automatically eliminated by the prolog/epilog pass
04377   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
04378                                dl);
04379   SDValue CallSeqStart = Chain;
04380 
04381   // Load the return address and frame pointer so they can be moved somewhere
04382   // else later.
04383   SDValue LROp, FPOp;
04384   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
04385                                        dl);
04386 
04387   // Set up a copy of the stack pointer for use loading and storing any
04388   // arguments that may not fit in the registers available for argument
04389   // passing.
04390   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
04391 
04392   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
04393   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
04394   SmallVector<SDValue, 8> MemOpChains;
04395 
04396   bool seenFloatArg = false;
04397   // Walk the register/memloc assignments, inserting copies/loads.
04398   for (unsigned i = 0, j = 0, e = ArgLocs.size();
04399        i != e;
04400        ++i) {
04401     CCValAssign &VA = ArgLocs[i];
04402     SDValue Arg = OutVals[i];
04403     ISD::ArgFlagsTy Flags = Outs[i].Flags;
04404 
04405     if (Flags.isByVal()) {
04406       // Argument is an aggregate which is passed by value, thus we need to
04407       // create a copy of it in the local variable space of the current stack
04408       // frame (which is the stack frame of the caller) and pass the address of
04409       // this copy to the callee.
04410       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
04411       CCValAssign &ByValVA = ByValArgLocs[j++];
04412       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
04413 
04414       // Memory reserved in the local variable space of the caller's stack frame.
04415       unsigned LocMemOffset = ByValVA.getLocMemOffset();
04416 
04417       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
04418       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
04419 
04420       // Create a copy of the argument in the local area of the current
04421       // stack frame.
04422       SDValue MemcpyCall =
04423         CreateCopyOfByValArgument(Arg, PtrOff,
04424                                   CallSeqStart.getNode()->getOperand(0),
04425                                   Flags, DAG, dl);
04426 
04427       // This must go outside the CALLSEQ_START..END.
04428       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
04429                            CallSeqStart.getNode()->getOperand(1),
04430                            SDLoc(MemcpyCall));
04431       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
04432                              NewCallSeqStart.getNode());
04433       Chain = CallSeqStart = NewCallSeqStart;
04434 
04435       // Pass the address of the aggregate copy on the stack either in a
04436       // physical register or in the parameter list area of the current stack
04437       // frame to the callee.
04438       Arg = PtrOff;
04439     }
04440 
04441     if (VA.isRegLoc()) {
04442       if (Arg.getValueType() == MVT::i1)
04443         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
04444 
04445       seenFloatArg |= VA.getLocVT().isFloatingPoint();
04446       // Put argument in a physical register.
04447       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
04448     } else {
04449       // Put argument in the parameter list area of the current stack frame.
04450       assert(VA.isMemLoc());
04451       unsigned LocMemOffset = VA.getLocMemOffset();
04452 
04453       if (!isTailCall) {
04454         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
04455         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
04456 
04457         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
04458                                            MachinePointerInfo(),
04459                                            false, false, 0));
04460       } else {
04461         // Calculate and remember argument location.
04462         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
04463                                  TailCallArguments);
04464       }
04465     }
04466   }
04467 
04468   if (!MemOpChains.empty())
04469     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
04470 
04471   // Build a sequence of copy-to-reg nodes chained together with token chain
04472   // and flag operands which copy the outgoing args into the appropriate regs.
04473   SDValue InFlag;
04474   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
04475     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
04476                              RegsToPass[i].second, InFlag);
04477     InFlag = Chain.getValue(1);
04478   }
04479 
04480   // Set CR bit 6 to true if this is a vararg call with floating args passed in
04481   // registers.
04482   if (isVarArg) {
04483     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
04484     SDValue Ops[] = { Chain, InFlag };
04485 
04486     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
04487                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
04488 
04489     InFlag = Chain.getValue(1);
04490   }
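
  // For example (an assumed call site): lowering printf("%f\n", X) under the
  // 32-bit SVR4 ABI passes X in an FPR, so seenFloatArg is true and a CR6SET
  // node is emitted; the callee's va_start code tests CR bit 6 to decide
  // whether the FPR arguments need to be saved.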
04491 
04492   if (isTailCall)
04493     PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
04494                     false, TailCallArguments);
04495 
04496   return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
04497                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
04498                     NumBytes, Ins, InVals, CS);
04499 }
04500 
04501 // Copy an argument into memory, being careful to do this outside the
04502 // call sequence for the call to which the argument belongs.
04503 SDValue
04504 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
04505                                               SDValue CallSeqStart,
04506                                               ISD::ArgFlagsTy Flags,
04507                                               SelectionDAG &DAG,
04508                                               SDLoc dl) const {
04509   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
04510                         CallSeqStart.getNode()->getOperand(0),
04511                         Flags, DAG, dl);
04512   // The MEMCPY must go outside the CALLSEQ_START..END.
04513   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
04514                              CallSeqStart.getNode()->getOperand(1),
04515                              SDLoc(MemcpyCall));
04516   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
04517                          NewCallSeqStart.getNode());
04518   return NewCallSeqStart;
04519 }
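
// A short sketch of why the re-chaining above matters: the byval copy may
// itself be lowered to a memcpy() libcall, and call sequences must not nest,
// so the copy is attached to the original CALLSEQ_START's incoming chain
// rather than being placed inside the CALLSEQ_START..CALLSEQ_END region.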
04520 
04521 SDValue
04522 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
04523                                     CallingConv::ID CallConv, bool isVarArg,
04524                                     bool isTailCall, bool IsPatchPoint,
04525                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
04526                                     const SmallVectorImpl<SDValue> &OutVals,
04527                                     const SmallVectorImpl<ISD::InputArg> &Ins,
04528                                     SDLoc dl, SelectionDAG &DAG,
04529                                     SmallVectorImpl<SDValue> &InVals,
04530                                     ImmutableCallSite *CS) const {
04531 
04532   bool isELFv2ABI = Subtarget.isELFv2ABI();
04533   bool isLittleEndian = Subtarget.isLittleEndian();
04534   unsigned NumOps = Outs.size();
04535 
04536   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
04537   unsigned PtrByteSize = 8;
04538 
04539   MachineFunction &MF = DAG.getMachineFunction();
04540 
04541   // Mark this function as potentially containing a tail call. As a
04542   // consequence, the frame pointer will be used for dynamic stack allocation
04543   // and for restoring the caller's stack pointer in this function's epilogue.
04544   // This is done because the tail-called function might overwrite the value
04545   // in this function's (MF) stack pointer stack slot 0(SP).
04546   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
04547       CallConv == CallingConv::Fast)
04548     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
04549 
04550   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
04551          "fastcc not supported on varargs functions");
04552 
04553   // Count how many bytes are to be pushed on the stack, including the linkage
04554   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
04555   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
04556   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
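  // Schematically (doubleword offsets implied by the description above; a
  // sketch, not authoritative):
  //   ELFv1: 0 back chain, 8 CR, 16 LR, 24/32 unused, 40 TOC  (48 bytes)
  //   ELFv2: 0 back chain, 8 CR, 16 LR, 24 TOC                (32 bytes)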
04557   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
04558   unsigned NumBytes = LinkageSize;
04559   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
04560   unsigned &QFPR_idx = FPR_idx;
04561 
04562   static const MCPhysReg GPR[] = {
04563     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
04564     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
04565   };
04566   static const MCPhysReg VR[] = {
04567     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
04568     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
04569   };
04570   static const MCPhysReg VSRH[] = {
04571     PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
04572     PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
04573   };
04574 
04575   const unsigned NumGPRs = array_lengthof(GPR);
04576   const unsigned NumFPRs = 13;
04577   const unsigned NumVRs  = array_lengthof(VR);
04578   const unsigned NumQFPRs = NumFPRs;
04579 
04580   // When using the fast calling convention, we don't provide backing for
04581   // arguments that will be in registers.
04582   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
04583 
04584   // Add up all the space actually used.
04585   for (unsigned i = 0; i != NumOps; ++i) {
04586     ISD::ArgFlagsTy Flags = Outs[i].Flags;
04587     EVT ArgVT = Outs[i].VT;
04588     EVT OrigVT = Outs[i].ArgVT;
04589 
04590     if (CallConv == CallingConv::Fast) {
04591       if (Flags.isByVal())
04592         NumGPRsUsed += (Flags.getByValSize()+7)/8;
04593       else
04594         switch (ArgVT.getSimpleVT().SimpleTy) {
04595         default: llvm_unreachable("Unexpected ValueType for argument!");
04596         case MVT::i1:
04597         case MVT::i32:
04598         case MVT::i64:
04599           if (++NumGPRsUsed <= NumGPRs)
04600             continue;
04601           break;
04602         case MVT::v4i32:
04603         case MVT::v8i16:
04604         case MVT::v16i8:
04605         case MVT::v2f64:
04606         case MVT::v2i64:
04607           if (++NumVRsUsed <= NumVRs)
04608             continue;
04609           break;
04610         case MVT::v4f32:
04611           // When using QPX, this is handled like an FP register; otherwise,
04612           // it is an Altivec register.
04613           if (Subtarget.hasQPX()) {
04614             if (++NumFPRsUsed <= NumFPRs)
04615               continue;
04616           } else {
04617             if (++NumVRsUsed <= NumVRs)
04618               continue;
04619           }
04620           break;
04621         case MVT::f32:
04622         case MVT::f64:
04623         case MVT::v4f64: // QPX
04624         case MVT::v4i1:  // QPX
04625           if (++NumFPRsUsed <= NumFPRs)
04626             continue;
04627           break;
04628         }
04629     }
04630 
04631     // Respect alignment of the argument on the stack.
04632     unsigned Align =
04633       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
04634     NumBytes = ((NumBytes + Align - 1) / Align) * Align;
04635 
04636     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
04637     if (Flags.isInConsecutiveRegsLast())
04638       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
04639   }
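
  // As a worked example of the rounding above (values assumed): with
  // NumBytes == 52 and a 16-byte-aligned vector argument, Align == 16 and
  // ((52 + 16 - 1) / 16) * 16 == 64, so 12 bytes of padding precede the
  // argument's slot.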
04640 
04641   unsigned NumBytesActuallyUsed = NumBytes;
04642 
04643   // The prolog code of the callee may store up to 8 GPR argument registers to
04644   // the stack, allowing va_start to index over them in memory if it is varargs.
04645   // Because we cannot tell if this is needed on the caller side, we have to
04646   // conservatively assume that it is needed.  As such, make sure we have at
04647   // least enough stack space for the caller to store the 8 GPRs.
04648   // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
04649   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
04650 
04651   // Tail call needs the stack to be aligned.
04652   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
04653       CallConv == CallingConv::Fast)
04654     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
04655 
04656   // Calculate by how many bytes the stack has to be adjusted in case of tail
04657   // call optimization.
04658   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
04659 
04660   // To protect arguments on the stack from being clobbered in a tail call,
04661   // force all the loads to happen before doing any other lowering.
04662   if (isTailCall)
04663     Chain = DAG.getStackArgumentTokenFactor(Chain);
04664 
04665   // Adjust the stack pointer for the new arguments...
04666   // These operations are automatically eliminated by the prolog/epilog pass
04667   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
04668                                dl);
04669   SDValue CallSeqStart = Chain;
04670 
04671   // Load the return address and frame pointer so they can be moved somewhere
04672   // else later.
04673   SDValue LROp, FPOp;
04674   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
04675                                        dl);
04676 
04677   // Set up a copy of the stack pointer for use loading and storing any
04678   // arguments that may not fit in the registers available for argument
04679   // passing.
04680   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
04681 
04682   // Figure out which arguments are going to go in registers, and which in
04683   // memory.  Also, if this is a vararg function, floating point arguments
04684   // must be stored to our stack, and loaded into integer regs as well, if
04685   // any integer regs are available for argument passing.
04686   unsigned ArgOffset = LinkageSize;
04687 
04688   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
04689   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
04690 
04691   SmallVector<SDValue, 8> MemOpChains;
04692   for (unsigned i = 0; i != NumOps; ++i) {
04693     SDValue Arg = OutVals[i];
04694     ISD::ArgFlagsTy Flags = Outs[i].Flags;
04695     EVT ArgVT = Outs[i].VT;
04696     EVT OrigVT = Outs[i].ArgVT;
04697 
04698     // PtrOff will be used to store the current argument to the stack if a
04699     // register cannot be found for it.
04700     SDValue PtrOff;
04701 
04702     // We re-align the argument offset for each argument, except when using the
04703     // fast calling convention, where we must do so only when the argument will
04704     // actually use a stack slot.
04705     auto ComputePtrOff = [&]() {
04706       // Respect alignment of the argument on the stack.
04707       unsigned Align =
04708         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
04709       ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
04710 
04711       PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
04712 
04713       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
04714     };
04715 
04716     if (CallConv != CallingConv::Fast) {
04717       ComputePtrOff();
04718 
04719       // Compute the GPR index associated with the argument offset.
04720       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
04721       GPR_idx = std::min(GPR_idx, NumGPRs);
04722     }
04723 
04724     // Promote integers to 64-bit values.
04725     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
04726       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
04727       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
04728       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
04729     }
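
    // E.g. an i32 argument carrying the signext attribute is widened here with
    // ISD::SIGN_EXTEND, so the callee may assume the full 64-bit register
    // holds the sign-extended value; with neither attribute it currently takes
    // the zero-extend path (see the FIXME above).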
04730 
04731     // FIXME: memcpy is used way more than necessary.  Correctness first.
04732     // Note: "by value" is code for passing a structure by value, not
04733     // basic types.
04734     if (Flags.isByVal()) {
04735       // Note: Size includes alignment padding, so
04736       //   struct x { short a; char b; }
04737       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
04738       // These are the proper values we need for right-justifying the
04739       // aggregate in a parameter register.
04740       unsigned Size = Flags.getByValSize();
04741 
04742       // An empty aggregate parameter takes up no storage and no
04743       // registers.
04744       if (Size == 0)
04745         continue;
04746 
04747       if (CallConv == CallingConv::Fast)
04748         ComputePtrOff();
04749 
04750       // All aggregates smaller than 8 bytes must be passed right-justified.
04751       if (Size==1 || Size==2 || Size==4) {
04752         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
04753         if (GPR_idx != NumGPRs) {
04754           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
04755                                         MachinePointerInfo(), VT,
04756                                         false, false, false, 0);
04757           MemOpChains.push_back(Load.getValue(1));
04758           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
04759 
04760           ArgOffset += PtrByteSize;
04761           continue;
04762         }
04763       }
04764 
04765       if (GPR_idx == NumGPRs && Size < 8) {
04766         SDValue AddPtr = PtrOff;
04767         if (!isLittleEndian) {
04768           SDValue Const = DAG.getConstant(PtrByteSize - Size,
04769                                           PtrOff.getValueType());
04770           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
04771         }
04772         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
04773                                                           CallSeqStart,
04774                                                           Flags, DAG, dl);
04775         ArgOffset += PtrByteSize;
04776         continue;
04777       }
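
      // E.g. (big-endian sketch, Size == 3): Const == 8 - 3 == 5, so the three
      // bytes land in bytes 5..7 of the doubleword argument slot, i.e.
      // right-justified; on little-endian targets no displacement is needed.
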
04778       // Copy entire object into memory.  There are cases where gcc-generated
04779       // code assumes it is there, even if it could be put entirely into
04780       // registers.  (This is not what the doc says.)
04781 
04782       // FIXME: The above statement is likely due to a misunderstanding of the
04783       // documents.  All arguments must be copied into the parameter area BY
04784       // THE CALLEE in the event that the callee takes the address of any
04785       // formal argument.  That has not yet been implemented.  However, it is
04786       // reasonable to use the stack area as a staging area for the register
04787       // load.
04788 
04789       // Skip this for small aggregates, as we will use the same slot for a
04790       // right-justified copy, below.
04791       if (Size >= 8)
04792         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
04793                                                           CallSeqStart,
04794                                                           Flags, DAG, dl);
04795 
04796       // When a register is available, pass a small aggregate right-justified.
04797       if (Size < 8 && GPR_idx != NumGPRs) {
04798         // The easiest way to get this right-justified in a register
04799         // is to copy the structure into the rightmost portion of a
04800         // local variable slot, then load the whole slot into the
04801         // register.
04802         // FIXME: The memcpy seems to produce pretty awful code for
04803         // small aggregates, particularly for packed ones.
04804         // FIXME: It would be preferable to use the slot in the
04805         // parameter save area instead of a new local variable.
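              // Roughly, on a big-endian target this does (illustrative
              // pseudo-C, not part of the lowering itself):
              //   char slot[8];
              //   memcpy(slot + 8 - Size, &obj, Size);   // right-justify
              //   reg = *(unsigned long long *)slot;     // load whole slot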
04806         SDValue AddPtr = PtrOff;
04807         if (!isLittleEndian) {
04808           SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
04809           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
04810         }
04811         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
04812                                                           CallSeqStart,
04813                                                           Flags, DAG, dl);
04814 
04815         // Load the slot into the register.
04816         SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
04817                                    MachinePointerInfo(),
04818                                    false, false, false, 0);
04819         MemOpChains.push_back(Load.getValue(1));
04820         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
04821 
04822         // Done with this argument.
04823         ArgOffset += PtrByteSize;
04824         continue;
04825       }
04826 
04827       // For aggregates larger than PtrByteSize, copy the pieces of the
04828       // object that fit into registers from the parameter save area.
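            // E.g. a 24-byte aggregate with two free GPRs passes its first 16
            // bytes in those GPRs; the remaining 8 bytes stay in the parameter
            // save area (already written by the memcpy above), and ArgOffset
            // advances past the whole object.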
04829       for (unsigned j=0; j<Size; j+=PtrByteSize) {
04830         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
04831         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
04832         if (GPR_idx != NumGPRs) {
04833           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
04834                                      MachinePointerInfo(),
04835                                      false, false, false, 0);
04836           MemOpChains.push_back(Load.getValue(1));
04837           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
04838           ArgOffset += PtrByteSize;
04839         } else {
04840           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
04841           break;
04842         }
04843       }
04844       continue;
04845     }
04846 
04847     switch (Arg.getSimpleValueType().SimpleTy) {
04848     default: llvm_unreachable("Unexpected ValueType for argument!");
04849     case MVT::i1:
04850     case MVT::i32:
04851     case MVT::i64:
04852       // These can be scalar arguments or elements of an integer array type
04853       // passed directly.  Clang may use those instead of "byval" aggregate
04854       // types to avoid forcing arguments to memory unnecessarily.
04855       if (GPR_idx != NumGPRs) {
04856         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
04857       } else {
04858         if (CallConv == CallingConv::Fast)
04859           ComputePtrOff();
04860 
04861         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
04862                          true, isTailCall, false, MemOpChains,
04863                          TailCallArguments, dl);
04864         if (CallConv == CallingConv::Fast)
04865           ArgOffset += PtrByteSize;
04866       }
04867       if (CallConv != CallingConv::Fast)
04868         ArgOffset += PtrByteSize;
04869       break;
04870     case MVT::f32:
04871     case MVT::f64: {
04872       // These can be scalar arguments or elements of a float array type
04873       // passed directly.  The latter are used to implement ELFv2 homogeneous
04874       // float aggregates.
04875 
04876       // Named arguments go into FPRs first, and once they overflow, the
04877       // remaining arguments go into GPRs and then the parameter save area.
04878       // Unnamed arguments for vararg functions always go to GPRs and
04879       // then the parameter save area.  For now, put all arguments to vararg
04880       // routines always in both locations (FPR *and* GPR or stack slot).
04881       bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
04882       bool NeededLoad = false;
04883 
04884       // First load the argument into the next available FPR.
04885       if (FPR_idx != NumFPRs)
04886         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
04887 
04888       // Next, load the argument into GPR or stack slot if needed.
04889       if (!NeedGPROrStack)
04890         ;
04891       else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
04892         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
04893         // once we support fp <-> gpr moves.
04894 
04895         // In the non-vararg case, this can only ever happen in the
04896         // presence of f32 array types, since otherwise we never run
04897         // out of FPRs before running out of GPRs.
04898         SDValue ArgVal;
04899 
04900         // Double values are always passed in a single GPR.
04901         if (Arg.getValueType() != MVT::f32) {
04902           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
04903 
04904         // Non-array float values are extended and passed in a GPR.
04905         } else if (!Flags.isInConsecutiveRegs()) {
04906           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
04907           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
04908 
04909         // If we have an array of floats, we collect every odd element
04910         // together with its predecessor into one GPR.
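              // E.g. elements v[0] (offset 0) and v[1] (offset 4) of an f32
              // array share one i64.  BUILD_PAIR takes (lo, hi), and on a
              // big-endian target the element at the lower offset must become
              // the high word, hence the swap below.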
04911         } else if (ArgOffset % PtrByteSize != 0) {
04912           SDValue Lo, Hi;
04913           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
04914           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
04915           if (!isLittleEndian)
04916             std::swap(Lo, Hi);
04917           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
04918 
04919         // The final element, if even, goes into the first half of a GPR.
04920         } else if (Flags.isInConsecutiveRegsLast()) {
04921           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
04922           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
04923           if (!isLittleEndian)
04924             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
04925                                  DAG.getConstant(32, MVT::i32));
04926 
04927         // Non-final even elements are skipped; they will be handled
04928         // together with the subsequent argument on the next iteration.
04929         } else
04930           ArgVal = SDValue();
04931 
04932         if (ArgVal.getNode())
04933           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
04934       } else {
04935         if (CallConv == CallingConv::Fast)
04936           ComputePtrOff();
04937 
04938         // Single-precision floating-point values are mapped to the
04939         // second (rightmost) word of the stack doubleword.
04940         if (Arg.getValueType() == MVT::f32 &&
04941             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
04942           SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
04943           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
04944         }
04945 
04946         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
04947                          true, isTailCall, false, MemOpChains,
04948                          TailCallArguments, dl);
04949 
04950         NeededLoad = true;
04951       }
04952       // When passing an array of floats, the array occupies consecutive
04953       // space in the argument area; only round up to the next doubleword
04954       // at the end of the array.  Otherwise, each float takes 8 bytes.
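            // For example, three consecutive f32 array elements advance
            // ArgOffset by 4 + 4 + 4 = 12, and the final element rounds it up
            // to 16.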
04955       if (CallConv != CallingConv::Fast || NeededLoad) {
04956         ArgOffset += (Arg.getValueType() == MVT::f32 &&
04957                       Flags.isInConsecutiveRegs()) ? 4 : 8;
04958         if (Flags.isInConsecutiveRegsLast())
04959           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
04960       }
04961       break;
04962     }
04963     case MVT::v4f32:
04964     case MVT::v4i32:
04965     case MVT::v8i16:
04966     case MVT::v16i8:
04967     case MVT::v2f64:
04968     case MVT::v2i64:
04969       if (!Subtarget.hasQPX()) {
04970       // These can be scalar arguments or elements of a vector array type
04971       // passed directly.  The latter are used to implement ELFv2 homogeneous
04972       // vector aggregates.
04973 
04974       // For a varargs call, named arguments go into VRs or on the stack as
04975       // usual; unnamed arguments always go to the stack or the corresponding
04976       // GPRs when within range.  For now, we always put the value in both
04977       // locations (or even all three).
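            // Concretely, a vararg v4i32 is stored to its parameter save
            // slot, reloaded into the next free VR (if any), and also
            // reloaded piecewise into up to 16/PtrByteSize GPRs.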
04978       if (isVarArg) {
04979         // We could elide this store in the case where the object fits
04980         // entirely in R registers.  Maybe later.
04981         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
04982                                      MachinePointerInfo(), false, false, 0);
04983         MemOpChains.push_back(Store);
04984         if (VR_idx != NumVRs) {
04985           SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
04986                                      MachinePointerInfo(),
04987                                      false, false, false, 0);
04988           MemOpChains.push_back(Load.getValue(1));
04989 
04990           unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
04991                            Arg.getSimpleValueType() == MVT::v2i64) ?
04992                           VSRH[VR_idx] : VR[VR_idx];
04993           ++VR_idx;
04994 
04995           RegsToPass.push_back(std::make_pair(VReg, Load));
04996         }
04997         ArgOffset += 16;
04998         for (unsigned i=0; i<16; i+=PtrByteSize) {
04999           if (GPR_idx == NumGPRs)
05000             break;
05001           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
05002                                   DAG.getConstant(i, PtrVT));
05003           SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
05004                                      false, false, false, 0);
05005           MemOpChains.push_back(Load.getValue(1));
05006           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05007         }
05008         break;
05009       }
05010 
05011       // Non-varargs Altivec params go into VRs or on the stack.
05012       if (VR_idx != NumVRs) {
05013         unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
05014                          Arg.getSimpleValueType() == MVT::v2i64) ?
05015                         VSRH[VR_idx] : VR[VR_idx];
05016         ++VR_idx;
05017 
05018         RegsToPass.push_back(std::make_pair(VReg, Arg));
05019       } else {
05020         if (CallConv == CallingConv::Fast)
05021           ComputePtrOff();
05022 
05023         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
05024                          true, isTailCall, true, MemOpChains,
05025                          TailCallArguments, dl);
05026         if (CallConv == CallingConv::Fast)
05027           ArgOffset += 16;
05028       }
05029 
05030       if (CallConv != CallingConv::Fast)
05031         ArgOffset += 16;
05032       break;
05033       } // not QPX
05034 
05035       assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
05036              "Invalid QPX parameter type");
05037 
05038       /* fall through */
05039     case MVT::v4f64:
05040     case MVT::v4i1: {
05041       bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
05042       if (isVarArg) {
05043         // We could elide this store in the case where the object fits
05044         // entirely in R registers.  Maybe later.
05045         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
05046                                      MachinePointerInfo(), false, false, 0);
05047         MemOpChains.push_back(Store);
05048         if (QFPR_idx != NumQFPRs) {
05049           SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
05050                                      Store, PtrOff, MachinePointerInfo(),
05051                                      false, false, false, 0);
05052           MemOpChains.push_back(Load.getValue(1));
05053           RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
05054         }
05055         ArgOffset += (IsF32 ? 16 : 32);
05056         for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
05057           if (GPR_idx == NumGPRs)
05058             break;
05059           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
05060                                   DAG.getConstant(i, PtrVT));
05061           SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
05062                                      false, false, false, 0);
05063           MemOpChains.push_back(Load.getValue(1));
05064           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05065         }
05066         break;
05067       }
05068 
05069       // Non-varargs QPX params go into registers or on the stack.
05070       if (QFPR_idx != NumQFPRs) {
05071         RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
05072       } else {
05073         if (CallConv == CallingConv::Fast)
05074           ComputePtrOff();
05075 
05076         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
05077                          true, isTailCall, true, MemOpChains,
05078                          TailCallArguments, dl);
05079         if (CallConv == CallingConv::Fast)
05080           ArgOffset += (IsF32 ? 16 : 32);
05081       }
05082 
05083       if (CallConv != CallingConv::Fast)
05084         ArgOffset += (IsF32 ? 16 : 32);
05085       break;
05086       }
05087     }
05088   }
05089 
05090   assert(NumBytesActuallyUsed == ArgOffset);
05091   (void)NumBytesActuallyUsed;
05092 
05093   if (!MemOpChains.empty())
05094     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
05095 
05096   // Check if this is an indirect call (MTCTR/BCTRL).
05097   // See PrepareCall() for more information about calls through function
05098   // pointers in the 64-bit SVR4 ABI.
05099   if (!isTailCall && !IsPatchPoint &&
05100       !isFunctionGlobalAddress(Callee) &&
05101       !isa<ExternalSymbolSDNode>(Callee)) {
05102     // Load r2 into a virtual register and store it to the TOC save area.
05103     setUsesTOCBasePtr(DAG);
05104     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
05105     // TOC save area offset.
05106     unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
05107     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
05108     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
05109     Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
05110                          MachinePointerInfo::getStack(TOCSaveOffset),
05111                          false, false, 0);
05112     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
05113     // This does not mean the MTCTR instruction must use R12; it's easier
05114     // to model this as an extra parameter, so do that.
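          // The net effect for an ELFv2 indirect call is roughly (a sketch;
          // the offset comes from getTOCSaveOffset(), 24 under ELFv2):
          //   std   r2, 24(r1)      ; save our TOC pointer
          //   mr    r12, <callee>   ; ELFv2 wants the target address in r12
          //   mtctr r12
          //   bctrl
          //   ld    r2, 24(r1)      ; TOC restore is emitted after the call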
05115     if (isELFv2ABI && !IsPatchPoint)
05116       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
05117   }
05118 
05119   // Build a sequence of copy-to-reg nodes chained together with token chain
05120   // and flag operands which copy the outgoing args into the appropriate regs.
05121   SDValue InFlag;
05122   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
05123     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
05124                              RegsToPass[i].second, InFlag);
05125     InFlag = Chain.getValue(1);
05126   }
05127 
05128   if (isTailCall)
05129     PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
05130                     FPOp, true, TailCallArguments);
05131 
05132   return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
05133                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
05134                     NumBytes, Ins, InVals, CS);
05135 }
05136 
05137 SDValue
05138 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
05139                                     CallingConv::ID CallConv, bool isVarArg,
05140                                     bool isTailCall, bool IsPatchPoint,
05141                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
05142                                     const SmallVectorImpl<SDValue> &OutVals,
05143                                     const SmallVectorImpl<ISD::InputArg> &Ins,
05144                                     SDLoc dl, SelectionDAG &DAG,
05145                                     SmallVectorImpl<SDValue> &InVals,
05146                                     ImmutableCallSite *CS) const {
05147 
05148   unsigned NumOps = Outs.size();
05149 
05150   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
05151   bool isPPC64 = PtrVT == MVT::i64;
05152   unsigned PtrByteSize = isPPC64 ? 8 : 4;
05153 
05154   MachineFunction &MF = DAG.getMachineFunction();
05155 
05156   // Mark this function as potentially containing a tail call. As a
05157   // consequence, the frame pointer will be used for dynamic stack allocation
05158   // and for restoring the caller's stack pointer in this function's epilogue.
05159   // This is necessary because a tail-called function might overwrite the
05160   // value in this function's (MF) stack pointer slot at 0(SP).
05161   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
05162       CallConv == CallingConv::Fast)
05163     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
05164 
05165   // Count how many bytes are to be pushed on the stack, including the linkage
05166   // area, and parameter passing area.  We start with 24/48 bytes, which is
05167   // prereserved space for [SP][CR][LR][3 x unused].
05168   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
05169   unsigned NumBytes = LinkageSize;
05170 
05171   // Add up all the space actually used.
05172   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
05173   // they all go in registers, but we must reserve stack space for them for
05174   // possible use by the caller.  In varargs or 64-bit calls, parameters are
05175   // assigned stack space in order, with padding so Altivec parameters are
05176   // 16-byte aligned.
05177   unsigned nAltivecParamsAtEnd = 0;
05178   for (unsigned i = 0; i != NumOps; ++i) {
05179     ISD::ArgFlagsTy Flags = Outs[i].Flags;
05180     EVT ArgVT = Outs[i].VT;
05181     // Varargs Altivec parameters are padded to a 16 byte boundary.
05182     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
05183         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
05184         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
05185       if (!isVarArg && !isPPC64) {
05186         // Non-varargs Altivec parameters go after all the non-Altivec
05187         // parameters; handle those later so we know how much padding we need.
05188         nAltivecParamsAtEnd++;
05189         continue;
05190       }
05191       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
05192       NumBytes = ((NumBytes+15)/16)*16;
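            // E.g. NumBytes == 52 becomes 64 here, before the 16-byte
            // parameter itself is added.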
05193     }
05194     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
05195   }
05196 
05197   // Allow for Altivec parameters at the end, if needed.
05198   if (nAltivecParamsAtEnd) {
05199     NumBytes = ((NumBytes+15)/16)*16;
05200     NumBytes += 16*nAltivecParamsAtEnd;
05201   }
05202 
05203   // The prolog code of the callee may store up to 8 GPR argument registers to
05204   // the stack, allowing va_start to index over them in memory if it is varargs.
05205   // Because we cannot tell if this is needed on the caller side, we have to
05206   // conservatively assume that it is needed.  As such, make sure we have at
05207   // least enough stack space for the caller to store the 8 GPRs.
05208   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
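        // E.g. this reserves at least 24 + 8*4 = 56 bytes for 32-bit Darwin
        // and 48 + 8*8 = 112 bytes for 64-bit.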
05209 
05210   // Tail call needs the stack to be aligned.
05211   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
05212       CallConv == CallingConv::Fast)
05213     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
05214 
05215   // Calculate by how many bytes the stack has to be adjusted in case of tail
05216   // call optimization.
05217   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
05218 
05219   // To protect arguments on the stack from being clobbered in a tail call,
05220   // force all the loads to happen before doing any other lowering.
05221   if (isTailCall)
05222     Chain = DAG.getStackArgumentTokenFactor(Chain);
05223 
05224   // Adjust the stack pointer for the new arguments...
05225   // These operations are automatically eliminated by the prolog/epilog pass
05226   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
05227                                dl);
05228   SDValue CallSeqStart = Chain;
05229 
05230   // Load the return address and frame pointer so they can be moved somewhere
05231   // else later.
05232   SDValue LROp, FPOp;
05233   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
05234                                        dl);
05235 
05236   // Set up a copy of the stack pointer for use loading and storing any
05237   // arguments that may not fit in the registers available for argument
05238   // passing.
05239   SDValue StackPtr;
05240   if (isPPC64)
05241     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
05242   else
05243     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
05244 
05245   // Figure out which arguments are going to go in registers, and which in
05246   // memory.  Also, if this is a vararg function, floating point operations
05247   // must be stored to our stack, and loaded into integer regs as well, if
05248   // any integer regs are available for argument passing.
05249   unsigned ArgOffset = LinkageSize;
05250   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
05251 
05252   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
05253     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
05254     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
05255   };
05256   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
05257     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
05258     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
05259   };
05260   static const MCPhysReg VR[] = {
05261     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
05262     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
05263   };
05264   const unsigned NumGPRs = array_lengthof(GPR_32);
05265   const unsigned NumFPRs = 13;
05266   const unsigned NumVRs  = array_lengthof(VR);
05267 
05268   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
05269 
05270   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
05271   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
05272 
05273   SmallVector<SDValue, 8> MemOpChains;
05274   for (unsigned i = 0; i != NumOps; ++i) {
05275     SDValue Arg = OutVals[i];
05276     ISD::ArgFlagsTy Flags = Outs[i].Flags;
05277 
05278     // PtrOff will be used to store the current argument to the stack if a
05279     // register cannot be found for it.
05280     SDValue PtrOff;
05281 
05282     PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
05283 
05284     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
05285 
05286     // On PPC64, promote integers to 64-bit values.
05287     if (isPPC64 && Arg.getValueType() == MVT::i32) {
05288       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
05289       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
05290       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
05291     }
05292 
05293     // FIXME: memcpy is used far more than necessary.  Correctness first.
05294     // Note: "by value" is code for passing a structure by value, not
05295     // basic types.
05296     if (Flags.isByVal()) {
05297       unsigned Size = Flags.getByValSize();
05298       // Very small objects are passed right-justified.  Everything else is
05299       // passed left-justified.
05300       if (Size==1 || Size==2) {
05301         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
05302         if (GPR_idx != NumGPRs) {
05303           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
05304                                         MachinePointerInfo(), VT,
05305                                         false, false, false, 0);
05306           MemOpChains.push_back(Load.getValue(1));
05307           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05308 
05309           ArgOffset += PtrByteSize;
05310         } else {
05311           SDValue Const = DAG.getConstant(PtrByteSize - Size,
05312                                           PtrOff.getValueType());
05313           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
05314           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
05315                                                             CallSeqStart,
05316                                                             Flags, DAG, dl);
05317           ArgOffset += PtrByteSize;
05318         }
05319         continue;
05320       }
05321       // Copy entire object into memory.  There are cases where gcc-generated
05322       // code assumes it is there, even if it could be put entirely into
05323       // registers.  (This is not what the doc says.)
05324       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
05325                                                         CallSeqStart,
05326                                                         Flags, DAG, dl);
05327 
05328       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
05329       // copy the pieces of the object that fit into registers from the
05330       // parameter save area.
05331       for (unsigned j=0; j<Size; j+=PtrByteSize) {
05332         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
05333         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
05334         if (GPR_idx != NumGPRs) {
05335           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
05336                                      MachinePointerInfo(),
05337                                      false, false, false, 0);
05338           MemOpChains.push_back(Load.getValue(1));
05339           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05340           ArgOffset += PtrByteSize;
05341         } else {
05342           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
05343           break;
05344         }
05345       }
05346       continue;
05347     }
05348 
05349     switch (Arg.getSimpleValueType().SimpleTy) {
05350     default: llvm_unreachable("Unexpected ValueType for argument!");
05351     case MVT::i1:
05352     case MVT::i32:
05353     case MVT::i64:
05354       if (GPR_idx != NumGPRs) {
05355         if (Arg.getValueType() == MVT::i1)
05356           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
05357 
05358         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
05359       } else {
05360         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
05361                          isPPC64, isTailCall, false, MemOpChains,
05362                          TailCallArguments, dl);
05363       }
05364       ArgOffset += PtrByteSize;
05365       break;
05366     case MVT::f32:
05367     case MVT::f64:
05368       if (FPR_idx != NumFPRs) {
05369         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
05370 
05371         if (isVarArg) {
05372           SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
05373                                        MachinePointerInfo(), false, false, 0);
05374           MemOpChains.push_back(Store);
05375 
05376           // Float varargs are always shadowed in available integer registers
05377           if (GPR_idx != NumGPRs) {
05378             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
05379                                        MachinePointerInfo(), false, false,
05380                                        false, 0);
05381             MemOpChains.push_back(Load.getValue(1));
05382             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05383           }
05384           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
05385             SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
05386             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
05387             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
05388                                        MachinePointerInfo(),
05389                                        false, false, false, 0);
05390             MemOpChains.push_back(Load.getValue(1));
05391             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05392           }
05393         } else {
05394           // If we have any FPRs remaining, we may also have GPRs remaining.
05395           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
05396           // GPRs.
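                // E.g. for f(float a, double b) on a 32-bit target: a goes in
                // F1 and shadows R3; b goes in F2 and shadows R4 and R5.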
05397           if (GPR_idx != NumGPRs)
05398             ++GPR_idx;
05399           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
05400               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
05401             ++GPR_idx;
05402         }
05403       } else
05404         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
05405                          isPPC64, isTailCall, false, MemOpChains,
05406                          TailCallArguments, dl);
05407       if (isPPC64)
05408         ArgOffset += 8;
05409       else
05410         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
05411       break;
05412     case MVT::v4f32:
05413     case MVT::v4i32:
05414     case MVT::v8i16:
05415     case MVT::v16i8:
05416       if (isVarArg) {
05417         // These go aligned on the stack, or in the corresponding R registers
05418         // when within range.  The Darwin PPC ABI doc claims they also go in
05419         // V registers; in fact gcc does this only for arguments that are
05420         // prototyped, not for those that merely match the "...".  We do it for
05421         // all arguments, and that seems to work.
05422         while (ArgOffset % 16 !=0) {
05423           ArgOffset += PtrByteSize;
05424           if (GPR_idx != NumGPRs)
05425             GPR_idx++;
05426         }
05427         // We could elide this store in the case where the object fits
05428         // entirely in R registers.  Maybe later.
05429         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
05430                             DAG.getConstant(ArgOffset, PtrVT));
05431         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
05432                                      MachinePointerInfo(), false, false, 0);
05433         MemOpChains.push_back(Store);
05434         if (VR_idx != NumVRs) {
05435           SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
05436                                      MachinePointerInfo(),
05437                                      false, false, false, 0);
05438           MemOpChains.push_back(Load.getValue(1));
05439           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
05440         }
05441         ArgOffset += 16;
05442         for (unsigned i=0; i<16; i+=PtrByteSize) {
05443           if (GPR_idx == NumGPRs)
05444             break;
05445           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
05446                                   DAG.getConstant(i, PtrVT));
05447           SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
05448                                      false, false, false, 0);
05449           MemOpChains.push_back(Load.getValue(1));
05450           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
05451         }
05452         break;
05453       }
05454 
05455       // Non-varargs Altivec params generally go in registers, but have
05456       // stack space allocated at the end.
05457       if (VR_idx != NumVRs) {
05458         // Doesn't have GPR space allocated.
05459         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
05460       } else if (nAltivecParamsAtEnd==0) {
05461         // We are emitting Altivec params in order.
05462         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
05463                          isPPC64, isTailCall, true, MemOpChains,
05464                          TailCallArguments, dl);
05465         ArgOffset += 16;
05466       }
05467       break;
05468     }
05469   }
05470   // If all Altivec parameters fit in registers, as they usually do,
05471   // they get stack space following the non-Altivec parameters.  We
05472   // don't track this here because nobody below needs it.
05473   // If there are more Altivec parameters than fit in registers, emit
05474   // the stores here.
05475   if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
05476     unsigned j = 0;
05477     // Offset is aligned; skip the first 12 params, which go in V registers.
05478     ArgOffset = ((ArgOffset+15)/16)*16;
05479     ArgOffset += 12*16;
05480     for (unsigned i = 0; i != NumOps; ++i) {
05481       SDValue Arg = OutVals[i];
05482       EVT ArgType = Outs[i].VT;
05483       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
05484           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
05485         if (++j > NumVRs) {
05486           SDValue PtrOff;
05487           // We are emitting Altivec params in order.
05488           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
05489                            isPPC64, isTailCall, true, MemOpChains,
05490                            TailCallArguments, dl);
05491           ArgOffset += 16;
05492         }
05493       }
05494     }
05495   }
05496 
05497   if (!MemOpChains.empty())
05498     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
05499 
05500   // On Darwin, R12 must contain the address of an indirect callee.  This does
05501   // not mean the MTCTR instruction must use R12; it's easier to model this as
05502   // an extra parameter, so do that.
05503   if (!isTailCall &&
05504       !isFunctionGlobalAddress(Callee) &&
05505       !isa<ExternalSymbolSDNode>(Callee) &&
05506       !isBLACompatibleAddress(Callee, DAG))
05507     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
05508                                                    PPC::R12), Callee));
05509 
05510   // Build a sequence of copy-to-reg nodes chained together with token chain
05511   // and flag operands which copy the outgoing args into the appropriate regs.
05512   SDValue InFlag;
05513   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
05514     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
05515                              RegsToPass[i].second, InFlag);
05516     InFlag = Chain.getValue(1);
05517   }
05518 
05519   if (isTailCall)
05520     PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
05521                     FPOp, true, TailCallArguments);
05522 
05523   return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
05524                     RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
05525                     NumBytes, Ins, InVals, CS);
05526 }
05527 
05528 bool
05529 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
05530                                   MachineFunction &MF, bool isVarArg,
05531                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
05532                                   LLVMContext &Context) const {
05533   SmallVector<CCValAssign, 16> RVLocs;
05534   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
05535   return CCInfo.CheckReturn(Outs, RetCC_PPC);
05536 }
05537 
05538 SDValue
05539 PPCTargetLowering::LowerReturn(SDValue Chain,
05540                                CallingConv::ID CallConv, bool isVarArg,
05541                                const SmallVectorImpl<ISD::OutputArg> &Outs,
05542                                const SmallVectorImpl<SDValue> &OutVals,
05543                                SDLoc dl, SelectionDAG &DAG) const {
05544 
05545   SmallVector<CCValAssign, 16> RVLocs;
05546   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
05547                  *DAG.getContext());
05548   CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
05549 
05550   SDValue Flag;
05551   SmallVector<SDValue, 4> RetOps(1, Chain);
05552 
05553   // Copy the result values into the output registers.
05554   for (unsigned i = 0; i != RVLocs.size(); ++i) {
05555     CCValAssign &VA = RVLocs[i];
05556     assert(VA.isRegLoc() && "Can only return in registers!");
05557 
05558     SDValue Arg = OutVals[i];
05559 
05560     switch (VA.getLocInfo()) {
05561     default: llvm_unreachable("Unknown loc info!");
05562     case CCValAssign::Full: break;
05563     case CCValAssign::AExt:
05564       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
05565       break;
05566     case CCValAssign::ZExt:
05567       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
05568       break;
05569     case CCValAssign::SExt:
05570       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
05571       break;
05572     }
05573 
05574     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
05575     Flag = Chain.getValue(1);
05576     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
05577   }
05578 
05579   RetOps[0] = Chain;  // Update chain.
05580 
05581   // Add the flag if we have it.
05582   if (Flag.getNode())
05583     RetOps.push_back(Flag);
05584 
05585   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
05586 }
05587 
05588 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
05589                                    const PPCSubtarget &Subtarget) const {
05590   // When we pop the dynamic allocation we need to restore the SP link.
05591   SDLoc dl(Op);
05592 
05593   // Get the correct type for pointers.
05594   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
05595 
05596   // Construct the stack pointer operand.
05597   bool isPPC64 = Subtarget.isPPC64();
05598   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
05599   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
05600 
05601   // Get the operands for the STACKRESTORE.
05602   SDValue Chain = Op.getOperand(0);
05603   SDValue SaveSP = Op.getOperand(1);
05604 
05605   // Load the old link SP.
05606   SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
05607                                    MachinePointerInfo(),
05608                                    false, false, false, 0);
05609 
05610   // Restore the stack pointer.
05611   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
05612 
05613   // Store the old link SP.
05614   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
05615                       false, false, 0);
05616 }
05617 
05618 
05619 
05620 SDValue
05621 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
05622   MachineFunction &MF = DAG.getMachineFunction();
05623   bool isPPC64 = Subtarget.isPPC64();
05624   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
05625 
05626   // Get the current return address save index.  The primary user of this
05627   // index is the RETURNADDR lowering.
05628   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
05629   int RASI = FI->getReturnAddrSaveIndex();
05630 
05631   // If the return address save index hasn't been defined yet.
05632   if (!RASI) {
05633     // Find out the fixed offset of the return address save area.
05634     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
05635     // Allocate the frame index for the return address save area.
05636     RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
05637     // Save the result.
05638     FI->setReturnAddrSaveIndex(RASI);
05639   }
05640   return DAG.getFrameIndex(RASI, PtrVT);
05641 }
05642 
05643 SDValue
05644 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
05645   MachineFunction &MF = DAG.getMachineFunction();
05646   bool isPPC64 = Subtarget.isPPC64();
05647   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
05648 
05649   // Get current frame pointer save index.  The users of this index will be
05650   // primarily DYNALLOC instructions.
05651   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
05652   int FPSI = FI->getFramePointerSaveIndex();
05653 
05654   // If the frame pointer save index hasn't been defined yet.
05655   if (!FPSI) {
05656     // Find out the fixed offset of the frame pointer save area.
05657     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
05658     // Allocate the frame index for frame pointer save area.
05659     FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
05660     // Save the result.
05661     FI->setFramePointerSaveIndex(FPSI);
05662   }
05663   return DAG.getFrameIndex(FPSI, PtrVT);
05664 }
05665 
05666 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
05667                                          SelectionDAG &DAG,
05668                                          const PPCSubtarget &Subtarget) const {
05669   // Get the inputs.
05670   SDValue Chain = Op.getOperand(0);
05671   SDValue Size  = Op.getOperand(1);
05672   SDLoc dl(Op);
05673 
05674   // Get the correct type for pointers.
05675   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
05676   // Negate the size.
05677   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
05678                                   DAG.getConstant(0, PtrVT), Size);
05679   // Construct a node for the frame pointer save index.
05680   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
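        // DYNALLOC is later expanded (roughly) into a load of the back-chain
        // word followed by a stwux/stdux, which decrements SP and re-stores
        // the back chain in a single instruction, so the SP link stays valid
        // throughout.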
05681   // Build a DYNALLOC node.
05682   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
05683   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
05684   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
05685 }
05686 
05687 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
05688                                                SelectionDAG &DAG) const {
05689   SDLoc DL(Op);
05690   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
05691                      DAG.getVTList(MVT::i32, MVT::Other),
05692                      Op.getOperand(0), Op.getOperand(1));
05693 }
05694 
05695 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
05696                                                 SelectionDAG &DAG) const {
05697   SDLoc DL(Op);
05698   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
05699                      Op.getOperand(0), Op.getOperand(1));
05700 }
05701 
05702 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
05703   if (Op.getValueType().isVector())
05704     return LowerVectorLoad(Op, DAG);
05705 
05706   assert(Op.getValueType() == MVT::i1 &&
05707          "Custom lowering only for i1 loads");
05708 
05709   // First, extend-load the 8 bits to pointer width, then truncate to 1 bit.
05710 
05711   SDLoc dl(Op);
05712   LoadSDNode *LD = cast<LoadSDNode>(Op);
05713 
05714   SDValue Chain = LD->getChain();
05715   SDValue BasePtr = LD->getBasePtr();
05716   MachineMemOperand *MMO = LD->getMemOperand();
05717 
05718   SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
05719                                  BasePtr, MVT::i8, MMO);
05720   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
05721 
05722   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
05723   return DAG.getMergeValues(Ops, dl);
05724 }
05725 
05726 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
05727   if (Op.getOperand(1).getValueType().isVector())
05728     return LowerVectorStore(Op, DAG);
05729 
05730   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
05731          "Custom lowering only for i1 stores");
05732 
05733   // First, zero-extend to pointer width, then use a truncating store to 8 bits.
05734 
05735   SDLoc dl(Op);
05736   StoreSDNode *ST = cast<StoreSDNode>(Op);
05737 
05738   SDValue Chain = ST->getChain();
05739   SDValue BasePtr = ST->getBasePtr();
05740   SDValue Value = ST->getValue();
05741   MachineMemOperand *MMO = ST->getMemOperand();
05742 
05743   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
05744   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
05745 }
05746 
05747 // FIXME: Remove this once the ANDI glue bug is fixed:
05748 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
05749   assert(Op.getValueType() == MVT::i1 &&
05750          "Custom lowering only for i1 results");
05751 
05752   SDLoc DL(Op);
05753   return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
05754                      Op.getOperand(0));
05755 }
05756 
05757 /// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
05758 /// when possible.
05759 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
05760   // Not FP? Not a fsel.
05761   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
05762       !Op.getOperand(2).getValueType().isFloatingPoint())
05763     return Op;
05764 
05765   // We might be able to do better than this under some circumstances, but in
05766   // general, fsel-based lowering of select is a finite-math-only optimization.
05767   // For more information, see section F.3 of the 2.06 ISA specification.
05768   if (!DAG.getTarget().Options.NoInfsFPMath ||
05769       !DAG.getTarget().Options.NoNaNsFPMath)
05770     return Op;
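        // Recall fsel: fsel(t, x, y) yields x when t >= 0.0 (with -0.0
        // counting as >= 0) and y otherwise, including when t is a NaN.  So,
        // for example,
        //   (a >= b) ? x : y   ==>   t = a - b;  fsel t, x, y
        // which is only safe under the no-NaNs/no-infs assumptions checked
        // above.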
05771 
05772   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
05773 
05774   EVT ResVT = Op.getValueType();
05775   EVT CmpVT = Op.getOperand(0).getValueType();
05776   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
05777   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
05778   SDLoc dl(Op);
05779 
05780   // If the RHS of the comparison is a 0.0, we don't need to do the
05781   // subtraction at all.
05782   SDValue Sel1;
05783   if (isFloatingPointZero(RHS))
05784     switch (CC) {
05785     default: break;       // SETUO etc aren't handled by fsel.
05786     case ISD::SETNE:
05787       std::swap(TV, FV);
05788     case ISD::SETEQ:
05789       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
05790         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
05791       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
05792       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
05793         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
05794       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
05795                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
05796     case ISD::SETULT:
05797     case ISD::SETLT:
05798       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
05799     case ISD::SETOGE:
05800     case ISD::SETGE:
05801       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
05802         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
05803       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
05804     case ISD::SETUGT:
05805     case ISD::SETGT:
05806       std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
05807     case ISD::SETOLE:
05808     case ISD::SETLE:
05809       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
05810         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
05811       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
05812                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
05813     }
05814 
05815   SDValue Cmp;
05816   switch (CC) {
05817   default: break;       // SETUO etc aren't handled by fsel.
05818   case ISD::SETNE:
05819     std::swap(TV, FV);
05820   case ISD::SETEQ:
05821     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
05822     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
05823       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
05824     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
05825     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
05826       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
05827     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
05828                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
05829   case ISD::SETULT:
05830   case ISD::SETLT:
05831     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
05832     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
05833       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
05834     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
05835   case ISD::SETOGE:
05836   case ISD::SETGE:
05837     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
05838     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
05839       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
05840     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
05841   case ISD::SETUGT:
05842   case ISD::SETGT:
05843     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
05844     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
05845       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
05846     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
05847   case ISD::SETOLE:
05848   case ISD::SETLE:
05849     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
05850     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
05851       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
05852     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
05853   }
05854   return Op;
05855 }
05856 
05857 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
05858                                                SelectionDAG &DAG,
05859                                                SDLoc dl) const {
05860   assert(Op.getOperand(0).getValueType().isFloatingPoint());
05861   SDValue Src = Op.getOperand(0);
05862   if (Src.getValueType() == MVT::f32)
05863     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
05864 
05865   SDValue Tmp;
05866   switch (Op.getSimpleValueType().SimpleTy) {
05867   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
05868   case MVT::i32:
05869     Tmp = DAG.getNode(
05870         Op.getOpcode() == ISD::FP_TO_SINT
05871             ? PPCISD::FCTIWZ
05872             : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
05873         dl, MVT::f64, Src);
05874     break;
05875   case MVT::i64:
05876     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
05877            "i64 FP_TO_UINT is supported only with FPCVT");
05878     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
05879                                                         PPCISD::FCTIDUZ,
05880                       dl, MVT::f64, Src);
05881     break;
05882   }
05883 
05884   // Convert the FP value to an int value through memory.
05885   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
05886     (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
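        // Pre-POWER8 subtargets have no direct FPR-to-GPR move, so the
        // conversion result (which FCTIWZ/FCTIDZ leaves in an FPR) is
        // round-tripped through a stack slot: stfiwx for 32-bit results when
        // available, otherwise a full 64-bit store.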
05887   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
05888   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
05889   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
05890 
05891   // Emit a store to the stack slot.
05892   SDValue Chain;
05893   if (i32Stack) {
05894     MachineFunction &MF = DAG.getMachineFunction();
05895     MachineMemOperand *MMO =
05896       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
05897     SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
05898     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
05899               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
05900   } else
05901     Chain = DAG.getStore(DAG.ge